# Hyper parameter search for CQL

In [15]:
!pip install d3rlpy



In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
# plt.style.use('matplotlibrc')

# from Python.data_sampler import *

## Building an MDPDataset

We first read in a large batch of samples from the file. As `d3rlpy` wants it in the form (observations, actions, rewards, terminal flags), we go ahead and do that. Here's a helper function to get a dataset from a list of chunks of your choosing.

In [17]:
import numpy as np
import torch
import random
import pandas
from Python.data_sampler import *

In [18]:
def get_dataset(chunks : list, batch_size=30000, 
                path="collected_data/rl_det_small.txt",
                episode_length=1111) -> "tuple[d3rlpy.dataset.MDPDataset, np.ndarray, np.ndarray, np.ndarray]":
    """Build a d3rlpy ``MDPDataset`` from selected chunks of a sample file.

    For every chunk index in ``chunks`` the chunk is read through a
    ``DataSampler`` and one batch is drawn from it; the per-chunk batches are
    concatenated in the order given.

    Args:
        chunks: chunk indices to read from the file at ``path``.
        batch_size: value passed to ``DataSampler.get_batch`` for each chunk.
        path: location of the sample file.
        episode_length: number of consecutive samples treated as one episode.
            The last sample of every full episode gets a terminal flag;
            samples after the last flag are left unflagged.

    Returns:
        ``(dataset, states, actions, rewards)`` where ``dataset`` is the
        ``MDPDataset`` and the other three are the NumPy arrays it was built
        from.

    Raises:
        ValueError: if ``chunks`` is empty or ``episode_length`` is < 1.
    """
    chunks = list(chunks)
    if not chunks:
        raise ValueError("chunks must contain at least one chunk index")
    if episode_length < 1:
        raise ValueError("episode_length must be a positive integer")

    # Side effect: reseeds Python's *global* `random` module on every call.
    # NOTE(review): presumably this makes DataSampler.get_batch reproducible;
    # confirm that the sampler actually draws from `random`.
    random.seed(0)
    samples = DataSampler(path_to_data=path)
    samples.setting("coarse")
    states = []
    actions = []
    rewards = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        # get_batch returns four items; the fourth (next states) is not needed
        # because MDPDataset is built from observations/actions/rewards/terminals.
        [statesChunk, actionsChunk, rewardsChunk, _nextStatesChunk] = samples.get_batch(batch_size)
        states.append(statesChunk)
        actions.append(actionsChunk)
        rewards.append(rewardsChunk)
    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)
    terminals = np.zeros(len(states))
    # A terminal flag marks the LAST step of an episode, so flag indices
    # episode_length-1, 2*episode_length-1, ... (not index 0, which would turn
    # the very first sample into a one-step episode).
    terminals[episode_length - 1::episode_length] = 1
    print(states.shape)
    dataset = d3rlpy.dataset.MDPDataset(states.numpy(), 
                                        actions.numpy(), 
                                        rewards.numpy(), terminals)
    return dataset, states.numpy(), actions.numpy(), rewards.numpy()

We can build the dataset from there, just like this, and split into train and test sets.

In [19]:
# Training data: one batch from each of chunks 200..299 of rl_purestochastic.txt.
dataset, states, actions, rewards = get_dataset(
    list(range(200, 300)),
    path="../collected_data/rl_purestochastic.txt",
)

start
[ 0.00000000e+00  7.95731469e+08  1.43210892e-01 -3.25999953e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28049971e-01  6.00000000e-01  4.67532035e-01]
Read chunk # 201 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01  2.46000047e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.61927906e-02  2.08952959e-01]
Read chunk # 202 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.53789108e-01  1.32000047e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.71586383e-02 -6.00000000e-01 -5.33093061e-02]
Read chunk # 203 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.39489108e-01 -4.75999953e-02
 -9.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.41298638e-01  5.43892365e-01]
Read chunk # 204 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.48410892e-01  5.24000047e-02
  2.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.

[ 0.00000000e+00  7.95731469e+08  3.96710892e-01 -1.01999953e-02
 -1.04000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -4.37719266e-01]
Read chunk # 250 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.07710892e-01  3.78000047e-02
 -2.22000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.62082511e-02  1.61745700e-01]
Read chunk # 251 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.62108923e-02 -1.29999953e-02
 -1.66000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.01419569e-01  2.01047612e-01  6.00000000e-01]
Read chunk # 252 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  6.16108923e-02 -5.77999953e-02
  2.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.10771801e-01  4.84326755e-01  6.00000000e-01]
Read chunk # 253 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.75108923e-02 -4.85999953e-02
  2.30999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.000000

In [20]:
# Return statistics of the logged episodes, i.e. of the policy that produced the data.
print("The behavior policy value statistics are:")
behavior_stats = dataset.compute_stats()
behavior_stats['return']

The behavior policy value statistics are:


{'mean': -269.3442,
 'std': 168.58556,
 'min': -635.52454,
 'max': 0.0,
 'histogram': (array([ 5,  3,  3,  3,  4,  2,  4,  1,  0,  2,  9,  7, 10,  7,  8,  7, 13,
         10,  1,  1]),
  array([-635.52454 , -603.7483  , -571.9721  , -540.19586 , -508.41962 ,
         -476.6434  , -444.8672  , -413.09094 , -381.31473 , -349.53848 ,
         -317.76227 , -285.98605 , -254.20981 , -222.4336  , -190.65736 ,
         -158.88113 , -127.104904,  -95.32868 ,  -63.552452,  -31.776226,
            0.      ], dtype=float32))}

In [21]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the episodes for evaluation. The split is seeded so that the
# evaluation set (and every metric computed on it) is the same on each run.
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2, random_state=0)

## Setting up an Algorithm

In [22]:
from d3rlpy.algos import CQL
from d3rlpy.models.encoders import VectorEncoderFactory

from d3rlpy.preprocessing import MinMaxActionScaler
import random
# Min-max action scaler with fixed bounds of -0.6 and 0.6; shared by the CQL
# model and the FQE estimators below.
# NOTE(review): presumably +/-0.6 is the action limit of the logged controller - confirm.
action_scaler = MinMaxActionScaler(minimum=-0.6, maximum=0.6)

from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from d3rlpy.metrics.scorer import initial_state_value_estimation_scorer

from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer
import pickle as pkl
from statistics import harmonic_mean as hm

## Perform a random search on hyper params. 

Good FQE (fitted Q evaluation) estimates on both the stochastic and the deterministic data are needed for good performance during the evaluation step in the simulator, so each candidate is scored by the harmonic mean of its two FQE value estimates. The harmonic mean is dominated by the smaller of its inputs, so it is large only when both estimates are large. Python's `statistics.harmonic_mean` is only defined for non-negative inputs; the code below adds a constant offset of 50 to each estimate before taking the mean, presumably for this reason.


In [None]:

# Random search over CQL hyper-parameters.
#
# Every iteration draws one candidate (actor/critic/temperature learning rates
# and n_steps), trains CQL on the training episodes, fits FQE on two held-out
# datasets and keeps the candidate whose harmonic mean of the two (shifted)
# final FQE average values is the largest seen so far. The best candidate's
# hyper-parameters, model and policy are written to disk as soon as it is found.

# The number of times we would wanna do a random search for hyper-params. In 
# every random search, we randomly pick a new set of hyper-params.
num_search_iterations = 40
largest_fqe = -np.inf

# Constant added to each FQE average value before the harmonic mean is taken.
# statistics.harmonic_mean raises on negative inputs.
# NOTE(review): 50 is presumably chosen to exceed the magnitude of the expected
# (negative) value estimates - confirm.
fqe_value_offset = 50

# Use the GPU only when one is actually available, instead of failing on
# CPU-only machines.
use_gpu = torch.cuda.is_available()

# Dedicated RNG for drawing candidates. get_dataset() reseeds the global
# `random` module with 0, which would otherwise make every draw identical.
# Pass a seed to random.Random(...) to make the search itself repeatable.
hyperparam_rng = random.Random()

# The encoder factories do not depend on the sampled hyper-parameters, so they
# are created once.
actor_encoder = VectorEncoderFactory(hidden_units=[12, 24, 36, 24, 12],
                                      activation='relu', use_batch_norm=True, dropout_rate=0.2)
critic_encoder = VectorEncoderFactory(hidden_units=[12, 24, 24, 12],
                                      activation='relu', use_batch_norm=True, dropout_rate=0.2)

# Off-policy-evaluation data is loaded and split ONCE with a fixed seed, so all
# candidates are scored on exactly the same episodes (and the files are not
# re-read on every iteration).
ope_split_seed = 0
ope_stoch_dataset = get_dataset([i+1000 for i  in range(20)], 
                                path="../collected_data/rl_stochpid.txt")[0] #change if you'd prefer different chunks
ope_stoch_train_episodes, ope_stoch_test_episodes = train_test_split(
    ope_stoch_dataset, test_size=0.2, random_state=ope_split_seed)

# NOTE(review): the results on this dataset are stored as `history_det`, but
# the file is rl_purestochastic.txt - confirm this is the intended
# "deterministic" evaluation data.
# The `ope_*` names below are the ones the original loop left defined.
ope_dataset, states_ope, actions_ope, rewards_ope = get_dataset([i for i in range(40)], 
                                                                path="../collected_data/rl_purestochastic.txt") #change if you'd prefer different chunks
ope_train_episodes, ope_test_episodes = train_test_split(
    ope_dataset, test_size=0.2, random_state=ope_split_seed)


def _fit_fqe(algo, fqe_train_episodes, fqe_test_episodes):
    """Fit a fresh FQE estimator for `algo`; return ``(estimator, history)``.

    `history` is whatever ``FQE.fit`` returns; the search loop reads the
    metrics of the last epoch as ``history[-1][1]``.
    """
    estimator = FQE(algo=algo, action_scaler = action_scaler, use_gpu=use_gpu)
    history = estimator.fit(fqe_train_episodes,
        eval_episodes=fqe_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })
    return estimator, history


for i in range(num_search_iterations):

    actor_lr_this_iter = hyperparam_rng.uniform(1e-5, 1e-2)
    critic_lr_this_iter = hyperparam_rng.uniform(1e-5, 1e-2)
    temp_lr_this_iter = hyperparam_rng.uniform(1e-5, 1e-4)
    n_steps_this_iter = hyperparam_rng.choice([1, 3, 5, 7])

    print("search iteration: ", i)
    print("using hyper params: ", [actor_lr_this_iter, critic_lr_this_iter, 
                                   temp_lr_this_iter, n_steps_this_iter])

    model = CQL(q_func_factory='qr', #qr -> quantile regression q function, but you don't have to use this
                reward_scaler='standard',
                actor_encoder_factory = actor_encoder,
                critic_encoder_factory = critic_encoder,
                action_scaler=action_scaler,
                actor_learning_rate=actor_lr_this_iter, 
                critic_learning_rate=critic_lr_this_iter,
                temp_learning_rate=temp_lr_this_iter,
                n_steps=n_steps_this_iter, 
                use_gpu=use_gpu)
    model.build_with_dataset(dataset)

    model.fit(train_episodes,
        eval_episodes=test_episodes,
        n_epochs=50, 
        tensorboard_dir='runs',
        scorers={
            'td_error': td_error_scorer,
            'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer
        })

    fqe, history_stoch = _fit_fqe(model, ope_stoch_train_episodes, ope_stoch_test_episodes)
    fqe, history_det = _fit_fqe(model, ope_train_episodes, ope_test_episodes)

    # Final-epoch FQE average values, shifted by the offset.
    shifted_values = [history_stoch[-1][1]["ave_value"] + fqe_value_offset, 
                      history_det[-1][1]["ave_value"] + fqe_value_offset]

    if min(shifted_values) < 0:
        # harmonic_mean would raise here and abort the whole search; rank the
        # candidate last instead so the remaining iterations still run.
        print("shifted fqe value is negative, candidate is skipped: ", shifted_values)
        combined_fqe = -np.inf
    else:
        combined_fqe = hm(shifted_values)

    if combined_fqe > largest_fqe:
        largest_fqe = combined_fqe

        # Save the hyper-params
        hyperparams = [actor_lr_this_iter, critic_lr_this_iter, 
                       temp_lr_this_iter, n_steps_this_iter]

        with open("hyperparams_cql.pkl", "wb") as f:
            print("most optimal hyper params for cql at this point: ", hyperparams)
            pkl.dump(hyperparams, f)

        # Save model and policy
        model.save_model("model_hyperparams_cql.pt")
        model.save_policy("policy_hyperparams_cql.pt")


search iteration:  0
using hyper params:  [0.0031407135725740544, 0.0013820489635677539, 7.31673811873202e-05, 1]
2022-04-21 21:52.43 [debug    ] RoundIterator is selected.
2022-04-21 21:52.43 [info     ] Directory is created at d3rlpy_logs/CQL_20220421215243
2022-04-21 21:52.43 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 21:52.43 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-21 21:52.43 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220421215243/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0031407135725740544, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_r

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:53.00 [info     ] CQL_20220421215243: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00025672995286180793, 'time_algorithm_update': 0.048310447290453605, 'temp_loss': 4.874697296605634, 'temp': 0.9868872661811079, 'alpha_loss': -17.648908477298097, 'alpha': 1.0177169304362612, 'critic_loss': 28.417487921742346, 'actor_loss': -1.8406218001780483, 'time_step': 0.048632645882623046, 'td_error': 1.224320662493224, 'init_value': 0.07656802982091904, 'ave_value': 0.1957315176418038} step=346
2022-04-21 21:53.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:53.18 [info     ] CQL_20220421215243: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0002572419326429422, 'time_algorithm_update': 0.04820082504625265, 'temp_loss': 4.856820766636402, 'temp': 0.9619505985968375, 'alpha_loss': -18.338288880497046, 'alpha': 1.0542209958065452, 'critic_loss': 29.698356832382995, 'actor_loss': -1.7210954730910373, 'time_step': 0.04852366378541627, 'td_error': 1.212277606704511, 'init_value': -0.12082002311944962, 'ave_value': 0.10047348111730127} step=692
2022-04-21 21:53.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:53.35 [info     ] CQL_20220421215243: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00025385101406560467, 'time_algorithm_update': 0.0477954679830915, 'temp_loss': 4.737298408684703, 'temp': 0.9382300404454932, 'alpha_loss': -19.00228158035719, 'alpha': 1.0925243562356586, 'critic_loss': 38.21911907747302, 'actor_loss': -1.2225853234012691, 'time_step': 0.04811589841897777, 'td_error': 1.2124261161015852, 'init_value': -0.541253924369812, 'ave_value': -0.2661524358852433} step=1038
2022-04-21 21:53.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:53.52 [info     ] CQL_20220421215243: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0002509899911163859, 'time_algorithm_update': 0.04614103529494622, 'temp_loss': 4.6198275047919655, 'temp': 0.9153824469257641, 'alpha_loss': -19.701729791012802, 'alpha': 1.132709576904429, 'critic_loss': 48.27062847159502, 'actor_loss': -0.7177253707468165, 'time_step': 0.046458297382200385, 'td_error': 1.213141425201065, 'init_value': -0.8678051829338074, 'ave_value': -0.572783284883411} step=1384
2022-04-21 21:53.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:54.09 [info     ] CQL_20220421215243: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0002510878392037629, 'time_algorithm_update': 0.04635718309810396, 'temp_loss': 4.509203389889932, 'temp': 0.8932972993120293, 'alpha_loss': -20.43324664011167, 'alpha': 1.1748225995570938, 'critic_loss': 59.012246247660904, 'actor_loss': -0.1546649242336781, 'time_step': 0.04667846384765096, 'td_error': 1.2110075578598907, 'init_value': -1.1858024597167969, 'ave_value': -0.8814406027219882} step=1730
2022-04-21 21:54.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:54.26 [info     ] CQL_20220421215243: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0002537917539563482, 'time_algorithm_update': 0.047150946076894774, 'temp_loss': 4.401181601375514, 'temp': 0.8718965318850699, 'alpha_loss': -21.200270118051872, 'alpha': 1.2189080973581083, 'critic_loss': 70.96510597184903, 'actor_loss': 0.4123420509418225, 'time_step': 0.04747101406141513, 'td_error': 1.2131401530206118, 'init_value': -1.793853759765625, 'ave_value': -1.4281456573928573} step=2076
2022-04-21 21:54.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:54.43 [info     ] CQL_20220421215243: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00025149370204506584, 'time_algorithm_update': 0.04653161178434515, 'temp_loss': 4.2978560125207625, 'temp': 0.8511287733654066, 'alpha_loss': -22.000901905787472, 'alpha': 1.2649950261060903, 'critic_loss': 84.76841133867386, 'actor_loss': 0.9173403939587532, 'time_step': 0.04684919084427674, 'td_error': 1.21641450513446, 'init_value': -2.089219093322754, 'ave_value': -1.7391662346258328} step=2422
2022-04-21 21:54.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:55.00 [info     ] CQL_20220421215243: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.000254296153956066, 'time_algorithm_update': 0.04650254194447071, 'temp_loss': 4.194488222199368, 'temp': 0.8309549883955476, 'alpha_loss': -22.840061524010807, 'alpha': 1.3131283690474627, 'critic_loss': 101.20517922550268, 'actor_loss': 1.294419382348915, 'time_step': 0.04682228951095846, 'td_error': 1.221805521927232, 'init_value': -2.5648300647735596, 'ave_value': -2.1387617060094155} step=2768
2022-04-21 21:55.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:55.16 [info     ] CQL_20220421215243: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00025106785614366475, 'time_algorithm_update': 0.04604694884636499, 'temp_loss': 4.0957051219278675, 'temp': 0.8113377185570712, 'alpha_loss': -23.713129677524456, 'alpha': 1.363349749173732, 'critic_loss': 120.58365201123188, 'actor_loss': 1.495786200024489, 'time_step': 0.0463591910511083, 'td_error': 1.2212449267920613, 'init_value': -2.6503608226776123, 'ave_value': -2.223592534731948} step=3114
2022-04-21 21:55.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:55.33 [info     ] CQL_20220421215243: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0002505489856521518, 'time_algorithm_update': 0.04607140122121469, 'temp_loss': 4.00059496805158, 'temp': 0.7922375321043709, 'alpha_loss': -24.623194920534342, 'alpha': 1.415699639072308, 'critic_loss': 143.6633806173512, 'actor_loss': 1.4357007445627554, 'time_step': 0.046383755744537174, 'td_error': 1.221420807311531, 'init_value': -2.4105653762817383, 'ave_value': -2.0253659687996364} step=3460
2022-04-21 21:55.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:55.50 [info     ] CQL_20220421215243: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00025473026870992144, 'time_algorithm_update': 0.046515116112769685, 'temp_loss': 3.906859915380533, 'temp': 0.7736332926446992, 'alpha_loss': -25.56815570765148, 'alpha': 1.4702310586251275, 'critic_loss': 170.07338176572944, 'actor_loss': 1.112726295614518, 'time_step': 0.04683384385412139, 'td_error': 1.2205937242986666, 'init_value': -1.9804658889770508, 'ave_value': -1.6526574920341832} step=3806
2022-04-21 21:55.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:56.07 [info     ] CQL_20220421215243: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00025265547581490756, 'time_algorithm_update': 0.04653072426084838, 'temp_loss': 3.815537546411415, 'temp': 0.755501863584353, 'alpha_loss': -26.554881410102624, 'alpha': 1.527004827309206, 'critic_loss': 200.02596168297563, 'actor_loss': 0.5754009059205049, 'time_step': 0.046849265953019865, 'td_error': 1.2208467115884725, 'init_value': -1.3219270706176758, 'ave_value': -1.08533877326505} step=4152
2022-04-21 21:56.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:56.24 [info     ] CQL_20220421215243: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00025108577199064926, 'time_algorithm_update': 0.04642310307894139, 'temp_loss': 3.726968349059882, 'temp': 0.7378205348646021, 'alpha_loss': -27.58108211252731, 'alpha': 1.5860850039245076, 'critic_loss': 233.06872951088613, 'actor_loss': -0.11323244097464949, 'time_step': 0.04674011985690608, 'td_error': 1.2238457029419572, 'init_value': -0.7104156613349915, 'ave_value': -0.5248186434372277} step=4498
2022-04-21 21:56.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:56.41 [info     ] CQL_20220421215243: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0002502416599692637, 'time_algorithm_update': 0.04644490115215324, 'temp_loss': 3.6397841631332573, 'temp': 0.720575758487503, 'alpha_loss': -28.65143182787592, 'alpha': 1.6475420435729053, 'critic_loss': 267.8715504552588, 'actor_loss': -0.7471197206160926, 'time_step': 0.04676127847219478, 'td_error': 1.228052741455511, 'init_value': -0.313555508852005, 'ave_value': -0.1458278206639405} step=4844
2022-04-21 21:56.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:56.57 [info     ] CQL_20220421215243: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0002474192249981654, 'time_algorithm_update': 0.046172695352852, 'temp_loss': 3.5541978585237715, 'temp': 0.7037522291516982, 'alpha_loss': -29.761875069899364, 'alpha': 1.7114559759983439, 'critic_loss': 301.84887263127143, 'actor_loss': -1.277630812687681, 'time_step': 0.04648348223956334, 'td_error': 1.2302964129044445, 'init_value': 0.24201157689094543, 'ave_value': 0.33902010031365964} step=5190
2022-04-21 21:56.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:57.14 [info     ] CQL_20220421215243: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0002498406206252258, 'time_algorithm_update': 0.04627805500361272, 'temp_loss': 3.471247941772373, 'temp': 0.6873361180627966, 'alpha_loss': -30.914596469416093, 'alpha': 1.7779036284871184, 'critic_loss': 335.1497787740189, 'actor_loss': -1.7077516589550614, 'time_step': 0.046591419705076714, 'td_error': 1.2324220475661696, 'init_value': 0.6920597553253174, 'ave_value': 0.7547959318145184} step=5536
2022-04-21 21:57.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:57.32 [info     ] CQL_20220421215243: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0002556370861957528, 'time_algorithm_update': 0.049975303556188684, 'temp_loss': 3.391471214377122, 'temp': 0.6713102764821466, 'alpha_loss': -32.11468102339375, 'alpha': 1.8469708517107661, 'critic_loss': 366.37038320199605, 'actor_loss': -2.09074768510168, 'time_step': 0.05029608472923323, 'td_error': 1.233739540506835, 'init_value': 1.007757306098938, 'ave_value': 1.0614926055066674} step=5882
2022-04-21 21:57.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:57.51 [info     ] CQL_20220421215243: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00025745692280675634, 'time_algorithm_update': 0.050416387574521104, 'temp_loss': 3.3116220476999447, 'temp': 0.6556632980445906, 'alpha_loss': -33.36301365890944, 'alpha': 1.9187639207509213, 'critic_loss': 397.0975443228132, 'actor_loss': -2.43367554617755, 'time_step': 0.05073737477980597, 'td_error': 1.23439577106282, 'init_value': 1.3574241399765015, 'ave_value': 1.3925224254401503} step=6228
2022-04-21 21:57.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:58.09 [info     ] CQL_20220421215243: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0002519250605147698, 'time_algorithm_update': 0.05037764111006191, 'temp_loss': 3.234078235019838, 'temp': 0.6403891657818259, 'alpha_loss': -34.66549261456969, 'alpha': 1.9933729640321236, 'critic_loss': 428.2588770871907, 'actor_loss': -2.7639859742511903, 'time_step': 0.05069188162081503, 'td_error': 1.2350206108431767, 'init_value': 1.6832700967788696, 'ave_value': 1.7084499531077117} step=6574
2022-04-21 21:58.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:58.27 [info     ] CQL_20220421215243: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00025389166925683876, 'time_algorithm_update': 0.048231596202519586, 'temp_loss': 3.1588744317865096, 'temp': 0.6254772270001427, 'alpha_loss': -36.00475908841701, 'alpha': 2.0709034431876474, 'critic_loss': 457.74231671460103, 'actor_loss': -3.068007648335716, 'time_step': 0.048546753177753074, 'td_error': 1.2350018662412103, 'init_value': 1.9576311111450195, 'ave_value': 1.9845203977334325} step=6920
2022-04-21 21:58.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:58.44 [info     ] CQL_20220421215243: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00025535801242541716, 'time_algorithm_update': 0.04861645615858839, 'temp_loss': 3.085913948240997, 'temp': 0.610912834288757, 'alpha_loss': -37.41386741020776, 'alpha': 2.151468658033823, 'critic_loss': 485.419445941903, 'actor_loss': -3.3654979398484866, 'time_step': 0.04893898205950081, 'td_error': 1.235711419425712, 'init_value': 2.3322174549102783, 'ave_value': 2.3458952152429458} step=7266
2022-04-21 21:58.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:59.02 [info     ] CQL_20220421215243: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0002567692299109663, 'time_algorithm_update': 0.048902756905969166, 'temp_loss': 3.014240535008425, 'temp': 0.5966876745223999, 'alpha_loss': -38.8668366801532, 'alpha': 2.235185074668399, 'critic_loss': 511.9667742955202, 'actor_loss': -3.670127220236497, 'time_step': 0.04922324246753847, 'td_error': 1.2361445736511745, 'init_value': 2.657982587814331, 'ave_value': 2.667226696037014} step=7612
2022-04-21 21:59.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:59.20 [info     ] CQL_20220421215243: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0002571309922058458, 'time_algorithm_update': 0.0493038734259633, 'temp_loss': 2.944019290064112, 'temp': 0.5827949877419224, 'alpha_loss': -40.385833354354595, 'alpha': 2.3221706565404903, 'critic_loss': 538.1707410867504, 'actor_loss': -3.9659628709616688, 'time_step': 0.04962839970009864, 'td_error': 1.2366356094494422, 'init_value': 2.9885549545288086, 'ave_value': 2.9980317408126247} step=7958
2022-04-21 21:59.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:59.38 [info     ] CQL_20220421215243: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0002594035484887272, 'time_algorithm_update': 0.049294556496460314, 'temp_loss': 2.8752615465594165, 'temp': 0.5692291738669997, 'alpha_loss': -41.95161520676806, 'alpha': 2.4125420013604137, 'critic_loss': 561.2428714112739, 'actor_loss': -4.264183956763648, 'time_step': 0.049620809582616554, 'td_error': 1.2369614891629115, 'init_value': 3.2936859130859375, 'ave_value': 3.3012648982535953} step=8304
2022-04-21 21:59.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 21:59.56 [info     ] CQL_20220421215243: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00025825900149483214, 'time_algorithm_update': 0.049291767136899034, 'temp_loss': 2.8082549151657634, 'temp': 0.5559806355162163, 'alpha_loss': -43.58258683970898, 'alpha': 2.5064308340149806, 'critic_loss': 581.1912277309881, 'actor_loss': -4.562216185420924, 'time_step': 0.04961679092032372, 'td_error': 1.2378041091691085, 'init_value': 3.6162049770355225, 'ave_value': 3.6236560796775654} step=8650
2022-04-21 21:59.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:00.14 [info     ] CQL_20220421215243: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0002602028708926515, 'time_algorithm_update': 0.04869144500335517, 'temp_loss': 2.742807823109489, 'temp': 0.5430412649074731, 'alpha_loss': -45.28348819093208, 'alpha': 2.6039812985183186, 'critic_loss': 608.6103681443054, 'actor_loss': -4.84773588869613, 'time_step': 0.049016902212462674, 'td_error': 1.2386498007583833, 'init_value': 3.9232208728790283, 'ave_value': 3.9281547664861196} step=8996
2022-04-21 22:00.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:00.31 [info     ] CQL_20220421215243: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00025730739439153947, 'time_algorithm_update': 0.04915083212659538, 'temp_loss': 2.678453585315991, 'temp': 0.5304040541883149, 'alpha_loss': -47.047245367414, 'alpha': 2.7053424795238956, 'critic_loss': 639.0624656015738, 'actor_loss': -5.12496400155084, 'time_step': 0.049474290340622035, 'td_error': 1.2394592287639752, 'init_value': 4.209930419921875, 'ave_value': 4.21462678273971} step=9342
2022-04-21 22:00.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:00.49 [info     ] CQL_20220421215243: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0002551733413872691, 'time_algorithm_update': 0.048674039068938676, 'temp_loss': 2.61636869411248, 'temp': 0.5180608380392108, 'alpha_loss': -48.872932423056895, 'alpha': 2.810649964850762, 'critic_loss': 671.9054771820245, 'actor_loss': -5.393754593898795, 'time_step': 0.04899324916001689, 'td_error': 1.2401743179645786, 'init_value': 4.4705424308776855, 'ave_value': 4.475774321639747} step=9688
2022-04-21 22:00.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:01.07 [info     ] CQL_20220421215243: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0002541845244479317, 'time_algorithm_update': 0.04894650740430534, 'temp_loss': 2.5554408041728025, 'temp': 0.506004915588853, 'alpha_loss': -50.77625118101263, 'alpha': 2.9200680510846175, 'critic_loss': 714.4787689385387, 'actor_loss': -5.6420584028166845, 'time_step': 0.04926460946915467, 'td_error': 1.2414913394940028, 'init_value': 4.76129674911499, 'ave_value': 4.765227093906864} step=10034
2022-04-21 22:01.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:01.25 [info     ] CQL_20220421215243: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00024974690696407607, 'time_algorithm_update': 0.04839780978384735, 'temp_loss': 2.4961807631343778, 'temp': 0.4942305991587611, 'alpha_loss': -52.75708918488784, 'alpha': 3.033741687074562, 'critic_loss': 764.4507283999052, 'actor_loss': -5.886329179554316, 'time_step': 0.048713682014818134, 'td_error': 1.2423274728876896, 'init_value': 4.995110988616943, 'ave_value': 4.999647949792131} step=10380
2022-04-21 22:01.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:01.42 [info     ] CQL_20220421215243: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0002529703812792122, 'time_algorithm_update': 0.048451943204582085, 'temp_loss': 2.4384917503147454, 'temp': 0.4827271871311816, 'alpha_loss': -54.80579188793381, 'alpha': 3.151846404709568, 'critic_loss': 825.5094625660449, 'actor_loss': -6.0984791344989935, 'time_step': 0.04877205253336471, 'td_error': 1.2430309952885052, 'init_value': 5.20033073425293, 'ave_value': 5.205894417082207} step=10726
2022-04-21 22:01.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:02.00 [info     ] CQL_20220421215243: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00025379726652465114, 'time_algorithm_update': 0.048474826564678566, 'temp_loss': 2.3818345407530064, 'temp': 0.4714922331316623, 'alpha_loss': -56.94020992345204, 'alpha': 3.274552334939813, 'critic_loss': 890.9619461676978, 'actor_loss': -6.3259309771433045, 'time_step': 0.048794702987450395, 'td_error': 1.2456303345839341, 'init_value': 5.537442684173584, 'ave_value': 5.540161919401629} step=11072
2022-04-21 22:02.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:02.17 [info     ] CQL_20220421215243: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00025195469056939803, 'time_algorithm_update': 0.048695534639964906, 'temp_loss': 2.3260616966754717, 'temp': 0.4605189231951113, 'alpha_loss': -59.15445914296056, 'alpha': 3.402034029795255, 'critic_loss': 956.082865279534, 'actor_loss': -6.548741839524639, 'time_step': 0.04901300896109873, 'td_error': 1.246543971510493, 'init_value': 5.734992980957031, 'ave_value': 5.738635027651654} step=11418
2022-04-21 22:02.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:02.35 [info     ] CQL_20220421215243: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.000267131480178392, 'time_algorithm_update': 0.048922442287378916, 'temp_loss': 2.2717616592528502, 'temp': 0.44980308594386703, 'alpha_loss': -61.45673179626465, 'alpha': 3.534472887915683, 'critic_loss': 1027.733446771699, 'actor_loss': -6.747777131251517, 'time_step': 0.049252401197576796, 'td_error': 1.2478784184408904, 'init_value': 5.9485578536987305, 'ave_value': 5.951973709442877} step=11764
2022-04-21 22:02.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:02.53 [info     ] CQL_20220421215243: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0002588516025873967, 'time_algorithm_update': 0.0499121122966612, 'temp_loss': 2.2192405131510915, 'temp': 0.4393343386277987, 'alpha_loss': -63.86149399818024, 'alpha': 3.6720648500960684, 'critic_loss': 1109.3934911827132, 'actor_loss': -6.921736186639422, 'time_step': 0.050233921563694245, 'td_error': 1.2491864642775863, 'init_value': 6.139987945556641, 'ave_value': 6.143319539200813} step=12110
2022-04-21 22:02.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:03.11 [info     ] CQL_20220421215243: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00025843333646741217, 'time_algorithm_update': 0.0494440559706936, 'temp_loss': 2.1675015274499883, 'temp': 0.4291107593416479, 'alpha_loss': -66.34827652418545, 'alpha': 3.815023677197495, 'critic_loss': 1193.7458767753117, 'actor_loss': -7.091264115592648, 'time_step': 0.04976630348690672, 'td_error': 1.2504298026014349, 'init_value': 6.3179521560668945, 'ave_value': 6.321619790083088} step=12456
2022-04-21 22:03.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:03.29 [info     ] CQL_20220421215243: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00025236675505004177, 'time_algorithm_update': 0.04905810797145601, 'temp_loss': 2.1172869522447533, 'temp': 0.4191237651888346, 'alpha_loss': -68.92124663336429, 'alpha': 3.9635365491657586, 'critic_loss': 1281.3514023268153, 'actor_loss': -7.233216077606113, 'time_step': 0.04937380104395696, 'td_error': 1.2515536144153756, 'init_value': 6.472754955291748, 'ave_value': 6.47629944182731} step=12802
2022-04-21 22:03.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:03.48 [info     ] CQL_20220421215243: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00026034688673956546, 'time_algorithm_update': 0.05260266044925403, 'temp_loss': 2.067100803287043, 'temp': 0.40937187208261105, 'alpha_loss': -71.60664522027693, 'alpha': 4.117832733716579, 'critic_loss': 1370.5936374554055, 'actor_loss': -7.373177958361675, 'time_step': 0.052925581876942185, 'td_error': 1.2534035616614219, 'init_value': 6.672050952911377, 'ave_value': 6.674420299661165} step=13148
2022-04-21 22:03.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:04.08 [info     ] CQL_20220421215243: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0002667145922004832, 'time_algorithm_update': 0.05328064777947575, 'temp_loss': 2.0194746407470263, 'temp': 0.3998461079735287, 'alpha_loss': -74.39935922347053, 'alpha': 4.278144362344907, 'critic_loss': 1459.6046477742277, 'actor_loss': -7.500544944939586, 'time_step': 0.053613635156885046, 'td_error': 1.2538040697894897, 'init_value': 6.760544776916504, 'ave_value': 6.764533452729921} step=13494
2022-04-21 22:04.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:04.27 [info     ] CQL_20220421215243: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0002765552156922445, 'time_algorithm_update': 0.054287492884376835, 'temp_loss': 1.9725098006987158, 'temp': 0.39054251371780574, 'alpha_loss': -77.29133555241403, 'alpha': 4.444701981682309, 'critic_loss': 1553.7351571672914, 'actor_loss': -7.618750131199126, 'time_step': 0.0546357948656027, 'td_error': 1.255078306004913, 'init_value': 6.8951239585876465, 'ave_value': 6.898206844234873} step=13840
2022-04-21 22:04.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:04.49 [info     ] CQL_20220421215243: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003096974654004753, 'time_algorithm_update': 0.05915034437455194, 'temp_loss': 1.9274514610367703, 'temp': 0.3814522432798595, 'alpha_loss': -80.30135799694612, 'alpha': 4.617730045594232, 'critic_loss': 1654.6532095319274, 'actor_loss': -7.710593839601286, 'time_step': 0.0595409704770656, 'td_error': 1.2559584681031382, 'init_value': 7.003905296325684, 'ave_value': 7.007189331511402} step=14186
2022-04-21 22:04.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:05.11 [info     ] CQL_20220421215243: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00031140636157438243, 'time_algorithm_update': 0.05973577568296752, 'temp_loss': 1.8819077118283751, 'temp': 0.3725739584837346, 'alpha_loss': -83.43276686475456, 'alpha': 4.797505854182161, 'critic_loss': 1737.1544094195945, 'actor_loss': -7.828901866956942, 'time_step': 0.060127326518814, 'td_error': 1.257718493642908, 'init_value': 7.1741414070129395, 'ave_value': 7.176242461866991} step=14532
2022-04-21 22:05.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:05.32 [info     ] CQL_20220421215243: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00030668967032019114, 'time_algorithm_update': 0.058608361751357946, 'temp_loss': 1.8382388722000784, 'temp': 0.36390307890197443, 'alpha_loss': -86.67135924410958, 'alpha': 4.9842697967683645, 'critic_loss': 1824.2399994072887, 'actor_loss': -7.944706488206896, 'time_step': 0.05899126130032402, 'td_error': 1.257960306483366, 'init_value': 7.235784530639648, 'ave_value': 7.239540208895209} step=14878
2022-04-21 22:05.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:05.52 [info     ] CQL_20220421215243: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00030247393371052826, 'time_algorithm_update': 0.05678819920975349, 'temp_loss': 1.7953804645235139, 'temp': 0.3554345251335574, 'alpha_loss': -90.05242922126902, 'alpha': 5.178312261669622, 'critic_loss': 1937.9513987282107, 'actor_loss': -8.031120285133406, 'time_step': 0.057170567484949364, 'td_error': 1.2597688135833838, 'init_value': 7.397577285766602, 'ave_value': 7.399651688340152} step=15224
2022-04-21 22:05.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:06.13 [info     ] CQL_20220421215243: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00031399244517949274, 'time_algorithm_update': 0.05695930312823698, 'temp_loss': 1.753324239584752, 'temp': 0.34716349805710633, 'alpha_loss': -93.55482033084583, 'alpha': 5.3799101575950665, 'critic_loss': 2033.706950457799, 'actor_loss': -8.113289179829504, 'time_step': 0.05735476306408127, 'td_error': 1.2608780910153559, 'init_value': 7.511180877685547, 'ave_value': 7.513114266013466} step=15570
2022-04-21 22:06.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:06.34 [info     ] CQL_20220421215243: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0002959353386322198, 'time_algorithm_update': 0.05651268518039946, 'temp_loss': 1.712722160568127, 'temp': 0.3390841532304797, 'alpha_loss': -97.20169415777129, 'alpha': 5.589348729635249, 'critic_loss': 2116.6843805037483, 'actor_loss': -8.217006245100428, 'time_step': 0.0568883487943969, 'td_error': 1.261859863066364, 'init_value': 7.6081037521362305, 'ave_value': 7.6097890523329} step=15916
2022-04-21 22:06.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:06.55 [info     ] CQL_20220421215243: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003085412041989365, 'time_algorithm_update': 0.057004958907992856, 'temp_loss': 1.6727394361716474, 'temp': 0.3311923583807973, 'alpha_loss': -100.97349819558204, 'alpha': 5.806933476056667, 'critic_loss': 2145.2029041444634, 'actor_loss': -8.35679081547467, 'time_step': 0.05739432676679137, 'td_error': 1.263817962054256, 'init_value': 7.7826151847839355, 'ave_value': 7.78297627527265} step=16262
2022-04-21 22:06.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:07.15 [info     ] CQL_20220421215243: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003053328894466334, 'time_algorithm_update': 0.05684530114851935, 'temp_loss': 1.6336318254470825, 'temp': 0.32348615844125694, 'alpha_loss': -104.89353490840493, 'alpha': 6.032965635288657, 'critic_loss': 2050.856781182262, 'actor_loss': -8.566278964797885, 'time_step': 0.05723031752371375, 'td_error': 1.267397449174866, 'init_value': 8.077136993408203, 'ave_value': 8.074716218583896} step=16608
2022-04-21 22:07.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:07.36 [info     ] CQL_20220421215243: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00030447637414656624, 'time_algorithm_update': 0.05715210796091598, 'temp_loss': 1.5958056629048607, 'temp': 0.3159578856877509, 'alpha_loss': -108.9951755545732, 'alpha': 6.267825223117894, 'critic_loss': 1801.8911813724937, 'actor_loss': -8.899389413050834, 'time_step': 0.057535580127914515, 'td_error': 1.2706213690365111, 'init_value': 8.3649320602417, 'ave_value': 8.362193864801586} step=16954
2022-04-21 22:07.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:07.57 [info     ] CQL_20220421215243: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003026482686831083, 'time_algorithm_update': 0.05706956827571626, 'temp_loss': 1.5585258044948467, 'temp': 0.3086054716151574, 'alpha_loss': -113.22732746807826, 'alpha': 6.5118241847595035, 'critic_loss': 1685.1722775497879, 'actor_loss': -9.09076329324976, 'time_step': 0.057450875381513826, 'td_error': 1.2733637657862524, 'init_value': 8.602872848510742, 'ave_value': 8.600375392709427} step=17300
2022-04-21 22:07.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421215243/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 22:07.59 [info     ] FQE_20220421220757: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014552007238548923, 'time_algorithm_update': 0.009640913411795375, 'loss': 0.008357789660579559, 'time_step': 0.009853822639189571, 'init_value': -0.18399301171302795, 'ave_value': -0.1058445597000353, 'soft_opc': nan} step=166




2022-04-21 22:07.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.01 [info     ] FQE_20220421220757: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.000152928283415645, 'time_algorithm_update': 0.009702635098652667, 'loss': 0.005010705738586475, 'time_step': 0.009921744645360005, 'init_value': -0.28527891635894775, 'ave_value': -0.1619041675466809, 'soft_opc': nan} step=332




2022-04-21 22:08.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.02 [info     ] FQE_20220421220757: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001490144844514778, 'time_algorithm_update': 0.009594815323151738, 'loss': 0.0041023456331634196, 'time_step': 0.009810268160808518, 'init_value': -0.3380855619907379, 'ave_value': -0.18955204502998238, 'soft_opc': nan} step=498




2022-04-21 22:08.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.04 [info     ] FQE_20220421220757: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014966510864625494, 'time_algorithm_update': 0.009001996143754706, 'loss': 0.003676592394098056, 'time_step': 0.009219836039715502, 'init_value': -0.41323310136795044, 'ave_value': -0.2314954803845434, 'soft_opc': nan} step=664




2022-04-21 22:08.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.06 [info     ] FQE_20220421220757: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015402845589511366, 'time_algorithm_update': 0.009722248617425022, 'loss': 0.0033456121384816983, 'time_step': 0.009945523307984134, 'init_value': -0.4398807883262634, 'ave_value': -0.2413692523670794, 'soft_opc': nan} step=830




2022-04-21 22:08.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.08 [info     ] FQE_20220421220757: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.000147219163825713, 'time_algorithm_update': 0.009691870356180582, 'loss': 0.003159540800759232, 'time_step': 0.009903208318963108, 'init_value': -0.4545958638191223, 'ave_value': -0.24456743482156312, 'soft_opc': nan} step=996




2022-04-21 22:08.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.09 [info     ] FQE_20220421220757: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015209812715829136, 'time_algorithm_update': 0.009481374039707413, 'loss': 0.002799509728540588, 'time_step': 0.009700469223849744, 'init_value': -0.5227130055427551, 'ave_value': -0.3029355195300603, 'soft_opc': nan} step=1162




2022-04-21 22:08.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.11 [info     ] FQE_20220421220757: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00014625543571380247, 'time_algorithm_update': 0.008773899940123042, 'loss': 0.0025698867975734174, 'time_step': 0.008981423205639943, 'init_value': -0.5928852558135986, 'ave_value': -0.3605418368385316, 'soft_opc': nan} step=1328




2022-04-21 22:08.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.13 [info     ] FQE_20220421220757: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014648092798439852, 'time_algorithm_update': 0.009677035262785762, 'loss': 0.0024956880724310024, 'time_step': 0.009888255452535239, 'init_value': -0.6315464973449707, 'ave_value': -0.39409266036624707, 'soft_opc': nan} step=1494




2022-04-21 22:08.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.14 [info     ] FQE_20220421220757: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014579152486410486, 'time_algorithm_update': 0.009504269404583666, 'loss': 0.0022972267891100823, 'time_step': 0.009710785854293639, 'init_value': -0.6914606094360352, 'ave_value': -0.4327306094302519, 'soft_opc': nan} step=1660




2022-04-21 22:08.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.16 [info     ] FQE_20220421220757: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015087874538927194, 'time_algorithm_update': 0.00961043174008289, 'loss': 0.0023108003920516425, 'time_step': 0.009826157466474786, 'init_value': -0.7486444711685181, 'ave_value': -0.4787189323052361, 'soft_opc': nan} step=1826




2022-04-21 22:08.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.18 [info     ] FQE_20220421220757: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015137856265148484, 'time_algorithm_update': 0.009193049855979091, 'loss': 0.002080454223183354, 'time_step': 0.009412728160260671, 'init_value': -0.7832358479499817, 'ave_value': -0.5035596832498774, 'soft_opc': nan} step=1992




2022-04-21 22:08.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.20 [info     ] FQE_20220421220757: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015133116618696465, 'time_algorithm_update': 0.00950080515390419, 'loss': 0.002054234171014122, 'time_step': 0.009722785777356252, 'init_value': -0.8569386005401611, 'ave_value': -0.5551568132464413, 'soft_opc': nan} step=2158




2022-04-21 22:08.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.21 [info     ] FQE_20220421220757: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014909778732851328, 'time_algorithm_update': 0.009584810360368476, 'loss': 0.002066544449961105, 'time_step': 0.009800184203917721, 'init_value': -0.9199441075325012, 'ave_value': -0.604130747135695, 'soft_opc': nan} step=2324




2022-04-21 22:08.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.23 [info     ] FQE_20220421220757: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015394658927457878, 'time_algorithm_update': 0.009719457971044334, 'loss': 0.0020475949084313295, 'time_step': 0.009944928697792881, 'init_value': -0.9513270854949951, 'ave_value': -0.6278081851700942, 'soft_opc': nan} step=2490




2022-04-21 22:08.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.25 [info     ] FQE_20220421220757: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014820874455463454, 'time_algorithm_update': 0.008992983634213367, 'loss': 0.002184670343286784, 'time_step': 0.009205759289753005, 'init_value': -1.0336027145385742, 'ave_value': -0.6990425039653305, 'soft_opc': nan} step=2656




2022-04-21 22:08.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.27 [info     ] FQE_20220421220757: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015208663710628646, 'time_algorithm_update': 0.009241023695612529, 'loss': 0.0023276707508491845, 'time_step': 0.00946080253784915, 'init_value': -1.107060432434082, 'ave_value': -0.7572360421019094, 'soft_opc': nan} step=2822




2022-04-21 22:08.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.28 [info     ] FQE_20220421220757: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.000152003334229251, 'time_algorithm_update': 0.009703874588012695, 'loss': 0.0024988704381359137, 'time_step': 0.00992180496813303, 'init_value': -1.1474041938781738, 'ave_value': -0.7815654737186861, 'soft_opc': nan} step=2988




2022-04-21 22:08.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.30 [info     ] FQE_20220421220757: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00014801054115755013, 'time_algorithm_update': 0.009712607027536416, 'loss': 0.002427117855579828, 'time_step': 0.009926741381725633, 'init_value': -1.2449681758880615, 'ave_value': -0.8568408180665863, 'soft_opc': nan} step=3154




2022-04-21 22:08.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.32 [info     ] FQE_20220421220757: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001522948942988752, 'time_algorithm_update': 0.009643999927015189, 'loss': 0.0029035236902818285, 'time_step': 0.009865539619721562, 'init_value': -1.2986645698547363, 'ave_value': -0.9001717865467072, 'soft_opc': nan} step=3320




2022-04-21 22:08.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.34 [info     ] FQE_20220421220757: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015239686851041862, 'time_algorithm_update': 0.009016449192920363, 'loss': 0.003177483536826211, 'time_step': 0.009231636323124529, 'init_value': -1.392229676246643, 'ave_value': -0.9749109734487427, 'soft_opc': nan} step=3486




2022-04-21 22:08.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.35 [info     ] FQE_20220421220757: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015319255461175758, 'time_algorithm_update': 0.009600254426519555, 'loss': 0.0034934634485503614, 'time_step': 0.009819912623210126, 'init_value': -1.4393646717071533, 'ave_value': -1.0055299886376472, 'soft_opc': nan} step=3652




2022-04-21 22:08.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.37 [info     ] FQE_20220421220757: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.000150108911904944, 'time_algorithm_update': 0.009592673864709326, 'loss': 0.003926848870854971, 'time_step': 0.009808668171066836, 'init_value': -1.5177662372589111, 'ave_value': -1.0583263838613355, 'soft_opc': nan} step=3818




2022-04-21 22:08.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.39 [info     ] FQE_20220421220757: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.000151566712253065, 'time_algorithm_update': 0.009123473282319954, 'loss': 0.004143822273724791, 'time_step': 0.009340672607881477, 'init_value': -1.5551691055297852, 'ave_value': -1.0902273160380287, 'soft_opc': nan} step=3984




2022-04-21 22:08.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.40 [info     ] FQE_20220421220757: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001506431993231716, 'time_algorithm_update': 0.00891026674983013, 'loss': 0.004401749246283883, 'time_step': 0.009129642003990081, 'init_value': -1.6225181818008423, 'ave_value': -1.1419507710514842, 'soft_opc': nan} step=4150




2022-04-21 22:08.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.42 [info     ] FQE_20220421220757: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015195737402123142, 'time_algorithm_update': 0.009707564330962768, 'loss': 0.005024211455791549, 'time_step': 0.009926063468657344, 'init_value': -1.7089488506317139, 'ave_value': -1.2125630142318236, 'soft_opc': nan} step=4316




2022-04-21 22:08.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.44 [info     ] FQE_20220421220757: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00015146617429802217, 'time_algorithm_update': 0.009622032383838332, 'loss': 0.005289313660412802, 'time_step': 0.009842477649091238, 'init_value': -1.7496932744979858, 'ave_value': -1.2376638642705238, 'soft_opc': nan} step=4482




2022-04-21 22:08.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.46 [info     ] FQE_20220421220757: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00015042345207857798, 'time_algorithm_update': 0.009700360068355698, 'loss': 0.006005565737022075, 'time_step': 0.009920009647507265, 'init_value': -1.848400592803955, 'ave_value': -1.3021863653047665, 'soft_opc': nan} step=4648




2022-04-21 22:08.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.47 [info     ] FQE_20220421220757: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014767158462340572, 'time_algorithm_update': 0.009289658213236246, 'loss': 0.006035877623649998, 'time_step': 0.009503044277788645, 'init_value': -1.8992180824279785, 'ave_value': -1.311928814394517, 'soft_opc': nan} step=4814




2022-04-21 22:08.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.49 [info     ] FQE_20220421220757: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015079975128173828, 'time_algorithm_update': 0.009347662868269956, 'loss': 0.006931871368622699, 'time_step': 0.009568096643470856, 'init_value': -1.939145803451538, 'ave_value': -1.34079659540911, 'soft_opc': nan} step=4980




2022-04-21 22:08.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.51 [info     ] FQE_20220421220757: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001540428184601198, 'time_algorithm_update': 0.009651920881616064, 'loss': 0.007278648094471869, 'time_step': 0.009872584457857063, 'init_value': -2.037813663482666, 'ave_value': -1.4114939409601797, 'soft_opc': nan} step=5146




2022-04-21 22:08.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.53 [info     ] FQE_20220421220757: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014739869588828948, 'time_algorithm_update': 0.009197367243019932, 'loss': 0.007665955374600837, 'time_step': 0.009411627987781203, 'init_value': -2.1340456008911133, 'ave_value': -1.4875529341869527, 'soft_opc': nan} step=5312




2022-04-21 22:08.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.54 [info     ] FQE_20220421220757: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015078251620373093, 'time_algorithm_update': 0.009535483567111463, 'loss': 0.008322549806255566, 'time_step': 0.009753003177872623, 'init_value': -2.189990520477295, 'ave_value': -1.5212771222935066, 'soft_opc': nan} step=5478




2022-04-21 22:08.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.56 [info     ] FQE_20220421220757: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001521742487528238, 'time_algorithm_update': 0.00907728901828628, 'loss': 0.008673604304273894, 'time_step': 0.009298122072794351, 'init_value': -2.204944610595703, 'ave_value': -1.5170723883552595, 'soft_opc': nan} step=5644




2022-04-21 22:08.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:08.58 [info     ] FQE_20220421220757: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001549304249774979, 'time_algorithm_update': 0.009727600109146303, 'loss': 0.009125326561996803, 'time_step': 0.00995072686528585, 'init_value': -2.322685480117798, 'ave_value': -1.597099603833379, 'soft_opc': nan} step=5810




2022-04-21 22:08.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.00 [info     ] FQE_20220421220757: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001556485532278038, 'time_algorithm_update': 0.00963759278676596, 'loss': 0.009975567819825542, 'time_step': 0.009859185620962855, 'init_value': -2.359229326248169, 'ave_value': -1.6295827527572442, 'soft_opc': nan} step=5976




2022-04-21 22:09.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.01 [info     ] FQE_20220421220757: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014892687280494045, 'time_algorithm_update': 0.009636396385100951, 'loss': 0.010891163771122089, 'time_step': 0.009851065026708397, 'init_value': -2.3911614418029785, 'ave_value': -1.6514671209024954, 'soft_opc': nan} step=6142




2022-04-21 22:09.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.03 [info     ] FQE_20220421220757: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001527702952005777, 'time_algorithm_update': 0.008963431220456779, 'loss': 0.011431098751182365, 'time_step': 0.009184340396559382, 'init_value': -2.556100845336914, 'ave_value': -1.7668052117029827, 'soft_opc': nan} step=6308




2022-04-21 22:09.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.05 [info     ] FQE_20220421220757: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015485573963946607, 'time_algorithm_update': 0.00958794139953981, 'loss': 0.012016467639092211, 'time_step': 0.009811104062091873, 'init_value': -2.6020383834838867, 'ave_value': -1.7943845863546337, 'soft_opc': nan} step=6474




2022-04-21 22:09.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.06 [info     ] FQE_20220421220757: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014618075037577065, 'time_algorithm_update': 0.009304806410548198, 'loss': 0.01303793944443677, 'time_step': 0.009516529290072889, 'init_value': -2.7032225131988525, 'ave_value': -1.8803123906523258, 'soft_opc': nan} step=6640




2022-04-21 22:09.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.08 [info     ] FQE_20220421220757: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015338644923934018, 'time_algorithm_update': 0.00961486546390028, 'loss': 0.013237049824001368, 'time_step': 0.009834828146968979, 'init_value': -2.7968828678131104, 'ave_value': -1.9418741086850295, 'soft_opc': nan} step=6806




2022-04-21 22:09.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.10 [info     ] FQE_20220421220757: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015336346913533038, 'time_algorithm_update': 0.00922417066183435, 'loss': 0.014039756950055507, 'time_step': 0.009445861161473286, 'init_value': -2.8454456329345703, 'ave_value': -1.9868448004395038, 'soft_opc': nan} step=6972




2022-04-21 22:09.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.12 [info     ] FQE_20220421220757: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00014953010053519742, 'time_algorithm_update': 0.009554580033543598, 'loss': 0.014386541389997667, 'time_step': 0.009768937007490411, 'init_value': -2.928365707397461, 'ave_value': -2.065122772699541, 'soft_opc': nan} step=7138




2022-04-21 22:09.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.13 [info     ] FQE_20220421220757: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015224893409085562, 'time_algorithm_update': 0.00966956672898258, 'loss': 0.015288483202443403, 'time_step': 0.009887373591043863, 'init_value': -3.030888795852661, 'ave_value': -2.1618067481899046, 'soft_opc': nan} step=7304




2022-04-21 22:09.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.15 [info     ] FQE_20220421220757: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001572025827614658, 'time_algorithm_update': 0.0097494254629296, 'loss': 0.015942554590600847, 'time_step': 0.009974169443888837, 'init_value': -3.056513786315918, 'ave_value': -2.1558729430979438, 'soft_opc': nan} step=7470




2022-04-21 22:09.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.17 [info     ] FQE_20220421220757: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001539710056350892, 'time_algorithm_update': 0.009361805686031479, 'loss': 0.01643513148111931, 'time_step': 0.009585060268999582, 'init_value': -3.0354933738708496, 'ave_value': -2.1189202251063812, 'soft_opc': nan} step=7636




2022-04-21 22:09.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.19 [info     ] FQE_20220421220757: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015294408223715173, 'time_algorithm_update': 0.009706547461360335, 'loss': 0.016468258433994916, 'time_step': 0.00992699129035674, 'init_value': -3.0659990310668945, 'ave_value': -2.1178822705471836, 'soft_opc': nan} step=7802




2022-04-21 22:09.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.21 [info     ] FQE_20220421220757: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015437172119875988, 'time_algorithm_update': 0.010013469730515078, 'loss': 0.017305981035762277, 'time_step': 0.01023519613656653, 'init_value': -3.1375832557678223, 'ave_value': -2.15659826026038, 'soft_opc': nan} step=7968




2022-04-21 22:09.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.22 [info     ] FQE_20220421220757: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015288375946412603, 'time_algorithm_update': 0.009846536509961966, 'loss': 0.01790588780612045, 'time_step': 0.010063967072820089, 'init_value': -3.1910247802734375, 'ave_value': -2.2181433763992677, 'soft_opc': nan} step=8134




2022-04-21 22:09.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:09.24 [info     ] FQE_20220421220757: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001507308109697089, 'time_algorithm_update': 0.009799128555389771, 'loss': 0.0188056560421183, 'time_step': 0.010017511356307799, 'init_value': -3.233062744140625, 'ave_value': -2.2372058580319085, 'soft_opc': nan} step=8300




2022-04-21 22:09.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220757/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-21 22:09.24 [debug    ] RoundIterator is selected.
2022-04-21 22:09.24 [info     ] Directory is created at d3rlpy_logs/FQE_20220421220924
2022-04-21 22:09.24 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 22:09.24 [debug    ] Building models...
2022-04-21 22:09.24 [debug    ] Models have been built.
2022-04-21 22:09.24 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220421220924/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 22:09.28 [info     ] FQE_20220421220924: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015866825746935467, 'time_algorithm_update': 0.009953372007192568, 'loss': 0.023396324149744456, 'time_step': 0.01018036312835161, 'init_value': -1.0085201263427734, 'ave_value': -1.0395357572750465, 'soft_opc': nan} step=344




2022-04-21 22:09.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:09.32 [info     ] FQE_20220421220924: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001555695090182992, 'time_algorithm_update': 0.009940637405528578, 'loss': 0.02078638222003572, 'time_step': 0.010160550821659177, 'init_value': -1.6155436038970947, 'ave_value': -1.6607824326769727, 'soft_opc': nan} step=688




2022-04-21 22:09.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:09.36 [info     ] FQE_20220421220924: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016371593918911245, 'time_algorithm_update': 0.00968123244684796, 'loss': 0.024234864736235765, 'time_step': 0.009911852520565654, 'init_value': -2.3110499382019043, 'ave_value': -2.447865201143531, 'soft_opc': nan} step=1032




2022-04-21 22:09.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:09.40 [info     ] FQE_20220421220924: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015863360360611316, 'time_algorithm_update': 0.010048877361208894, 'loss': 0.02736018871249588, 'time_step': 0.010277440381604571, 'init_value': -2.6509926319122314, 'ave_value': -2.92006896371777, 'soft_opc': nan} step=1376




2022-04-21 22:09.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:09.43 [info     ] FQE_20220421220924: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001554891120555789, 'time_algorithm_update': 0.009664948596510776, 'loss': 0.03381674704250208, 'time_step': 0.00988796353340149, 'init_value': -3.0453739166259766, 'ave_value': -3.486041208994281, 'soft_opc': nan} step=1720




2022-04-21 22:09.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:09.47 [info     ] FQE_20220421220924: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015456246775250103, 'time_algorithm_update': 0.010057155476060025, 'loss': 0.0378767809245822, 'time_step': 0.010279575059580248, 'init_value': -3.3939146995544434, 'ave_value': -4.002937028530213, 'soft_opc': nan} step=2064




2022-04-21 22:09.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:09.50 [info     ] FQE_20220421220924: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001587881598361703, 'time_algorithm_update': 0.00931838778562324, 'loss': 0.04722011469705247, 'time_step': 0.0095443538455076, 'init_value': -3.8741886615753174, 'ave_value': -4.631295636007646, 'soft_opc': nan} step=2408




2022-04-21 22:09.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:09.54 [info     ] FQE_20220421220924: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015750319458717523, 'time_algorithm_update': 0.009069717900697575, 'loss': 0.06020588923494743, 'time_step': 0.009294554244640261, 'init_value': -4.166529655456543, 'ave_value': -5.0505969516849065, 'soft_opc': nan} step=2752




2022-04-21 22:09.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:09.57 [info     ] FQE_20220421220924: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016225423923758574, 'time_algorithm_update': 0.008649144061776094, 'loss': 0.07211226113898636, 'time_step': 0.008880789196768473, 'init_value': -4.570395469665527, 'ave_value': -5.576255890210201, 'soft_opc': nan} step=3096




2022-04-21 22:09.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.01 [info     ] FQE_20220421220924: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015995045040929042, 'time_algorithm_update': 0.008844166994094849, 'loss': 0.09267808660204241, 'time_step': 0.00907281664914863, 'init_value': -5.262691020965576, 'ave_value': -6.301585147892301, 'soft_opc': nan} step=3440




2022-04-21 22:10.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.04 [info     ] FQE_20220421220924: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016305335732393487, 'time_algorithm_update': 0.008744720802750698, 'loss': 0.1132416250563205, 'time_step': 0.008976566237072612, 'init_value': -5.95623779296875, 'ave_value': -6.9658138890389925, 'soft_opc': nan} step=3784




2022-04-21 22:10.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.07 [info     ] FQE_20220421220924: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001584825127623802, 'time_algorithm_update': 0.008879114722096643, 'loss': 0.1412203012164249, 'time_step': 0.009102431147597557, 'init_value': -6.399066925048828, 'ave_value': -7.480376732322547, 'soft_opc': nan} step=4128




2022-04-21 22:10.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.10 [info     ] FQE_20220421220924: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015959005023157872, 'time_algorithm_update': 0.008397745531658793, 'loss': 0.17068861611728925, 'time_step': 0.008623002573501232, 'init_value': -6.839386940002441, 'ave_value': -7.919404480217008, 'soft_opc': nan} step=4472




2022-04-21 22:10.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.13 [info     ] FQE_20220421220924: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001568004142406375, 'time_algorithm_update': 0.00787693885869758, 'loss': 0.2111488542611559, 'time_step': 0.008099517849988715, 'init_value': -7.4207258224487305, 'ave_value': -8.508993601423127, 'soft_opc': nan} step=4816




2022-04-21 22:10.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.16 [info     ] FQE_20220421220924: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001586273659107297, 'time_algorithm_update': 0.008083889650744061, 'loss': 0.24526406656734126, 'time_step': 0.008311131665872972, 'init_value': -7.6248931884765625, 'ave_value': -8.636879621264903, 'soft_opc': nan} step=5160




2022-04-21 22:10.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.20 [info     ] FQE_20220421220924: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.000157088041305542, 'time_algorithm_update': 0.008088154155154561, 'loss': 0.2860135990574003, 'time_step': 0.008313733477925146, 'init_value': -8.29017448425293, 'ave_value': -9.253447188007401, 'soft_opc': nan} step=5504




2022-04-21 22:10.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.23 [info     ] FQE_20220421220924: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016054095223892566, 'time_algorithm_update': 0.008012602495592694, 'loss': 0.335124128384429, 'time_step': 0.008242028397183085, 'init_value': -8.267921447753906, 'ave_value': -9.28617253251466, 'soft_opc': nan} step=5848




2022-04-21 22:10.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.26 [info     ] FQE_20220421220924: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001589725183886151, 'time_algorithm_update': 0.00826308200525683, 'loss': 0.36805123333862527, 'time_step': 0.008489872134009074, 'init_value': -8.221064567565918, 'ave_value': -9.2948608073171, 'soft_opc': nan} step=6192




2022-04-21 22:10.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.29 [info     ] FQE_20220421220924: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016031223674153173, 'time_algorithm_update': 0.008011051388674004, 'loss': 0.41545449843262, 'time_step': 0.008240690064984698, 'init_value': -8.576397895812988, 'ave_value': -9.848262092218155, 'soft_opc': nan} step=6536




2022-04-21 22:10.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.32 [info     ] FQE_20220421220924: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016096650167953138, 'time_algorithm_update': 0.008172437895176023, 'loss': 0.4584025590204049, 'time_step': 0.008404918881349786, 'init_value': -8.64477252960205, 'ave_value': -10.165260972984571, 'soft_opc': nan} step=6880




2022-04-21 22:10.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.35 [info     ] FQE_20220421220924: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001636618791624557, 'time_algorithm_update': 0.008038780716962592, 'loss': 0.5007356229985436, 'time_step': 0.00827175309491712, 'init_value': -9.054203033447266, 'ave_value': -10.722326885881154, 'soft_opc': nan} step=7224




2022-04-21 22:10.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.38 [info     ] FQE_20220421220924: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001579419124958127, 'time_algorithm_update': 0.008048544096392255, 'loss': 0.5369594437232631, 'time_step': 0.008274479660876962, 'init_value': -8.884746551513672, 'ave_value': -10.839748816428756, 'soft_opc': nan} step=7568




2022-04-21 22:10.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.41 [info     ] FQE_20220421220924: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001634664313737736, 'time_algorithm_update': 0.008220112600991892, 'loss': 0.5778784605858545, 'time_step': 0.008452423090158506, 'init_value': -9.26974105834961, 'ave_value': -11.39640984586356, 'soft_opc': nan} step=7912




2022-04-21 22:10.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.44 [info     ] FQE_20220421220924: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001648511997489042, 'time_algorithm_update': 0.00806166266286096, 'loss': 0.6094707622095336, 'time_step': 0.00829519366109094, 'init_value': -9.175472259521484, 'ave_value': -11.587710359843179, 'soft_opc': nan} step=8256




2022-04-21 22:10.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.47 [info     ] FQE_20220421220924: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001632425674172335, 'time_algorithm_update': 0.008244181095167648, 'loss': 0.6314151519422181, 'time_step': 0.008477635161821232, 'init_value': -9.249115943908691, 'ave_value': -12.125337003017062, 'soft_opc': nan} step=8600




2022-04-21 22:10.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.50 [info     ] FQE_20220421220924: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016268533329631007, 'time_algorithm_update': 0.008060877406319906, 'loss': 0.665587242546513, 'time_step': 0.00829123064529064, 'init_value': -9.334409713745117, 'ave_value': -12.69005539011764, 'soft_opc': nan} step=8944




2022-04-21 22:10.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.54 [info     ] FQE_20220421220924: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015959490177243254, 'time_algorithm_update': 0.008137124915455663, 'loss': 0.6845426269222138, 'time_step': 0.008366736561753029, 'init_value': -9.61136245727539, 'ave_value': -13.209406903070642, 'soft_opc': nan} step=9288




2022-04-21 22:10.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:10.57 [info     ] FQE_20220421220924: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015991302423698958, 'time_algorithm_update': 0.008167133774868277, 'loss': 0.7063671494346805, 'time_step': 0.008395605309064998, 'init_value': -9.890911102294922, 'ave_value': -13.88246161971107, 'soft_opc': nan} step=9632




2022-04-21 22:10.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.00 [info     ] FQE_20220421220924: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016151195348695267, 'time_algorithm_update': 0.008130499789881151, 'loss': 0.728562586908337, 'time_step': 0.008363330780073654, 'init_value': -9.722440719604492, 'ave_value': -14.067151986461065, 'soft_opc': nan} step=9976




2022-04-21 22:11.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.03 [info     ] FQE_20220421220924: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016341583673344103, 'time_algorithm_update': 0.008214904818423959, 'loss': 0.7443280059378594, 'time_step': 0.008446887482044309, 'init_value': -9.947933197021484, 'ave_value': -14.666836300025786, 'soft_opc': nan} step=10320




2022-04-21 22:11.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.06 [info     ] FQE_20220421220924: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001634865306144537, 'time_algorithm_update': 0.007748723030090332, 'loss': 0.740718423865389, 'time_step': 0.007981626793395641, 'init_value': -9.885509490966797, 'ave_value': -14.768749979882593, 'soft_opc': nan} step=10664




2022-04-21 22:11.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.09 [info     ] FQE_20220421220924: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015996777734091116, 'time_algorithm_update': 0.008232196403104205, 'loss': 0.7614480090425025, 'time_step': 0.008463351532470348, 'init_value': -10.166950225830078, 'ave_value': -15.31323065194595, 'soft_opc': nan} step=11008




2022-04-21 22:11.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.12 [info     ] FQE_20220421220924: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016204215759454773, 'time_algorithm_update': 0.008050762636716976, 'loss': 0.7842367300980313, 'time_step': 0.008281622515168301, 'init_value': -9.9796142578125, 'ave_value': -15.473457424782284, 'soft_opc': nan} step=11352




2022-04-21 22:11.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.15 [info     ] FQE_20220421220924: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016013480896173523, 'time_algorithm_update': 0.008086552453595538, 'loss': 0.7645156887186648, 'time_step': 0.008316747670949892, 'init_value': -10.442489624023438, 'ave_value': -16.270552137338868, 'soft_opc': nan} step=11696




2022-04-21 22:11.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.18 [info     ] FQE_20220421220924: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.000162126713020857, 'time_algorithm_update': 0.008212553900341655, 'loss': 0.7714789755599096, 'time_step': 0.008444003587545351, 'init_value': -10.794670104980469, 'ave_value': -16.918086089166973, 'soft_opc': nan} step=12040




2022-04-21 22:11.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.21 [info     ] FQE_20220421220924: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016289672186208326, 'time_algorithm_update': 0.008117418649584748, 'loss': 0.7528943590235052, 'time_step': 0.008349379827809889, 'init_value': -10.729101181030273, 'ave_value': -17.107772074646928, 'soft_opc': nan} step=12384




2022-04-21 22:11.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.24 [info     ] FQE_20220421220924: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016505912292835324, 'time_algorithm_update': 0.008266381053037421, 'loss': 0.756755821168596, 'time_step': 0.008501617021338885, 'init_value': -11.005329132080078, 'ave_value': -17.631908584901694, 'soft_opc': nan} step=12728




2022-04-21 22:11.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.27 [info     ] FQE_20220421220924: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016241780547208564, 'time_algorithm_update': 0.008072731799857562, 'loss': 0.7298135199140064, 'time_step': 0.008303952078486597, 'init_value': -10.775896072387695, 'ave_value': -17.5940854456024, 'soft_opc': nan} step=13072




2022-04-21 22:11.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.31 [info     ] FQE_20220421220924: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016228542771450308, 'time_algorithm_update': 0.008197154416594394, 'loss': 0.7240593361473361, 'time_step': 0.00843254563420318, 'init_value': -10.741779327392578, 'ave_value': -17.906171711336672, 'soft_opc': nan} step=13416




2022-04-21 22:11.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.34 [info     ] FQE_20220421220924: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016359811605409135, 'time_algorithm_update': 0.008009312457816546, 'loss': 0.6962025609939511, 'time_step': 0.008244669714639353, 'init_value': -10.195962905883789, 'ave_value': -17.72311157408246, 'soft_opc': nan} step=13760




2022-04-21 22:11.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.37 [info     ] FQE_20220421220924: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016678072685419128, 'time_algorithm_update': 0.00810477899950604, 'loss': 0.7003152696957249, 'time_step': 0.008343698673470075, 'init_value': -10.688549041748047, 'ave_value': -18.452435006445423, 'soft_opc': nan} step=14104




2022-04-21 22:11.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.40 [info     ] FQE_20220421220924: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001634865306144537, 'time_algorithm_update': 0.00828795308290526, 'loss': 0.6675361765786832, 'time_step': 0.00852133922798689, 'init_value': -10.883245468139648, 'ave_value': -18.75385163545751, 'soft_opc': nan} step=14448




2022-04-21 22:11.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.43 [info     ] FQE_20220421220924: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016318226969519326, 'time_algorithm_update': 0.008100596278212791, 'loss': 0.6746472521620088, 'time_step': 0.00833443569582562, 'init_value': -10.90928840637207, 'ave_value': -18.983658881173344, 'soft_opc': nan} step=14792




2022-04-21 22:11.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.46 [info     ] FQE_20220421220924: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016335623208866564, 'time_algorithm_update': 0.008190820383471112, 'loss': 0.6707108256266301, 'time_step': 0.008421307386353959, 'init_value': -11.249740600585938, 'ave_value': -19.5966715849785, 'soft_opc': nan} step=15136




2022-04-21 22:11.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.49 [info     ] FQE_20220421220924: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016170324281204577, 'time_algorithm_update': 0.008034643045691557, 'loss': 0.6681825081698683, 'time_step': 0.008264089739599894, 'init_value': -11.37925910949707, 'ave_value': -19.766862317465396, 'soft_opc': nan} step=15480




2022-04-21 22:11.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.52 [info     ] FQE_20220421220924: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016258345093838003, 'time_algorithm_update': 0.008055917052335517, 'loss': 0.6519078974135567, 'time_step': 0.00828782694284306, 'init_value': -11.72245979309082, 'ave_value': -20.19029285868479, 'soft_opc': nan} step=15824




2022-04-21 22:11.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.55 [info     ] FQE_20220421220924: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016123264334922614, 'time_algorithm_update': 0.008210455262383749, 'loss': 0.6311658086839976, 'time_step': 0.008442355449809584, 'init_value': -11.497644424438477, 'ave_value': -20.022385230423414, 'soft_opc': nan} step=16168




2022-04-21 22:11.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:11.58 [info     ] FQE_20220421220924: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016201096911763036, 'time_algorithm_update': 0.008053454548813576, 'loss': 0.6347841541174539, 'time_step': 0.008282944213512331, 'init_value': -11.579259872436523, 'ave_value': -20.108333876356483, 'soft_opc': nan} step=16512




2022-04-21 22:11.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:12.02 [info     ] FQE_20220421220924: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016415673632954442, 'time_algorithm_update': 0.008340875076693158, 'loss': 0.619002693830881, 'time_step': 0.00857798928438231, 'init_value': -11.493860244750977, 'ave_value': -19.98336759955404, 'soft_opc': nan} step=16856




2022-04-21 22:12.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:12.05 [info     ] FQE_20220421220924: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001610614532648131, 'time_algorithm_update': 0.00814933970917103, 'loss': 0.616037531604254, 'time_step': 0.00837958482808845, 'init_value': -11.674440383911133, 'ave_value': -20.095206616532796, 'soft_opc': nan} step=17200




2022-04-21 22:12.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421220924/model_17200.pt
most optimal hyper params for cql at this point:  [0.0031407135725740544, 0.0013820489635677539, 7.31673811873202e-05, 1]
search iteration:  1
using hyper params:  [0.007303350770612616, 0.0023988627208773256, 6.690946072949238e-05, 1]
2022-04-21 22:12.05 [debug    ] RoundIterator is selected.
2022-04-21 22:12.05 [info     ] Directory is created at d3rlpy_logs/CQL_20220421221205
2022-04-21 22:12.05 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 22:12.05 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-21 22:12.05 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220421221205/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': Tr

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:12.23 [info     ] CQL_20220421221205: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00030564159327159726, 'time_algorithm_update': 0.05099182252939037, 'temp_loss': 4.925417906976159, 'temp': 0.9881420869358702, 'alpha_loss': -17.67719509146806, 'alpha': 1.0176995906526642, 'critic_loss': 27.01664113171528, 'actor_loss': -1.929096462624955, 'time_step': 0.05137903359584037, 'td_error': 1.2178327434845597, 'init_value': 0.19635266065597534, 'ave_value': 0.32662067912773335} step=346
2022-04-21 22:12.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:12.42 [info     ] CQL_20220421221205: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003071341211396146, 'time_algorithm_update': 0.050853735449686216, 'temp_loss': 4.871445065977945, 'temp': 0.9654759981728702, 'alpha_loss': -18.33854184123133, 'alpha': 1.0541548697934675, 'critic_loss': 29.964773398603317, 'actor_loss': -1.7629256934099804, 'time_step': 0.051243409256025545, 'td_error': 1.2099875249039913, 'init_value': 0.09369026869535446, 'ave_value': 0.3285440744103527} step=692
2022-04-21 22:12.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:13.00 [info     ] CQL_20220421221205: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00029938620638985167, 'time_algorithm_update': 0.050652464690235995, 'temp_loss': 4.763547716802255, 'temp': 0.9437121471573162, 'alpha_loss': -19.005374450904096, 'alpha': 1.0924357010449977, 'critic_loss': 38.50344062540572, 'actor_loss': -1.3476583837084688, 'time_step': 0.0510384802184353, 'td_error': 1.2048280494908268, 'init_value': -0.2008177936077118, 'ave_value': 0.07080691879276926} step=1038
2022-04-21 22:13.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:13.18 [info     ] CQL_20220421221205: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0002927139315301972, 'time_algorithm_update': 0.049926739896653013, 'temp_loss': 4.658557642401988, 'temp': 0.9226479068656878, 'alpha_loss': -19.705599746263097, 'alpha': 1.1326081552946499, 'critic_loss': 48.44432989572514, 'actor_loss': -0.9461793302628346, 'time_step': 0.050300334919394785, 'td_error': 1.204372931275188, 'init_value': -0.6590685844421387, 'ave_value': -0.35400652239013475} step=1384
2022-04-21 22:13.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:13.37 [info     ] CQL_20220421221205: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003043957528351359, 'time_algorithm_update': 0.05079243775737079, 'temp_loss': 4.555722324834393, 'temp': 0.9022158937302628, 'alpha_loss': -20.437235054942224, 'alpha': 1.1747159620240935, 'critic_loss': 60.13356847156679, 'actor_loss': -0.46381201051347865, 'time_step': 0.0511816161216339, 'td_error': 1.202533185777604, 'init_value': -1.039546251296997, 'ave_value': -0.681584421360504} step=1730
2022-04-21 22:13.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:13.56 [info     ] CQL_20220421221205: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00031140291621919314, 'time_algorithm_update': 0.05309189192821525, 'temp_loss': 4.455957893691311, 'temp': 0.8823646901315347, 'alpha_loss': -21.20723748069278, 'alpha': 1.2187925518592657, 'critic_loss': 73.16808319091797, 'actor_loss': 0.03043875415847136, 'time_step': 0.05348894637444116, 'td_error': 1.2049035522405551, 'init_value': -1.3828036785125732, 'ave_value': -0.9798030800661695} step=2076
2022-04-21 22:13.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:14.15 [info     ] CQL_20220421221205: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0002948893287967395, 'time_algorithm_update': 0.052590214448168096, 'temp_loss': 4.359162067402305, 'temp': 0.863053118390155, 'alpha_loss': -22.008059286657787, 'alpha': 1.2648787743094339, 'critic_loss': 87.96718906115935, 'actor_loss': 0.432785715627877, 'time_step': 0.052964723179106075, 'td_error': 1.2063680431501846, 'init_value': -1.6086746454238892, 'ave_value': -1.2049422262247957} step=2422
2022-04-21 22:14.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:14.35 [info     ] CQL_20220421221205: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00030942183698532896, 'time_algorithm_update': 0.05362696316889945, 'temp_loss': 4.265131973806834, 'temp': 0.8442391182990433, 'alpha_loss': -22.84208619663481, 'alpha': 1.3130035534759477, 'critic_loss': 105.0628172372807, 'actor_loss': 0.8071462835879685, 'time_step': 0.05402124410419795, 'td_error': 1.2111349408149752, 'init_value': -1.701387882232666, 'ave_value': -1.378560196523872} step=2768
2022-04-21 22:14.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:14.55 [info     ] CQL_20220421221205: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00030968506212179374, 'time_algorithm_update': 0.055243876628103976, 'temp_loss': 4.171331610982818, 'temp': 0.8259035207632649, 'alpha_loss': -23.71351302703681, 'alpha': 1.3632125068951204, 'critic_loss': 125.36428627940272, 'actor_loss': 0.9738758347627056, 'time_step': 0.05563607381258397, 'td_error': 1.2079211021600296, 'init_value': -1.8522875308990479, 'ave_value': -1.4788786028546708} step=3114
2022-04-21 22:14.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:15.16 [info     ] CQL_20220421221205: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003058986167687212, 'time_algorithm_update': 0.056497066007184155, 'temp_loss': 4.081360297396004, 'temp': 0.8080165965019623, 'alpha_loss': -24.62236177852388, 'alpha': 1.41554939953578, 'critic_loss': 152.14057362286343, 'actor_loss': 0.8428289237394498, 'time_step': 0.05688761217745742, 'td_error': 1.2086171655841236, 'init_value': -1.7793259620666504, 'ave_value': -1.4144787774147045} step=3460
2022-04-21 22:15.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:15.36 [info     ] CQL_20220421221205: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00030394992387363675, 'time_algorithm_update': 0.05667763569451481, 'temp_loss': 3.9927968723925553, 'temp': 0.7905590485975232, 'alpha_loss': -25.57366761995878, 'alpha': 1.4700740258817728, 'critic_loss': 186.7895860175866, 'actor_loss': 0.41388304705995355, 'time_step': 0.05706457113254966, 'td_error': 1.2117160627894836, 'init_value': -1.290016770362854, 'ave_value': -0.9882743842740825} step=3806
2022-04-21 22:15.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:15.57 [info     ] CQL_20220421221205: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00030775083971850445, 'time_algorithm_update': 0.05650661308641378, 'temp_loss': 3.9064603161949645, 'temp': 0.7735100424703146, 'alpha_loss': -26.553792749526185, 'alpha': 1.5268393916890801, 'critic_loss': 229.98687633889259, 'actor_loss': -0.22491229574782828, 'time_step': 0.056900545351767126, 'td_error': 1.2174430166300363, 'init_value': -0.7073578834533691, 'ave_value': -0.4787905014171709} step=4152
2022-04-21 22:15.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:16.18 [info     ] CQL_20220421221205: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00030084703699012714, 'time_algorithm_update': 0.056945877957206244, 'temp_loss': 3.8223590023944833, 'temp': 0.7568518607258108, 'alpha_loss': -27.58013432012128, 'alpha': 1.5859048494024772, 'critic_loss': 278.699157494341, 'actor_loss': -1.0034780262867151, 'time_step': 0.05733163163841115, 'td_error': 1.2241259843104306, 'init_value': 0.028794169425964355, 'ave_value': 0.17304915643631422} step=4498
2022-04-21 22:16.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:16.38 [info     ] CQL_20220421221205: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00030001739546053674, 'time_algorithm_update': 0.05634458698978314, 'temp_loss': 3.7403716972108523, 'temp': 0.7405692099491296, 'alpha_loss': -28.64840813454865, 'alpha': 1.6473492980003357, 'critic_loss': 329.70298678888753, 'actor_loss': -1.6581218948943077, 'time_step': 0.056729664692299904, 'td_error': 1.2291477191719613, 'init_value': 0.5455212593078613, 'ave_value': 0.633267049435588} step=4844
2022-04-21 22:16.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:16.59 [info     ] CQL_20220421221205: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0002957437768836931, 'time_algorithm_update': 0.056074821880098026, 'temp_loss': 3.6601674660092836, 'temp': 0.7246517918013424, 'alpha_loss': -29.758613145420316, 'alpha': 1.7112526369921734, 'critic_loss': 378.2822232108585, 'actor_loss': -2.2080744198291975, 'time_step': 0.0564529482339848, 'td_error': 1.2321195848015383, 'init_value': 1.067307949066162, 'ave_value': 1.1207049102057434} step=5190
2022-04-21 22:16.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:17.19 [info     ] CQL_20220421221205: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00030506828616809294, 'time_algorithm_update': 0.05612148714892437, 'temp_loss': 3.580523957406854, 'temp': 0.7090870810381938, 'alpha_loss': -30.913821793705054, 'alpha': 1.7776902076825931, 'critic_loss': 426.050519028151, 'actor_loss': -2.683566616449742, 'time_step': 0.0565089772891447, 'td_error': 1.2338826426811424, 'init_value': 1.5584686994552612, 'ave_value': 1.58983991308877} step=5536
2022-04-21 22:17.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:17.40 [info     ] CQL_20220421221205: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003056271227798021, 'time_algorithm_update': 0.05651016042411672, 'temp_loss': 3.5045467449750514, 'temp': 0.6938657693435691, 'alpha_loss': -32.11489271704172, 'alpha': 1.8467524451327462, 'critic_loss': 476.95101126080993, 'actor_loss': -3.1361184457823033, 'time_step': 0.05690219154247659, 'td_error': 1.2346121050108134, 'init_value': 1.9865391254425049, 'ave_value': 2.0070269203231255} step=5882
2022-04-21 22:17.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:18.00 [info     ] CQL_20220421221205: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00030232371622427353, 'time_algorithm_update': 0.05574814156989831, 'temp_loss': 3.429415020639497, 'temp': 0.6789735903629678, 'alpha_loss': -33.36000257282588, 'alpha': 1.9185322292278268, 'critic_loss': 536.2468195567931, 'actor_loss': -3.551741433970501, 'time_step': 0.056131525535804, 'td_error': 1.2354725305628766, 'init_value': 2.417151689529419, 'ave_value': 2.431166549133655} step=6228
2022-04-21 22:18.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:18.20 [info     ] CQL_20220421221205: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00030500213534845783, 'time_algorithm_update': 0.05635462124223654, 'temp_loss': 3.3551152462215095, 'temp': 0.6644087218480303, 'alpha_loss': -34.65750055919493, 'alpha': 1.9931297939636803, 'critic_loss': 603.326069208928, 'actor_loss': -3.949672964266959, 'time_step': 0.05673997526223949, 'td_error': 1.235670305475029, 'init_value': 2.791334867477417, 'ave_value': 2.8031020457171443} step=6574
2022-04-21 22:18.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:18.41 [info     ] CQL_20220421221205: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00030441504682419615, 'time_algorithm_update': 0.05604612689486818, 'temp_loss': 3.2830774577366824, 'temp': 0.6501586690458948, 'alpha_loss': -36.00233786919213, 'alpha': 2.0706461578435293, 'critic_loss': 677.9444488348988, 'actor_loss': -4.301504452104513, 'time_step': 0.05643633542033289, 'td_error': 1.236736182419548, 'init_value': 3.193986654281616, 'ave_value': 3.2019179486957077} step=6920
2022-04-21 22:18.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:19.01 [info     ] CQL_20220421221205: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003090710998270553, 'time_algorithm_update': 0.05618568031774091, 'temp_loss': 3.2126178066165463, 'temp': 0.6362175376429035, 'alpha_loss': -37.40628394639561, 'alpha': 2.1511990672591104, 'critic_loss': 761.2445359422982, 'actor_loss': -4.619797933997447, 'time_step': 0.05658024790659116, 'td_error': 1.236900710891287, 'init_value': 3.4907867908477783, 'ave_value': 3.496172439786369} step=7266
2022-04-21 22:19.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:19.22 [info     ] CQL_20220421221205: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003108116932687043, 'time_algorithm_update': 0.05615468728059978, 'temp_loss': 3.1439005818670194, 'temp': 0.6225773919524485, 'alpha_loss': -38.863066855193566, 'alpha': 2.2349003767002524, 'critic_loss': 856.0256815122042, 'actor_loss': -4.916671568258649, 'time_step': 0.05654921765961399, 'td_error': 1.2379048510980135, 'init_value': 3.825350284576416, 'ave_value': 3.833222596088028} step=7612
2022-04-21 22:19.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:19.43 [info     ] CQL_20220421221205: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00030418765338170045, 'time_algorithm_update': 0.0574436084383485, 'temp_loss': 3.0766169632101334, 'temp': 0.6092301601963925, 'alpha_loss': -40.37413339118737, 'alpha': 2.3218711028898382, 'critic_loss': 956.6118855559068, 'actor_loss': -5.206557629425402, 'time_step': 0.05783216181518026, 'td_error': 1.2389318675723582, 'init_value': 4.144209861755371, 'ave_value': 4.151649553462644} step=7958
2022-04-21 22:19.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:20.03 [info     ] CQL_20220421221205: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0002983415746964471, 'time_algorithm_update': 0.057070320741289614, 'temp_loss': 3.0105337056121386, 'temp': 0.5961687719890837, 'alpha_loss': -41.942432072810355, 'alpha': 2.4122237056666025, 'critic_loss': 1067.3547128666344, 'actor_loss': -5.475689974823439, 'time_step': 0.057445255318128996, 'td_error': 1.2399012877101134, 'init_value': 4.426551818847656, 'ave_value': 4.434157730403699} step=8304
2022-04-21 22:20.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:20.24 [info     ] CQL_20220421221205: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003024567069345816, 'time_algorithm_update': 0.057314292543885335, 'temp_loss': 2.947081155170595, 'temp': 0.583388264124104, 'alpha_loss': -43.577946062032886, 'alpha': 2.5061014587479518, 'critic_loss': 1184.5030676340093, 'actor_loss': -5.734880976594252, 'time_step': 0.057691181326188105, 'td_error': 1.2410706786018861, 'init_value': 4.709503650665283, 'ave_value': 4.712874366299922} step=8650
2022-04-21 22:20.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:20.45 [info     ] CQL_20220421221205: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00030447775228864196, 'time_algorithm_update': 0.05748710880389792, 'temp_loss': 2.8836060383416324, 'temp': 0.5708803655095183, 'alpha_loss': -45.273836345341856, 'alpha': 2.603646795873697, 'critic_loss': 1311.7608092203307, 'actor_loss': -5.98901447533183, 'time_step': 0.057871674526633554, 'td_error': 1.2426294217011897, 'init_value': 5.001988887786865, 'ave_value': 5.006813073418275} step=8996
2022-04-21 22:20.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:21.06 [info     ] CQL_20220421221205: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0003063237735990844, 'time_algorithm_update': 0.05770273566935104, 'temp_loss': 2.821629910799809, 'temp': 0.5586422066812571, 'alpha_loss': -47.03132128853329, 'alpha': 2.704985099720817, 'critic_loss': 1450.3589311721007, 'actor_loss': -6.21797997965289, 'time_step': 0.05808734755984621, 'td_error': 1.243899954562427, 'init_value': 5.250080585479736, 'ave_value': 5.254407520551939} step=9342
2022-04-21 22:21.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:21.27 [info     ] CQL_20220421221205: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00030191578616985696, 'time_algorithm_update': 0.05723393307944943, 'temp_loss': 2.760853465581905, 'temp': 0.5466687028118641, 'alpha_loss': -48.86624598640927, 'alpha': 2.8102755498334853, 'critic_loss': 1590.6975062375814, 'actor_loss': -6.441464781072098, 'time_step': 0.05761363740601291, 'td_error': 1.2452048563511988, 'init_value': 5.4880194664001465, 'ave_value': 5.49199295984506} step=9688
2022-04-21 22:21.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:21.47 [info     ] CQL_20220421221205: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00030170079600604283, 'time_algorithm_update': 0.05480707794255604, 'temp_loss': 2.701856024003442, 'temp': 0.534950647567738, 'alpha_loss': -50.76562346750601, 'alpha': 2.9196729956334724, 'critic_loss': 1741.105770750542, 'actor_loss': -6.641422191796275, 'time_step': 0.055185382765841624, 'td_error': 1.2457121069088697, 'init_value': 5.639052867889404, 'ave_value': 5.644748296823588} step=10034
2022-04-21 22:21.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:22.07 [info     ] CQL_20220421221205: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003047664730535077, 'time_algorithm_update': 0.054378266279408005, 'temp_loss': 2.644076073789872, 'temp': 0.5234847373700555, 'alpha_loss': -52.75155351892372, 'alpha': 3.033338552265498, 'critic_loss': 1903.7719645417494, 'actor_loss': -6.800904267096106, 'time_step': 0.05476105213165283, 'td_error': 1.2467302146887305, 'init_value': 5.810845851898193, 'ave_value': 5.815205406000853} step=10380
2022-04-21 22:22.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:22.27 [info     ] CQL_20220421221205: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003067985435441739, 'time_algorithm_update': 0.0547203996967029, 'temp_loss': 2.587283323265914, 'temp': 0.5122647213108967, 'alpha_loss': -54.79923751037245, 'alpha': 3.1514304132130793, 'critic_loss': 2065.877206791343, 'actor_loss': -6.949131553572727, 'time_step': 0.05510844867353495, 'td_error': 1.2482309969220242, 'init_value': 6.002330780029297, 'ave_value': 6.007044796516342} step=10726
2022-04-21 22:22.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:22.47 [info     ] CQL_20220421221205: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003081642823412239, 'time_algorithm_update': 0.05444867349084402, 'temp_loss': 2.531462369626657, 'temp': 0.5012853425534475, 'alpha_loss': -56.93377750595181, 'alpha': 3.2741161591744836, 'critic_loss': 2212.434824331647, 'actor_loss': -7.1136068539812385, 'time_step': 0.05483875316002466, 'td_error': 1.2497742335932147, 'init_value': 6.205573081970215, 'ave_value': 6.20828506592723} step=11072
2022-04-21 22:22.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:23.07 [info     ] CQL_20220421221205: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00030398575556760577, 'time_algorithm_update': 0.05480469375676503, 'temp_loss': 2.4773482380574836, 'temp': 0.490542172782683, 'alpha_loss': -59.140839119178025, 'alpha': 3.40157011134087, 'critic_loss': 2346.509793143741, 'actor_loss': -7.276413746651887, 'time_step': 0.05518943243633116, 'td_error': 1.251300899254623, 'init_value': 6.395829677581787, 'ave_value': 6.39937082637029} step=11418
2022-04-21 22:23.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:23.27 [info     ] CQL_20220421221205: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003067551320687884, 'time_algorithm_update': 0.0548915656315798, 'temp_loss': 2.4240975290364615, 'temp': 0.48002841715523276, 'alpha_loss': -61.45419754733929, 'alpha': 3.5339942660634915, 'critic_loss': 2452.7134544063856, 'actor_loss': -7.429682763325686, 'time_step': 0.055278598228630994, 'td_error': 1.2524679867512132, 'init_value': 6.5525689125061035, 'ave_value': 6.555046908422463} step=11764
2022-04-21 22:23.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:23.46 [info     ] CQL_20220421221205: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0002989458899966554, 'time_algorithm_update': 0.05458716850060259, 'temp_loss': 2.372661904103494, 'temp': 0.4697409743346231, 'alpha_loss': -63.83784440762735, 'alpha': 3.671570078485963, 'critic_loss': 2566.8737969370936, 'actor_loss': -7.582246573674197, 'time_step': 0.054967284202575684, 'td_error': 1.2544875992667992, 'init_value': 6.77107572555542, 'ave_value': 6.772379317132146} step=12110
2022-04-21 22:23.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:24.06 [info     ] CQL_20220421221205: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00030648019272467994, 'time_algorithm_update': 0.05531028309309414, 'temp_loss': 2.321196188127374, 'temp': 0.45967253930651386, 'alpha_loss': -66.33152872427351, 'alpha': 3.814503802040409, 'critic_loss': 2680.074820634258, 'actor_loss': -7.709139906602099, 'time_step': 0.05569698424697611, 'td_error': 1.2552646505183358, 'init_value': 6.875011444091797, 'ave_value': 6.87616648314518} step=12456
2022-04-21 22:24.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:24.27 [info     ] CQL_20220421221205: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00030545692223344923, 'time_algorithm_update': 0.05537761429141712, 'temp_loss': 2.2717313614883863, 'temp': 0.44982007249242306, 'alpha_loss': -68.9168526754214, 'alpha': 3.9630041163780785, 'critic_loss': 2838.4045318427115, 'actor_loss': -7.7943604888254505, 'time_step': 0.05576207389721292, 'td_error': 1.255778270307671, 'init_value': 6.963301658630371, 'ave_value': 6.964806122687147} step=12802
2022-04-21 22:24.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:24.46 [info     ] CQL_20220421221205: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00029740581622702536, 'time_algorithm_update': 0.05415578660248332, 'temp_loss': 2.2228543792845885, 'temp': 0.44017960062261263, 'alpha_loss': -71.59931548895864, 'alpha': 4.117295345129994, 'critic_loss': 3001.394218665327, 'actor_loss': -7.894247992190323, 'time_step': 0.05453261474653476, 'td_error': 1.2563363121967652, 'init_value': 7.0579705238342285, 'ave_value': 7.060609732990075} step=13148
2022-04-21 22:24.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:25.06 [info     ] CQL_20220421221205: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00031291818343146, 'time_algorithm_update': 0.05487721366000313, 'temp_loss': 2.175256012492097, 'temp': 0.4307460932201044, 'alpha_loss': -74.37722687914193, 'alpha': 4.277582284343036, 'critic_loss': 3184.4587444680274, 'actor_loss': -8.005003927760042, 'time_step': 0.0552703238636083, 'td_error': 1.2587029226132642, 'init_value': 7.258182048797607, 'ave_value': 7.258762860512948} step=13494
2022-04-21 22:25.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:25.26 [info     ] CQL_20220421221205: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003124062036503257, 'time_algorithm_update': 0.05422042491119032, 'temp_loss': 2.1288323719377464, 'temp': 0.4215141018863358, 'alpha_loss': -77.27622392687495, 'alpha': 4.44409758644986, 'critic_loss': 3363.388751608788, 'actor_loss': -8.107007845288756, 'time_step': 0.05461720924156939, 'td_error': 1.2593516467700563, 'init_value': 7.353084564208984, 'ave_value': 7.354650328180812} step=13840
2022-04-21 22:25.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:25.46 [info     ] CQL_20220421221205: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00030908625938988834, 'time_algorithm_update': 0.05403848879599158, 'temp_loss': 2.0831657117501847, 'temp': 0.41247906376516197, 'alpha_loss': -80.28398244780612, 'alpha': 4.617110142129005, 'critic_loss': 3478.551637153405, 'actor_loss': -8.212609213900704, 'time_step': 0.05443067840069016, 'td_error': 1.2595341421479171, 'init_value': 7.412573337554932, 'ave_value': 7.414765777967732} step=14186
2022-04-21 22:25.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:26.05 [info     ] CQL_20220421221205: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00030613083370848197, 'time_algorithm_update': 0.05412716741506764, 'temp_loss': 2.038548615626517, 'temp': 0.40363838183397505, 'alpha_loss': -83.40808886048421, 'alpha': 4.796853241892908, 'critic_loss': 3572.1776045429915, 'actor_loss': -8.332451371099218, 'time_step': 0.0545145252536487, 'td_error': 1.2622998940408952, 'init_value': 7.632516860961914, 'ave_value': 7.633180624435953} step=14532
2022-04-21 22:26.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:26.25 [info     ] CQL_20220421221205: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00031049196430713456, 'time_algorithm_update': 0.054232138429763, 'temp_loss': 1.9945445746355663, 'temp': 0.39498824268751753, 'alpha_loss': -86.66190468231378, 'alpha': 4.983592223569837, 'critic_loss': 3471.5513965408236, 'actor_loss': -8.518943607462624, 'time_step': 0.05463037463281885, 'td_error': 1.2647008210725486, 'init_value': 7.842134952545166, 'ave_value': 7.841545390585352} step=14878
2022-04-21 22:26.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:26.45 [info     ] CQL_20220421221205: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00030842819654872654, 'time_algorithm_update': 0.05423122265435368, 'temp_loss': 1.952428701985089, 'temp': 0.38652195406786966, 'alpha_loss': -90.03967820977888, 'alpha': 5.177607011243787, 'critic_loss': 3141.893990048094, 'actor_loss': -8.780461567674758, 'time_step': 0.054622650146484375, 'td_error': 1.267916220934867, 'init_value': 8.12606143951416, 'ave_value': 8.124717063189344} step=15224
2022-04-21 22:26.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:27.04 [info     ] CQL_20220421221205: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003045190965509139, 'time_algorithm_update': 0.052980172840846065, 'temp_loss': 1.910215793317453, 'temp': 0.37823655210822993, 'alpha_loss': -93.54226618419493, 'alpha': 5.379165923664336, 'critic_loss': 2885.6598429597184, 'actor_loss': -8.986199494731219, 'time_step': 0.053365572339537516, 'td_error': 1.270393397787079, 'init_value': 8.34991455078125, 'ave_value': 8.34821444611348} step=15570
2022-04-21 22:27.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:27.23 [info     ] CQL_20220421221205: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00030970022168462676, 'time_algorithm_update': 0.052963839790035536, 'temp_loss': 1.869355115242776, 'temp': 0.3701295891421379, 'alpha_loss': -97.18849038802131, 'alpha': 5.588571854409455, 'critic_loss': 2677.304654336389, 'actor_loss': -9.20403321767818, 'time_step': 0.05335950989254637, 'td_error': 1.2727216763609197, 'init_value': 8.567070007324219, 'ave_value': 8.565770367006941} step=15916
2022-04-21 22:27.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:27.43 [info     ] CQL_20220421221205: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00030777633534690547, 'time_algorithm_update': 0.05316616069374746, 'temp_loss': 1.8291965461190725, 'temp': 0.362197250386194, 'alpha_loss': -100.96622078956207, 'alpha': 5.806134057182797, 'critic_loss': 2431.6144793736453, 'actor_loss': -9.448513039274712, 'time_step': 0.053557546841615886, 'td_error': 1.2769207765893955, 'init_value': 8.883816719055176, 'ave_value': 8.88233364542947} step=16262
2022-04-21 22:27.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:28.02 [info     ] CQL_20220421221205: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.000306212833161988, 'time_algorithm_update': 0.0534416003034294, 'temp_loss': 1.7900838455712864, 'temp': 0.35443458443432185, 'alpha_loss': -104.89588310815006, 'alpha': 6.032155994735012, 'critic_loss': 2255.5319788938314, 'actor_loss': -9.655234469154667, 'time_step': 0.05383238213599762, 'td_error': 1.2786471374513226, 'init_value': 9.053744316101074, 'ave_value': 9.052833880717973} step=16608
2022-04-21 22:28.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:28.22 [info     ] CQL_20220421221205: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00030245188343731654, 'time_algorithm_update': 0.053197350805205414, 'temp_loss': 1.7519113369759796, 'temp': 0.3468384455049658, 'alpha_loss': -108.98070261519769, 'alpha': 6.266985607974102, 'critic_loss': 2108.28976705033, 'actor_loss': -9.878842428240473, 'time_step': 0.05357698967002031, 'td_error': 1.282445887555699, 'init_value': 9.335476875305176, 'ave_value': 9.333852463302142} step=16954
2022-04-21 22:28.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:28.41 [info     ] CQL_20220421221205: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003096030626682877, 'time_algorithm_update': 0.05408075090088596, 'temp_loss': 1.7139382796480476, 'temp': 0.3394065190085097, 'alpha_loss': -113.21935552255266, 'alpha': 6.510960504498785, 'critic_loss': 2018.9374827125857, 'actor_loss': -10.074974795986462, 'time_step': 0.0544657845028563, 'td_error': 1.284579598609304, 'init_value': 9.521822929382324, 'ave_value': 9.521477645966272} step=17300
2022-04-21 22:28.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421221205/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 22:28.43 [info     ] FQE_20220421222841: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00015260540159408656, 'time_algorithm_update': 0.00845317786696267, 'loss': 0.006336980137584657, 'time_step': 0.008670860764670506, 'init_value': -0.2699570655822754, 'ave_value': -0.2331114131632868, 'soft_opc': nan} step=177




2022-04-21 22:28.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.45 [info     ] FQE_20220421222841: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.0001533031463623047, 'time_algorithm_update': 0.007970842264466366, 'loss': 0.004243861570149281, 'time_step': 0.008191799713393389, 'init_value': -0.36215445399284363, 'ave_value': -0.2892594243581589, 'soft_opc': nan} step=354




2022-04-21 22:28.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.46 [info     ] FQE_20220421222841: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00014921366158178296, 'time_algorithm_update': 0.008350375008448369, 'loss': 0.003694747517406309, 'time_step': 0.008565609064479332, 'init_value': -0.4918309450149536, 'ave_value': -0.38244417295441613, 'soft_opc': nan} step=531




2022-04-21 22:28.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.48 [info     ] FQE_20220421222841: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00015285459615416446, 'time_algorithm_update': 0.008654356002807617, 'loss': 0.0032814522405381257, 'time_step': 0.008873181154498946, 'init_value': -0.5274114012718201, 'ave_value': -0.4002851524913275, 'soft_opc': nan} step=708




2022-04-21 22:28.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.49 [info     ] FQE_20220421222841: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00015212317644539526, 'time_algorithm_update': 0.008364476726553535, 'loss': 0.002981391274317636, 'time_step': 0.008584279798518467, 'init_value': -0.6187224984169006, 'ave_value': -0.463963471532047, 'soft_opc': nan} step=885




2022-04-21 22:28.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.51 [info     ] FQE_20220421222841: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.0001516072763561529, 'time_algorithm_update': 0.007982734906471382, 'loss': 0.002637604938157029, 'time_step': 0.008192015233012917, 'init_value': -0.698056697845459, 'ave_value': -0.5113766407375937, 'soft_opc': nan} step=1062




2022-04-21 22:28.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.53 [info     ] FQE_20220421222841: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.0001512260760291148, 'time_algorithm_update': 0.008439916675373658, 'loss': 0.0024547237451688127, 'time_step': 0.00866203388925326, 'init_value': -0.7855815887451172, 'ave_value': -0.5661783685913315, 'soft_opc': nan} step=1239




2022-04-21 22:28.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.54 [info     ] FQE_20220421222841: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00014887825917389433, 'time_algorithm_update': 0.008534178221966587, 'loss': 0.002269826530831429, 'time_step': 0.008754454090096855, 'init_value': -0.8868403434753418, 'ave_value': -0.6304583749524107, 'soft_opc': nan} step=1416




2022-04-21 22:28.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.56 [info     ] FQE_20220421222841: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00015409383396644376, 'time_algorithm_update': 0.008497852390095338, 'loss': 0.0023117923535574962, 'time_step': 0.008720757597583836, 'init_value': -0.9227470755577087, 'ave_value': -0.6430437096156874, 'soft_opc': nan} step=1593




2022-04-21 22:28.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.58 [info     ] FQE_20220421222841: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00015292059903764454, 'time_algorithm_update': 0.008321896784723141, 'loss': 0.0023607951819128487, 'time_step': 0.008544602636563576, 'init_value': -1.031190037727356, 'ave_value': -0.7288546292899966, 'soft_opc': nan} step=1770




2022-04-21 22:28.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:28.59 [info     ] FQE_20220421222841: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00015402783108296367, 'time_algorithm_update': 0.008178445578968458, 'loss': 0.0024176474067377253, 'time_step': 0.008398896556789591, 'init_value': -1.1349059343338013, 'ave_value': -0.7962373951891879, 'soft_opc': nan} step=1947




2022-04-21 22:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.01 [info     ] FQE_20220421222841: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00015358870985817774, 'time_algorithm_update': 0.008590349369803392, 'loss': 0.0025793101311671826, 'time_step': 0.008811710918017026, 'init_value': -1.274009108543396, 'ave_value': -0.9097437840592754, 'soft_opc': nan} step=2124




2022-04-21 22:29.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.02 [info     ] FQE_20220421222841: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00015555936737922626, 'time_algorithm_update': 0.008525075212036822, 'loss': 0.0026307477843342613, 'time_step': 0.008749907973122462, 'init_value': -1.314297080039978, 'ave_value': -0.9155564027237104, 'soft_opc': nan} step=2301




2022-04-21 22:29.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.04 [info     ] FQE_20220421222841: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00015226730519095382, 'time_algorithm_update': 0.008482763322733216, 'loss': 0.003032561256431864, 'time_step': 0.008706429583878166, 'init_value': -1.4108200073242188, 'ave_value': -0.9728161250685786, 'soft_opc': nan} step=2478




2022-04-21 22:29.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.06 [info     ] FQE_20220421222841: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00015306742177844721, 'time_algorithm_update': 0.007977694441369698, 'loss': 0.0033445461021110122, 'time_step': 0.008200384129238668, 'init_value': -1.5058525800704956, 'ave_value': -1.0411275336871275, 'soft_opc': nan} step=2655




2022-04-21 22:29.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.07 [info     ] FQE_20220421222841: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00015490742053015757, 'time_algorithm_update': 0.008547284508829063, 'loss': 0.0036390129470866236, 'time_step': 0.00877075141432595, 'init_value': -1.6382077932357788, 'ave_value': -1.1341111732674791, 'soft_opc': nan} step=2832




2022-04-21 22:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.09 [info     ] FQE_20220421222841: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00015394431723039702, 'time_algorithm_update': 0.008497582990570931, 'loss': 0.0038738268114150223, 'time_step': 0.008718978213725116, 'init_value': -1.7492116689682007, 'ave_value': -1.2191012116255344, 'soft_opc': nan} step=3009




2022-04-21 22:29.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.11 [info     ] FQE_20220421222841: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00015354829992951646, 'time_algorithm_update': 0.00851929793923588, 'loss': 0.004263655855096771, 'time_step': 0.008742087304928882, 'init_value': -1.7706947326660156, 'ave_value': -1.2160976872340337, 'soft_opc': nan} step=3186




2022-04-21 22:29.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.12 [info     ] FQE_20220421222841: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00015165172727768031, 'time_algorithm_update': 0.007759052481354967, 'loss': 0.004649563844770846, 'time_step': 0.007977817018153303, 'init_value': -1.858790397644043, 'ave_value': -1.290699937429514, 'soft_opc': nan} step=3363




2022-04-21 22:29.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.14 [info     ] FQE_20220421222841: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.0001506684190135891, 'time_algorithm_update': 0.008513570505346955, 'loss': 0.004976120070057085, 'time_step': 0.008731591499457925, 'init_value': -1.873426079750061, 'ave_value': -1.2845144897788852, 'soft_opc': nan} step=3540




2022-04-21 22:29.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.15 [info     ] FQE_20220421222841: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00015403860706394003, 'time_algorithm_update': 0.008462423658640372, 'loss': 0.0056943565618252055, 'time_step': 0.008686884648382328, 'init_value': -2.0746614933013916, 'ave_value': -1.4511061008986053, 'soft_opc': nan} step=3717




2022-04-21 22:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.17 [info     ] FQE_20220421222841: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.0001546878599177646, 'time_algorithm_update': 0.008520877967446537, 'loss': 0.00572563393178296, 'time_step': 0.008746506804126804, 'init_value': -2.013624429702759, 'ave_value': -1.3797829744246628, 'soft_opc': nan} step=3894




2022-04-21 22:29.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.19 [info     ] FQE_20220421222841: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00015518490204029838, 'time_algorithm_update': 0.008084570620693055, 'loss': 0.0059786444009639495, 'time_step': 0.00830836349961448, 'init_value': -2.109924793243408, 'ave_value': -1.456095906519317, 'soft_opc': nan} step=4071




2022-04-21 22:29.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.20 [info     ] FQE_20220421222841: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.0001561156773971299, 'time_algorithm_update': 0.008505240672052243, 'loss': 0.00640396444826771, 'time_step': 0.008733255041521148, 'init_value': -2.1895968914031982, 'ave_value': -1.5293243933547367, 'soft_opc': nan} step=4248




2022-04-21 22:29.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.22 [info     ] FQE_20220421222841: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00015131632486979166, 'time_algorithm_update': 0.008466266642856058, 'loss': 0.006811516438209647, 'time_step': 0.008691098056944077, 'init_value': -2.20294451713562, 'ave_value': -1.5355826755319988, 'soft_opc': nan} step=4425




2022-04-21 22:29.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.24 [info     ] FQE_20220421222841: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00015484007064905543, 'time_algorithm_update': 0.008454458861701233, 'loss': 0.007144580456088849, 'time_step': 0.008678022751026908, 'init_value': -2.3161160945892334, 'ave_value': -1.635041735140053, 'soft_opc': nan} step=4602




2022-04-21 22:29.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.25 [info     ] FQE_20220421222841: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.000155645575227037, 'time_algorithm_update': 0.008274513449372545, 'loss': 0.008006768736206942, 'time_step': 0.00849773520130222, 'init_value': -2.333327054977417, 'ave_value': -1.6691861723412622, 'soft_opc': nan} step=4779




2022-04-21 22:29.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.27 [info     ] FQE_20220421222841: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.0001541894707976088, 'time_algorithm_update': 0.008227756467916198, 'loss': 0.008007599521325744, 'time_step': 0.008451297458282298, 'init_value': -2.368086099624634, 'ave_value': -1.6866691338899615, 'soft_opc': nan} step=4956




2022-04-21 22:29.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.28 [info     ] FQE_20220421222841: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.0001551256341449285, 'time_algorithm_update': 0.00852368241649563, 'loss': 0.008200006599611693, 'time_step': 0.00874676812166548, 'init_value': -2.36794376373291, 'ave_value': -1.677655703198534, 'soft_opc': nan} step=5133




2022-04-21 22:29.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.30 [info     ] FQE_20220421222841: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00015618572127347612, 'time_algorithm_update': 0.008398796878965562, 'loss': 0.008775989726444939, 'time_step': 0.008622529142993992, 'init_value': -2.4014732837677, 'ave_value': -1.7112874852219322, 'soft_opc': nan} step=5310




2022-04-21 22:29.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.32 [info     ] FQE_20220421222841: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.0001525905846202441, 'time_algorithm_update': 0.008539645685314458, 'loss': 0.00969597197264054, 'time_step': 0.008761007233528094, 'init_value': -2.524184465408325, 'ave_value': -1.801206497916275, 'soft_opc': nan} step=5487




2022-04-21 22:29.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.33 [info     ] FQE_20220421222841: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00018140555775098208, 'time_algorithm_update': 0.00789263827652581, 'loss': 0.00993254013283252, 'time_step': 0.008145107387822901, 'init_value': -2.5694570541381836, 'ave_value': -1.8543848027265422, 'soft_opc': nan} step=5664




2022-04-21 22:29.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.35 [info     ] FQE_20220421222841: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00015542466761702198, 'time_algorithm_update': 0.008602011675215036, 'loss': 0.010109989787945209, 'time_step': 0.00882800824224612, 'init_value': -2.595667600631714, 'ave_value': -1.8574905030317492, 'soft_opc': nan} step=5841




2022-04-21 22:29.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.36 [info     ] FQE_20220421222841: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.0001548360296561893, 'time_algorithm_update': 0.008447282058370989, 'loss': 0.010400286383695353, 'time_step': 0.008666967941542803, 'init_value': -2.5916037559509277, 'ave_value': -1.8556837179773562, 'soft_opc': nan} step=6018




2022-04-21 22:29.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.38 [info     ] FQE_20220421222841: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.0001556873321533203, 'time_algorithm_update': 0.008661562440085546, 'loss': 0.010550860137649743, 'time_step': 0.00888862717623091, 'init_value': -2.563565492630005, 'ave_value': -1.8375772273917501, 'soft_opc': nan} step=6195




2022-04-21 22:29.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.40 [info     ] FQE_20220421222841: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00015207333753337968, 'time_algorithm_update': 0.007780904823777366, 'loss': 0.011011187118375516, 'time_step': 0.00800072810070663, 'init_value': -2.6531131267547607, 'ave_value': -1.9253662539192953, 'soft_opc': nan} step=6372




2022-04-21 22:29.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.41 [info     ] FQE_20220421222841: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00015857798905022401, 'time_algorithm_update': 0.00898583730061849, 'loss': 0.011463736595341273, 'time_step': 0.009215732078767766, 'init_value': -2.7069931030273438, 'ave_value': -1.9878068814160408, 'soft_opc': nan} step=6549




2022-04-21 22:29.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.43 [info     ] FQE_20220421222841: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00015284920816367628, 'time_algorithm_update': 0.008932842373174463, 'loss': 0.01155869000526224, 'time_step': 0.009154358826114633, 'init_value': -2.673597574234009, 'ave_value': -1.9763978274136378, 'soft_opc': nan} step=6726




2022-04-21 22:29.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.45 [info     ] FQE_20220421222841: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00015136212278894112, 'time_algorithm_update': 0.008605800779525843, 'loss': 0.01207087176018605, 'time_step': 0.008828227802858515, 'init_value': -2.7845938205718994, 'ave_value': -2.052863664968728, 'soft_opc': nan} step=6903




2022-04-21 22:29.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.46 [info     ] FQE_20220421222841: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00015369646966794116, 'time_algorithm_update': 0.008530300215812726, 'loss': 0.012205256874959831, 'time_step': 0.008750017079929848, 'init_value': -2.7502973079681396, 'ave_value': -2.028543900761071, 'soft_opc': nan} step=7080




2022-04-21 22:29.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.48 [info     ] FQE_20220421222841: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00015510273518535376, 'time_algorithm_update': 0.008918896906793454, 'loss': 0.01224006202083744, 'time_step': 0.009144337163806636, 'init_value': -2.7875473499298096, 'ave_value': -2.0394788655656595, 'soft_opc': nan} step=7257




2022-04-21 22:29.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.50 [info     ] FQE_20220421222841: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00015762431473381776, 'time_algorithm_update': 0.008696413309560657, 'loss': 0.01287713812205067, 'time_step': 0.008924596053732317, 'init_value': -2.8769874572753906, 'ave_value': -2.1226457095539963, 'soft_opc': nan} step=7434




2022-04-21 22:29.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.52 [info     ] FQE_20220421222841: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00015283439118983383, 'time_algorithm_update': 0.008959677259800798, 'loss': 0.013875334471329983, 'time_step': 0.009180792307449599, 'init_value': -2.83551025390625, 'ave_value': -2.1175683778059016, 'soft_opc': nan} step=7611




2022-04-21 22:29.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.53 [info     ] FQE_20220421222841: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00015356715789622505, 'time_algorithm_update': 0.008613601242755092, 'loss': 0.013817429527742946, 'time_step': 0.008833825924975724, 'init_value': -2.9049994945526123, 'ave_value': -2.166454890339045, 'soft_opc': nan} step=7788




2022-04-21 22:29.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.55 [info     ] FQE_20220421222841: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00015333412730761167, 'time_algorithm_update': 0.008741582180820617, 'loss': 0.01441752428345456, 'time_step': 0.00896352967299984, 'init_value': -2.917790174484253, 'ave_value': -2.179940547080369, 'soft_opc': nan} step=7965




2022-04-21 22:29.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.57 [info     ] FQE_20220421222841: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00015558091934117895, 'time_algorithm_update': 0.008701647742319915, 'loss': 0.014458014492319086, 'time_step': 0.00892198018435031, 'init_value': -2.992285966873169, 'ave_value': -2.217904852881088, 'soft_opc': nan} step=8142




2022-04-21 22:29.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:29.58 [info     ] FQE_20220421222841: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00015772803355071504, 'time_algorithm_update': 0.008960414067500056, 'loss': 0.01443503136841296, 'time_step': 0.009183505160660393, 'init_value': -2.948432207107544, 'ave_value': -2.1956652381085418, 'soft_opc': nan} step=8319




2022-04-21 22:29.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:30.00 [info     ] FQE_20220421222841: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00015673799029851363, 'time_algorithm_update': 0.008910801451084978, 'loss': 0.014658658138784123, 'time_step': 0.009134489264191881, 'init_value': -2.955305337905884, 'ave_value': -2.209452143043011, 'soft_opc': nan} step=8496




2022-04-21 22:30.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:30.02 [info     ] FQE_20220421222841: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00015519971901414086, 'time_algorithm_update': 0.008239665273892678, 'loss': 0.015371333316723568, 'time_step': 0.008460929838277526, 'init_value': -3.002398729324341, 'ave_value': -2.2583950722808237, 'soft_opc': nan} step=8673




2022-04-21 22:30.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 22:30.03 [info     ] FQE_20220421222841: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00015316036461436816, 'time_algorithm_update': 0.009047692778420314, 'loss': 0.015577546787832332, 'time_step': 0.009270581821937346, 'init_value': -3.070967674255371, 'ave_value': -2.3358386044298207, 'soft_opc': nan} step=8850




2022-04-21 22:30.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421222841/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-21 22:30.04 [debug    ] RoundIterator is selected.
2022-04-21 22:30.04 [info     ] Directory is created at d3rlpy_logs/FQE_20220421223004
2022-04-21 22:30.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 22:30.04 [debug    ] Building models...
2022-04-21 22:30.04 [debug    ] Models have been built.
2022-04-21 22:30.04 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220421223004/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 22:30.07 [info     ] FQE_20220421223004: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015780329704284668, 'time_algorithm_update': 0.008681971666424773, 'loss': 0.024271887277664488, 'time_step': 0.008906171765438346, 'init_value': -1.112412691116333, 'ave_value': -1.1152689113407521, 'soft_opc': nan} step=344




2022-04-21 22:30.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.10 [info     ] FQE_20220421223004: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015540039816568064, 'time_algorithm_update': 0.008784259474554728, 'loss': 0.022586857012018215, 'time_step': 0.009006412916405256, 'init_value': -1.9335057735443115, 'ave_value': -1.9336096902151365, 'soft_opc': nan} step=688




2022-04-21 22:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.14 [info     ] FQE_20220421223004: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015702289204264796, 'time_algorithm_update': 0.00875117820362712, 'loss': 0.026361961074736574, 'time_step': 0.008976234946140024, 'init_value': -2.997713088989258, 'ave_value': -3.0112833986679712, 'soft_opc': nan} step=1032




2022-04-21 22:30.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.17 [info     ] FQE_20220421223004: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015608169311700866, 'time_algorithm_update': 0.008654558381368948, 'loss': 0.029309738489669248, 'time_step': 0.00887765371522238, 'init_value': -3.6529316902160645, 'ave_value': -3.690513539018932, 'soft_opc': nan} step=1376




2022-04-21 22:30.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.20 [info     ] FQE_20220421223004: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001587264759596004, 'time_algorithm_update': 0.00882078118102495, 'loss': 0.03610434190121068, 'time_step': 0.009048128543898117, 'init_value': -4.581850051879883, 'ave_value': -4.6952653139561145, 'soft_opc': nan} step=1720




2022-04-21 22:30.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.24 [info     ] FQE_20220421223004: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015855320664339288, 'time_algorithm_update': 0.00860211876935737, 'loss': 0.044827040812802005, 'time_step': 0.00882763432901959, 'init_value': -5.188417911529541, 'ave_value': -5.476115383650805, 'soft_opc': nan} step=2064




2022-04-21 22:30.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.27 [info     ] FQE_20220421223004: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015975638877513797, 'time_algorithm_update': 0.008923894444177316, 'loss': 0.05598160596889292, 'time_step': 0.009151927953542666, 'init_value': -5.8976850509643555, 'ave_value': -6.423329763369518, 'soft_opc': nan} step=2408




2022-04-21 22:30.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.30 [info     ] FQE_20220421223004: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016109818635984908, 'time_algorithm_update': 0.008541868869648424, 'loss': 0.06834463813602058, 'time_step': 0.008769328511038493, 'init_value': -6.037139415740967, 'ave_value': -6.967583459588859, 'soft_opc': nan} step=2752




2022-04-21 22:30.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.34 [info     ] FQE_20220421223004: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00018503776816434638, 'time_algorithm_update': 0.00919715124507283, 'loss': 0.07998773415551283, 'time_step': 0.009459135144255882, 'init_value': -6.109793663024902, 'ave_value': -7.562910067551845, 'soft_opc': nan} step=3096




2022-04-21 22:30.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.37 [info     ] FQE_20220421223004: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015930034393487976, 'time_algorithm_update': 0.008486412292303042, 'loss': 0.10033149578132082, 'time_step': 0.008715017590411874, 'init_value': -6.266245365142822, 'ave_value': -8.467469052285761, 'soft_opc': nan} step=3440




2022-04-21 22:30.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.40 [info     ] FQE_20220421223004: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016292929649353027, 'time_algorithm_update': 0.008919138547986052, 'loss': 0.11279482654902305, 'time_step': 0.00915196607279223, 'init_value': -6.139810562133789, 'ave_value': -9.062191850947098, 'soft_opc': nan} step=3784




2022-04-21 22:30.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.44 [info     ] FQE_20220421223004: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016419693481090457, 'time_algorithm_update': 0.008638523345769838, 'loss': 0.12955948781915183, 'time_step': 0.008874365063600762, 'init_value': -6.178928375244141, 'ave_value': -9.862093728753058, 'soft_opc': nan} step=4128




2022-04-21 22:30.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.47 [info     ] FQE_20220421223004: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016716191934984783, 'time_algorithm_update': 0.008888974439266116, 'loss': 0.1418693005594663, 'time_step': 0.009130250575930574, 'init_value': -6.249285697937012, 'ave_value': -10.573156477176576, 'soft_opc': nan} step=4472




2022-04-21 22:30.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.50 [info     ] FQE_20220421223004: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016734211943870368, 'time_algorithm_update': 0.00893373544826064, 'loss': 0.1495777657472117, 'time_step': 0.009174988713375357, 'init_value': -6.39000129699707, 'ave_value': -11.30114628278172, 'soft_opc': nan} step=4816




2022-04-21 22:30.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.54 [info     ] FQE_20220421223004: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016499328058819439, 'time_algorithm_update': 0.008733262849408526, 'loss': 0.15860590933237312, 'time_step': 0.008967556925707085, 'init_value': -6.411166667938232, 'ave_value': -11.854835475254635, 'soft_opc': nan} step=5160




2022-04-21 22:30.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:30.57 [info     ] FQE_20220421223004: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00016977066217466842, 'time_algorithm_update': 0.009007165598314862, 'loss': 0.16300529296791486, 'time_step': 0.00925005036731099, 'init_value': -6.846072196960449, 'ave_value': -12.649999899895828, 'soft_opc': nan} step=5504




2022-04-21 22:30.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.01 [info     ] FQE_20220421223004: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016930283502090808, 'time_algorithm_update': 0.009544895831928697, 'loss': 0.174420005490267, 'time_step': 0.0097910858864008, 'init_value': -7.1252121925354, 'ave_value': -13.13846566596295, 'soft_opc': nan} step=5848




2022-04-21 22:31.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.05 [info     ] FQE_20220421223004: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017060235489246457, 'time_algorithm_update': 0.00999695894330047, 'loss': 0.1762916704011691, 'time_step': 0.010240073120871256, 'init_value': -7.180159568786621, 'ave_value': -13.379281305796928, 'soft_opc': nan} step=6192




2022-04-21 22:31.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.08 [info     ] FQE_20220421223004: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016913303109102471, 'time_algorithm_update': 0.00963086998739908, 'loss': 0.184872358571738, 'time_step': 0.009871345619822657, 'init_value': -7.4792022705078125, 'ave_value': -13.895063744419934, 'soft_opc': nan} step=6536




2022-04-21 22:31.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.12 [info     ] FQE_20220421223004: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016801786977191304, 'time_algorithm_update': 0.009766031836354456, 'loss': 0.19073624787635582, 'time_step': 0.010004033876019855, 'init_value': -7.7585930824279785, 'ave_value': -14.250213776528835, 'soft_opc': nan} step=6880




2022-04-21 22:31.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.16 [info     ] FQE_20220421223004: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001677870750427246, 'time_algorithm_update': 0.00966992974281311, 'loss': 0.20139319926695248, 'time_step': 0.009909852992656619, 'init_value': -8.430398941040039, 'ave_value': -14.877721477204279, 'soft_opc': nan} step=7224




2022-04-21 22:31.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.19 [info     ] FQE_20220421223004: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017132662063421204, 'time_algorithm_update': 0.00991172707358072, 'loss': 0.20981288414318547, 'time_step': 0.010159579127333885, 'init_value': -8.818090438842773, 'ave_value': -15.280100728213988, 'soft_opc': nan} step=7568




2022-04-21 22:31.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.23 [info     ] FQE_20220421223004: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016902906950130018, 'time_algorithm_update': 0.00978161706480869, 'loss': 0.21423939143870632, 'time_step': 0.010022089924923209, 'init_value': -9.372893333435059, 'ave_value': -15.789539995526493, 'soft_opc': nan} step=7912




2022-04-21 22:31.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.27 [info     ] FQE_20220421223004: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.000166231116583181, 'time_algorithm_update': 0.009873069996057554, 'loss': 0.21940269115987385, 'time_step': 0.0101126654203548, 'init_value': -9.788755416870117, 'ave_value': -16.028157938365734, 'soft_opc': nan} step=8256




2022-04-21 22:31.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.30 [info     ] FQE_20220421223004: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016898748486541038, 'time_algorithm_update': 0.009456603333007458, 'loss': 0.22723028308517018, 'time_step': 0.009700108406155608, 'init_value': -10.167884826660156, 'ave_value': -16.415452133208884, 'soft_opc': nan} step=8600




2022-04-21 22:31.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.34 [info     ] FQE_20220421223004: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016995086226352426, 'time_algorithm_update': 0.009991136401198631, 'loss': 0.23974902258622785, 'time_step': 0.010236837836199029, 'init_value': -10.998790740966797, 'ave_value': -17.277802794795853, 'soft_opc': nan} step=8944




2022-04-21 22:31.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.38 [info     ] FQE_20220421223004: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017089483349822288, 'time_algorithm_update': 0.009671419858932495, 'loss': 0.23967264888288323, 'time_step': 0.009917196839354759, 'init_value': -11.322099685668945, 'ave_value': -17.472073199707378, 'soft_opc': nan} step=9288




2022-04-21 22:31.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.42 [info     ] FQE_20220421223004: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016901590103326843, 'time_algorithm_update': 0.010149656340133312, 'loss': 0.2457119905833848, 'time_step': 0.010389653749244158, 'init_value': -11.638683319091797, 'ave_value': -17.81156509952779, 'soft_opc': nan} step=9632




2022-04-21 22:31.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.45 [info     ] FQE_20220421223004: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016976650371107945, 'time_algorithm_update': 0.009463177863941637, 'loss': 0.2479080969825127, 'time_step': 0.009708173053209172, 'init_value': -12.16221809387207, 'ave_value': -18.145510180093986, 'soft_opc': nan} step=9976




2022-04-21 22:31.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.49 [info     ] FQE_20220421223004: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017615321070648903, 'time_algorithm_update': 0.00971375024595926, 'loss': 0.2572932841018016, 'time_step': 0.009966593842173731, 'init_value': -12.93824577331543, 'ave_value': -18.83160154037849, 'soft_opc': nan} step=10320




2022-04-21 22:31.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.53 [info     ] FQE_20220421223004: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017140771067419716, 'time_algorithm_update': 0.010022510622822962, 'loss': 0.2663187299107829, 'time_step': 0.010269550390021746, 'init_value': -13.064586639404297, 'ave_value': -18.913610756975757, 'soft_opc': nan} step=10664




2022-04-21 22:31.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:31.56 [info     ] FQE_20220421223004: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017116513363150664, 'time_algorithm_update': 0.009618093108021936, 'loss': 0.26855873591049984, 'time_step': 0.009862398685410965, 'init_value': -13.698628425598145, 'ave_value': -19.66518887676265, 'soft_opc': nan} step=11008




2022-04-21 22:31.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.00 [info     ] FQE_20220421223004: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001709599827611169, 'time_algorithm_update': 0.00976805700812229, 'loss': 0.2736269889161164, 'time_step': 0.010015805793362994, 'init_value': -13.8270845413208, 'ave_value': -19.71381829395588, 'soft_opc': nan} step=11352




2022-04-21 22:32.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.04 [info     ] FQE_20220421223004: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016787925431894702, 'time_algorithm_update': 0.009529330702715142, 'loss': 0.2875505667423483, 'time_step': 0.009771981904673021, 'init_value': -14.358949661254883, 'ave_value': -20.19977017720943, 'soft_opc': nan} step=11696




2022-04-21 22:32.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.07 [info     ] FQE_20220421223004: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016803866208985795, 'time_algorithm_update': 0.010034534127213234, 'loss': 0.2965688625280204, 'time_step': 0.01027487738187923, 'init_value': -15.04007339477539, 'ave_value': -20.881534448718266, 'soft_opc': nan} step=12040




2022-04-21 22:32.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.11 [info     ] FQE_20220421223004: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.000166447356689808, 'time_algorithm_update': 0.009590757447619771, 'loss': 0.3050301889212109, 'time_step': 0.009830542775087579, 'init_value': -15.307212829589844, 'ave_value': -21.055323142887236, 'soft_opc': nan} step=12384




2022-04-21 22:32.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.15 [info     ] FQE_20220421223004: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016670726066411926, 'time_algorithm_update': 0.009547009024509164, 'loss': 0.3113864902569371, 'time_step': 0.009785212056581364, 'init_value': -15.600531578063965, 'ave_value': -21.31778632164975, 'soft_opc': nan} step=12728




2022-04-21 22:32.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.18 [info     ] FQE_20220421223004: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017741599748300952, 'time_algorithm_update': 0.009647875331168952, 'loss': 0.31904634279686267, 'time_step': 0.009897446216538895, 'init_value': -15.86377239227295, 'ave_value': -21.58507287448591, 'soft_opc': nan} step=13072




2022-04-21 22:32.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.22 [info     ] FQE_20220421223004: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001713855322017226, 'time_algorithm_update': 0.009848054065260776, 'loss': 0.3258432769703917, 'time_step': 0.010091459335282792, 'init_value': -15.97543716430664, 'ave_value': -21.719601491308428, 'soft_opc': nan} step=13416




2022-04-21 22:32.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.26 [info     ] FQE_20220421223004: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016743637794672058, 'time_algorithm_update': 0.009664512650911198, 'loss': 0.33314477107054447, 'time_step': 0.009902416966682258, 'init_value': -16.438148498535156, 'ave_value': -22.031833097268198, 'soft_opc': nan} step=13760




2022-04-21 22:32.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.29 [info     ] FQE_20220421223004: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016913857570914335, 'time_algorithm_update': 0.009722062321596368, 'loss': 0.3459033819029194, 'time_step': 0.009965204222257747, 'init_value': -16.70476531982422, 'ave_value': -22.367548194923646, 'soft_opc': nan} step=14104




2022-04-21 22:32.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.33 [info     ] FQE_20220421223004: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016908174337342728, 'time_algorithm_update': 0.009729473396789196, 'loss': 0.35176254688155684, 'time_step': 0.009972098954888277, 'init_value': -16.687602996826172, 'ave_value': -22.387906551310742, 'soft_opc': nan} step=14448




2022-04-21 22:32.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.37 [info     ] FQE_20220421223004: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017070077186407044, 'time_algorithm_update': 0.009956948978956355, 'loss': 0.3596899641114612, 'time_step': 0.010200045136518256, 'init_value': -16.80565643310547, 'ave_value': -22.444950896273326, 'soft_opc': nan} step=14792




2022-04-21 22:32.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.41 [info     ] FQE_20220421223004: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016930630040723225, 'time_algorithm_update': 0.009797984084417654, 'loss': 0.3733498523347513, 'time_step': 0.010041137767392535, 'init_value': -17.231664657592773, 'ave_value': -22.72283269264945, 'soft_opc': nan} step=15136




2022-04-21 22:32.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.44 [info     ] FQE_20220421223004: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016745647718740064, 'time_algorithm_update': 0.009689556997875834, 'loss': 0.3852601827252223, 'time_step': 0.009929711735525797, 'init_value': -17.25402069091797, 'ave_value': -22.823500128911792, 'soft_opc': nan} step=15480




2022-04-21 22:32.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.48 [info     ] FQE_20220421223004: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016867837240529615, 'time_algorithm_update': 0.009832752998485121, 'loss': 0.39502128465426, 'time_step': 0.010075928859932477, 'init_value': -17.096332550048828, 'ave_value': -22.703970968784546, 'soft_opc': nan} step=15824




2022-04-21 22:32.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.52 [info     ] FQE_20220421223004: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00019171626068824945, 'time_algorithm_update': 0.009734987519508185, 'loss': 0.39746499598719354, 'time_step': 0.010001809791077015, 'init_value': -17.25139045715332, 'ave_value': -22.847087135101745, 'soft_opc': nan} step=16168




2022-04-21 22:32.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.55 [info     ] FQE_20220421223004: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016751816106397053, 'time_algorithm_update': 0.009757206883541373, 'loss': 0.40074667981879913, 'time_step': 0.009997818359108858, 'init_value': -17.468894958496094, 'ave_value': -22.97253340123859, 'soft_opc': nan} step=16512




2022-04-21 22:32.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:32.59 [info     ] FQE_20220421223004: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016810173212095748, 'time_algorithm_update': 0.008804947137832642, 'loss': 0.41824633370121117, 'time_step': 0.009047933096109434, 'init_value': -17.52145004272461, 'ave_value': -23.122102448333322, 'soft_opc': nan} step=16856




2022-04-21 22:32.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 22:33.02 [info     ] FQE_20220421223004: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016772608424341955, 'time_algorithm_update': 0.009160793797914372, 'loss': 0.4264449915783696, 'time_step': 0.009401484977367312, 'init_value': -17.463958740234375, 'ave_value': -23.160734990483302, 'soft_opc': nan} step=17200




2022-04-21 22:33.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421223004/model_17200.pt
search iteration:  2
using hyper params:  [0.004446512015644759, 0.0060274053941248495, 4.474788048687251e-05, 5]
2022-04-21 22:33.02 [debug    ] RoundIterator is selected.
2022-04-21 22:33.02 [info     ] Directory is created at d3rlpy_logs/CQL_20220421223302
2022-04-21 22:33.02 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 22:33.02 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-21 22:33.02 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220421223302/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.004446512015644759, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:33.23 [info     ] CQL_20220421223302: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00038480207410161894, 'time_algorithm_update': 0.05784746401571814, 'temp_loss': 4.910524677679029, 'temp': 0.9919290010295162, 'alpha_loss': -17.74471259254941, 'alpha': 1.0177124645668647, 'critic_loss': 101.87650590135873, 'actor_loss': 3.843314676308692, 'time_step': 0.05832477180944013, 'td_error': 1.3019353789548835, 'init_value': -7.3464555740356445, 'ave_value': -6.662746551789735} step=346
2022-04-21 22:33.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:33.44 [info     ] CQL_20220421223302: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003918843462288035, 'time_algorithm_update': 0.05762368130546085, 'temp_loss': 4.928349426026978, 'temp': 0.9764976687514024, 'alpha_loss': -18.387215851359283, 'alpha': 1.054138267315881, 'critic_loss': 171.2191151304741, 'actor_loss': 8.27101109207021, 'time_step': 0.05810870945109108, 'td_error': 1.382914462875904, 'init_value': -10.450048446655273, 'ave_value': -9.663441003674633} step=692
2022-04-21 22:33.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:34.05 [info     ] CQL_20220421223302: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003878243396736983, 'time_algorithm_update': 0.05787259168018495, 'temp_loss': 4.853110070862522, 'temp': 0.9615916933627487, 'alpha_loss': -19.039695701158116, 'alpha': 1.0923852786163375, 'critic_loss': 350.69775699328824, 'actor_loss': 10.824498446690553, 'time_step': 0.058353249048222006, 'td_error': 1.4020300313011549, 'init_value': -11.580254554748535, 'ave_value': -10.853720984203434} step=1038
2022-04-21 22:34.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:34.26 [info     ] CQL_20220421223302: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003877774828431234, 'time_algorithm_update': 0.05727920573570825, 'temp_loss': 4.779763170749466, 'temp': 0.9470421763169283, 'alpha_loss': -19.718441952170664, 'alpha': 1.1325110338326823, 'critic_loss': 628.0417573961909, 'actor_loss': 9.792140999281337, 'time_step': 0.0577587785059317, 'td_error': 1.3214897595061552, 'init_value': -8.997133255004883, 'ave_value': -8.642833741053748} step=1384
2022-04-21 22:34.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:34.47 [info     ] CQL_20220421223302: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00038654060033015434, 'time_algorithm_update': 0.057716182890654985, 'temp_loss': 4.708355667963193, 'temp': 0.9327952398385616, 'alpha_loss': -20.433447562201174, 'alpha': 1.1745490287080667, 'critic_loss': 1010.6055917023234, 'actor_loss': 6.022363690282568, 'time_step': 0.058195733610605226, 'td_error': 1.2951806120612845, 'init_value': -6.395380973815918, 'ave_value': -6.272939426947567} step=1730
2022-04-21 22:34.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:35.08 [info     ] CQL_20220421223302: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003908335128960582, 'time_algorithm_update': 0.0564258153727978, 'temp_loss': 4.639019694631499, 'temp': 0.9188278568962406, 'alpha_loss': -21.190491907858434, 'alpha': 1.2185576833741514, 'critic_loss': 1433.5858687031475, 'actor_loss': 4.5973887581356685, 'time_step': 0.05690769239657187, 'td_error': 1.303476668969845, 'init_value': -6.2384209632873535, 'ave_value': -6.168216402104479} step=2076
2022-04-21 22:35.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:35.28 [info     ] CQL_20220421223302: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00037907382656384065, 'time_algorithm_update': 0.056463499978787635, 'temp_loss': 4.570401707136562, 'temp': 0.9051201589879273, 'alpha_loss': -21.98903280048701, 'alpha': 1.2645796465046832, 'critic_loss': 1837.7372422411263, 'actor_loss': 4.696685864057155, 'time_step': 0.05693196630202277, 'td_error': 1.313400670067844, 'init_value': -6.548104763031006, 'ave_value': -6.492817006601774} step=2422
2022-04-21 22:35.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:35.49 [info     ] CQL_20220421223302: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00039083833639332326, 'time_algorithm_update': 0.0567345488278163, 'temp_loss': 4.5018979800229815, 'temp': 0.8916598336200494, 'alpha_loss': -22.827663697259275, 'alpha': 1.3126622662378873, 'critic_loss': 2239.5490320459267, 'actor_loss': 5.105735113165971, 'time_step': 0.05721922899257241, 'td_error': 1.327494195637768, 'init_value': -7.206424236297607, 'ave_value': -7.160279179433331} step=2768
2022-04-21 22:35.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:36.10 [info     ] CQL_20220421223302: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003942368347520773, 'time_algorithm_update': 0.05661581637542372, 'temp_loss': 4.435919487407442, 'temp': 0.8784294500516329, 'alpha_loss': -23.700668770453834, 'alpha': 1.3628382100535266, 'critic_loss': 2660.322536578757, 'actor_loss': 5.611975639541714, 'time_step': 0.05710088035274792, 'td_error': 1.3379253746207238, 'init_value': -7.548037052154541, 'ave_value': -7.515619059590267} step=3114
2022-04-21 22:36.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:36.30 [info     ] CQL_20220421223302: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00037318640361631535, 'time_algorithm_update': 0.055353944012195386, 'temp_loss': 4.371132701807628, 'temp': 0.8654221677022174, 'alpha_loss': -24.609714221403088, 'alpha': 1.4151573070900978, 'critic_loss': 3109.323902637283, 'actor_loss': 6.218704455160681, 'time_step': 0.055818679704831516, 'td_error': 1.3543347510834631, 'init_value': -8.315051078796387, 'ave_value': -8.28294109237765} step=3460
2022-04-21 22:36.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:36.50 [info     ] CQL_20220421223302: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003998748139838952, 'time_algorithm_update': 0.05617367394397713, 'temp_loss': 4.306212168897508, 'temp': 0.8526266360213991, 'alpha_loss': -25.55824569194992, 'alpha': 1.4696589329339176, 'critic_loss': 3568.938324151012, 'actor_loss': 6.888268203404597, 'time_step': 0.05666790945681533, 'td_error': 1.3686177070840424, 'init_value': -8.828071594238281, 'ave_value': -8.806014363038816} step=3806
2022-04-21 22:36.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:37.11 [info     ] CQL_20220421223302: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003890226342085469, 'time_algorithm_update': 0.05673844828081958, 'temp_loss': 4.2429958134028265, 'temp': 0.8400364512997556, 'alpha_loss': -26.54507103407314, 'alpha': 1.526411588481396, 'critic_loss': 4055.870556076138, 'actor_loss': 7.567699345550096, 'time_step': 0.05722273291879996, 'td_error': 1.3888014070490036, 'init_value': -9.705001831054688, 'ave_value': -9.682297552677623} step=4152
2022-04-21 22:37.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:37.31 [info     ] CQL_20220421223302: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00038360377956677034, 'time_algorithm_update': 0.05631383650564734, 'temp_loss': 4.180293528330808, 'temp': 0.8276447388478098, 'alpha_loss': -27.569208216804988, 'alpha': 1.5854648441248547, 'critic_loss': 4561.09522613349, 'actor_loss': 8.356505447729475, 'time_step': 0.0567897571993701, 'td_error': 1.4075029303206255, 'init_value': -10.387436866760254, 'ave_value': -10.36826453283535} step=4498
2022-04-21 22:37.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:37.52 [info     ] CQL_20220421223302: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003847938052491646, 'time_algorithm_update': 0.05648790342959365, 'temp_loss': 4.117653466373509, 'temp': 0.8154447561054561, 'alpha_loss': -28.63952159330335, 'alpha': 1.646895717334196, 'critic_loss': 5080.042944759302, 'actor_loss': 9.124138399355674, 'time_step': 0.05696656318069193, 'td_error': 1.4324606719894313, 'init_value': -11.3189058303833, 'ave_value': -11.297759704535581} step=4844
2022-04-21 22:37.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:38.13 [info     ] CQL_20220421223302: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003775923238324292, 'time_algorithm_update': 0.05833485016243995, 'temp_loss': 4.057609015117491, 'temp': 0.8034346584295262, 'alpha_loss': -29.751625485502917, 'alpha': 1.7107837320752226, 'critic_loss': 5590.2625527795335, 'actor_loss': 9.943263963467812, 'time_step': 0.058803275141412814, 'td_error': 1.4489171485989223, 'init_value': -11.77214527130127, 'ave_value': -11.764053772500818} step=5190
2022-04-21 22:38.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:38.35 [info     ] CQL_20220421223302: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003840475413151559, 'time_algorithm_update': 0.061167941617138816, 'temp_loss': 3.9991097277988588, 'temp': 0.7916034685738514, 'alpha_loss': -30.904068820049307, 'alpha': 1.7772094810629167, 'critic_loss': 6160.257662910946, 'actor_loss': 10.833022553107641, 'time_step': 0.06164522184801929, 'td_error': 1.4771340947069123, 'init_value': -12.722740173339844, 'ave_value': -12.714739060978463} step=5536
2022-04-21 22:38.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:38.58 [info     ] CQL_20220421223302: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003905048260109962, 'time_algorithm_update': 0.060647009425080584, 'temp_loss': 3.939498533403253, 'temp': 0.7799510948919837, 'alpha_loss': -32.106184562506705, 'alpha': 1.846258092133296, 'critic_loss': 6751.547535449783, 'actor_loss': 11.738859217980005, 'time_step': 0.0611260647029546, 'td_error': 1.5150326219242283, 'init_value': -13.966586112976074, 'ave_value': -13.950726747580646} step=5882
2022-04-21 22:38.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:39.20 [info     ] CQL_20220421223302: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.000384673217817538, 'time_algorithm_update': 0.0610873582046156, 'temp_loss': 3.881970096185717, 'temp': 0.7684740692893893, 'alpha_loss': -33.34950148301318, 'alpha': 1.9180184812904093, 'critic_loss': 7364.010595420881, 'actor_loss': 12.64728103207715, 'time_step': 0.061567125981942764, 'td_error': 1.5363679257383078, 'init_value': -14.492962837219238, 'ave_value': -14.489330800555226} step=6228
2022-04-21 22:39.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:39.42 [info     ] CQL_20220421223302: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00038572611836339696, 'time_algorithm_update': 0.0599695995363886, 'temp_loss': 3.82428298520215, 'temp': 0.7571675663738582, 'alpha_loss': -34.65213303758919, 'alpha': 1.9926016378953966, 'critic_loss': 7964.581986091041, 'actor_loss': 13.655908468830793, 'time_step': 0.060445170182024124, 'td_error': 1.571066984164645, 'init_value': -15.463935852050781, 'ave_value': -15.459986295198139} step=6574
2022-04-21 22:39.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:40.04 [info     ] CQL_20220421223302: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00039139579486295665, 'time_algorithm_update': 0.060372171374414696, 'temp_loss': 3.768102996611182, 'temp': 0.7460308772635598, 'alpha_loss': -35.99991182095743, 'alpha': 2.0701079086072185, 'critic_loss': 8565.76591712202, 'actor_loss': 14.637184239536351, 'time_step': 0.06085414969163134, 'td_error': 1.612114425525976, 'init_value': -16.586711883544922, 'ave_value': -16.57874169600637} step=6920
2022-04-21 22:40.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:40.26 [info     ] CQL_20220421223302: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00038795595224193065, 'time_algorithm_update': 0.060635012698311334, 'temp_loss': 3.713044961752919, 'temp': 0.7350586165237978, 'alpha_loss': -37.39975268854571, 'alpha': 2.1506478359244463, 'critic_loss': 9145.137715069544, 'actor_loss': 15.575086690097875, 'time_step': 0.06111361732372659, 'td_error': 1.6500438599798115, 'init_value': -17.55177879333496, 'ave_value': -17.543725301056124} step=7266
2022-04-21 22:40.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:40.48 [info     ] CQL_20220421223302: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00039173895223981384, 'time_algorithm_update': 0.06018148405703506, 'temp_loss': 3.657982385916517, 'temp': 0.724248213919601, 'alpha_loss': -38.8551436517969, 'alpha': 2.234325189810957, 'critic_loss': 9629.348675149025, 'actor_loss': 16.492030424878777, 'time_step': 0.06066399847151916, 'td_error': 1.6734988290142432, 'init_value': -18.037208557128906, 'ave_value': -18.04091746989106} step=7612
2022-04-21 22:40.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:41.09 [info     ] CQL_20220421223302: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00038287680962182193, 'time_algorithm_update': 0.05971998079663756, 'temp_loss': 3.604549656024558, 'temp': 0.7135980306677736, 'alpha_loss': -40.36468143132381, 'alpha': 2.3212675462568426, 'critic_loss': 9954.696088669618, 'actor_loss': 17.369325003872028, 'time_step': 0.06019052260183875, 'td_error': 1.72769200509251, 'init_value': -19.367740631103516, 'ave_value': -19.359425004911174} step=7958
2022-04-21 22:41.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:41.31 [info     ] CQL_20220421223302: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0003856007074345054, 'time_algorithm_update': 0.05985939640530272, 'temp_loss': 3.5510393422463036, 'temp': 0.7031055793941365, 'alpha_loss': -41.933629835272114, 'alpha': 2.411600988724328, 'critic_loss': 10255.878082098989, 'actor_loss': 18.39377654632392, 'time_step': 0.060328423632362675, 'td_error': 1.763833141506401, 'init_value': -20.132230758666992, 'ave_value': -20.130171547460577} step=8304
2022-04-21 22:41.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:41.53 [info     ] CQL_20220421223302: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003854814981449546, 'time_algorithm_update': 0.059774912850705186, 'temp_loss': 3.4987246177100033, 'temp': 0.6927681740997844, 'alpha_loss': -43.576557490177926, 'alpha': 2.505456838993668, 'critic_loss': 10817.390348401374, 'actor_loss': 19.43582027633755, 'time_step': 0.06024451476301072, 'td_error': 1.8278809415535844, 'init_value': -21.54483413696289, 'ave_value': -21.532419371118813} step=8650
2022-04-21 22:41.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:42.15 [info     ] CQL_20220421223302: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003858666888551216, 'time_algorithm_update': 0.05956162331421251, 'temp_loss': 3.4477662709407033, 'temp': 0.682581696034856, 'alpha_loss': -45.25850601416792, 'alpha': 2.602978886207404, 'critic_loss': 11453.865220262825, 'actor_loss': 20.439096064925884, 'time_step': 0.060026988817777245, 'td_error': 1.8636674671203644, 'init_value': -22.22710609436035, 'ave_value': -22.222593536847235} step=8996
2022-04-21 22:42.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:42.37 [info     ] CQL_20220421223302: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00038862090579347116, 'time_algorithm_update': 0.06029065909413244, 'temp_loss': 3.396567716074817, 'temp': 0.6725466082550887, 'alpha_loss': -47.023890842592095, 'alpha': 2.7042839155031766, 'critic_loss': 11979.306603933346, 'actor_loss': 21.396470229749735, 'time_step': 0.06076431756763789, 'td_error': 1.9066952616214177, 'init_value': -23.062040328979492, 'ave_value': -23.06175965968892} step=9342
2022-04-21 22:42.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:42.58 [info     ] CQL_20220421223302: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00038926380907179994, 'time_algorithm_update': 0.06003591366585968, 'temp_loss': 3.3465577012541665, 'temp': 0.6626593126037906, 'alpha_loss': -48.84053075933732, 'alpha': 2.809526217466145, 'critic_loss': 12196.38365358562, 'actor_loss': 22.260938754660547, 'time_step': 0.06051044588144115, 'td_error': 1.9398644212907452, 'init_value': -23.65050506591797, 'ave_value': -23.655104692765693} step=9688
2022-04-21 22:42.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:43.20 [info     ] CQL_20220421223302: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00039160044896120283, 'time_algorithm_update': 0.06003113633635416, 'temp_loss': 3.2981162071228027, 'temp': 0.6529155929998166, 'alpha_loss': -50.754717049571134, 'alpha': 2.918856961189667, 'critic_loss': 10645.538918555816, 'actor_loss': 22.657046566119774, 'time_step': 0.060512752891275924, 'td_error': 1.9652241643236648, 'init_value': -24.11126136779785, 'ave_value': -24.11938858918524} step=10034
2022-04-21 22:43.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:43.42 [info     ] CQL_20220421223302: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00039177685114689644, 'time_algorithm_update': 0.06016765646851821, 'temp_loss': 3.2490715250114484, 'temp': 0.6433168258625648, 'alpha_loss': -52.734590331943046, 'alpha': 3.0324903492293607, 'critic_loss': 8727.251370292179, 'actor_loss': 23.316160968273362, 'time_step': 0.06065420401578694, 'td_error': 2.0079635052699047, 'init_value': -24.909704208374023, 'ave_value': -24.91637215232216} step=10380
2022-04-21 22:43.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:44.04 [info     ] CQL_20220421223302: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003884906713673145, 'time_algorithm_update': 0.06050715763444845, 'temp_loss': 3.200960796003397, 'temp': 0.6338581403211362, 'alpha_loss': -54.784489846643, 'alpha': 3.1505459450572904, 'critic_loss': 7837.5633467304915, 'actor_loss': 24.3381799135594, 'time_step': 0.06098517241505529, 'td_error': 2.07447935969559, 'init_value': -26.12396812438965, 'ave_value': -26.123205150389005} step=10726
2022-04-21 22:44.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:44.25 [info     ] CQL_20220421223302: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003902857014209549, 'time_algorithm_update': 0.05703908170578797, 'temp_loss': 3.153943460111673, 'temp': 0.624539657307498, 'alpha_loss': -56.9105891740391, 'alpha': 3.273180767979925, 'critic_loss': 7450.122265060513, 'actor_loss': 25.37781080345198, 'time_step': 0.05751851735087488, 'td_error': 2.136237591613196, 'init_value': -27.182601928710938, 'ave_value': -27.17932797925288} step=11072
2022-04-21 22:44.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:44.46 [info     ] CQL_20220421223302: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.000390737042950757, 'time_algorithm_update': 0.056780672486806884, 'temp_loss': 3.1079804277144416, 'temp': 0.6153579216471986, 'alpha_loss': -59.12888363744482, 'alpha': 3.400570050829408, 'critic_loss': 7706.20630729543, 'actor_loss': 26.736645803286162, 'time_step': 0.05726382291385893, 'td_error': 2.2143965642191095, 'init_value': -28.45091438293457, 'ave_value': -28.44833809267915} step=11418
2022-04-21 22:44.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:45.06 [info     ] CQL_20220421223302: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00038643723967447444, 'time_algorithm_update': 0.05641668655968815, 'temp_loss': 3.0625843140431224, 'temp': 0.6063097531051305, 'alpha_loss': -61.426577022309935, 'alpha': 3.532940127946049, 'critic_loss': 8147.040547100794, 'actor_loss': 27.935956993544032, 'time_step': 0.05689175556160811, 'td_error': 2.281976917298934, 'init_value': -29.481172561645508, 'ave_value': -29.483901304766825} step=11764
2022-04-21 22:45.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:45.26 [info     ] CQL_20220421223302: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00039378687136435093, 'time_algorithm_update': 0.05383923839282438, 'temp_loss': 3.0168359196944046, 'temp': 0.5973964650506918, 'alpha_loss': -63.82941000172168, 'alpha': 3.6704689291860326, 'critic_loss': 8469.116665349531, 'actor_loss': 29.15092081830681, 'time_step': 0.05432738458490096, 'td_error': 2.368942676653314, 'init_value': -30.837627410888672, 'ave_value': -30.83494259150122} step=12110
2022-04-21 22:45.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:45.45 [info     ] CQL_20220421223302: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00038587771399172747, 'time_algorithm_update': 0.0529613715375779, 'temp_loss': 2.972778158380806, 'temp': 0.5886141730181743, 'alpha_loss': -66.3088424991321, 'alpha': 3.813350232350344, 'critic_loss': 8506.404391426571, 'actor_loss': 30.241825241573974, 'time_step': 0.05343593131600088, 'td_error': 2.4359592602256863, 'init_value': -31.804908752441406, 'ave_value': -31.806381360701103} step=12456
2022-04-21 22:45.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:46.04 [info     ] CQL_20220421223302: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00039265679486225107, 'time_algorithm_update': 0.05269638306832727, 'temp_loss': 2.9289454387102514, 'temp': 0.5799601908364048, 'alpha_loss': -68.90242586521744, 'alpha': 3.9618072874973276, 'critic_loss': 7984.987466977511, 'actor_loss': 31.083244031564348, 'time_step': 0.05318244757679846, 'td_error': 2.5047237352447684, 'init_value': -32.81272888183594, 'ave_value': -32.808487593251535} step=12802
2022-04-21 22:46.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:46.23 [info     ] CQL_20220421223302: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00038170401071537435, 'time_algorithm_update': 0.0524306841668366, 'temp_loss': 2.8858647966660516, 'temp': 0.5714340604454107, 'alpha_loss': -71.5676138552627, 'alpha': 4.116001086427986, 'critic_loss': 8007.866521405347, 'actor_loss': 32.242500939121136, 'time_step': 0.0529026275425288, 'td_error': 2.5657999246283625, 'init_value': -33.61805725097656, 'ave_value': -33.620870574111905} step=13148
2022-04-21 22:46.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:46.42 [info     ] CQL_20220421223302: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003884582850285348, 'time_algorithm_update': 0.05241213575263933, 'temp_loss': 2.8437050849716097, 'temp': 0.5630326815423249, 'alpha_loss': -74.37725512554191, 'alpha': 4.276233772321932, 'critic_loss': 7860.469980581647, 'actor_loss': 33.24392417951815, 'time_step': 0.05289708052067398, 'td_error': 2.671616078126386, 'init_value': -35.116729736328125, 'ave_value': -35.102982058464036} step=13494
2022-04-21 22:46.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:47.02 [info     ] CQL_20220421223302: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003866873724612197, 'time_algorithm_update': 0.05268244385030228, 'temp_loss': 2.8013993308723317, 'temp': 0.5547558716955902, 'alpha_loss': -77.24690870604763, 'alpha': 4.442693531168678, 'critic_loss': 8265.974061822615, 'actor_loss': 34.332974704014774, 'time_step': 0.05315929685713928, 'td_error': 2.731345388990285, 'init_value': -35.842247009277344, 'ave_value': -35.840975740160516} step=13840
2022-04-21 22:47.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:47.21 [info     ] CQL_20220421223302: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.000391876766447387, 'time_algorithm_update': 0.05258227703888292, 'temp_loss': 2.7604481746695635, 'temp': 0.5466003173348531, 'alpha_loss': -80.26002365729713, 'alpha': 4.6156173551702775, 'critic_loss': 8577.862093004876, 'actor_loss': 35.35844125913058, 'time_step': 0.05306162930637426, 'td_error': 2.80756530791737, 'init_value': -36.80391311645508, 'ave_value': -36.80508775815064} step=14186
2022-04-21 22:47.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:47.40 [info     ] CQL_20220421223302: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.000391342736393041, 'time_algorithm_update': 0.05238407884719055, 'temp_loss': 2.719894105988431, 'temp': 0.5385637414248693, 'alpha_loss': -83.37326162261081, 'alpha': 4.795266665475217, 'critic_loss': 8741.467355717125, 'actor_loss': 36.34935479357063, 'time_step': 0.05286748629773973, 'td_error': 2.878010582585436, 'init_value': -37.675201416015625, 'ave_value': -37.680156423424926} step=14532
2022-04-21 22:47.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:47.59 [info     ] CQL_20220421223302: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00038487856098682206, 'time_algorithm_update': 0.052943173171467864, 'temp_loss': 2.6803322045100217, 'temp': 0.5306452238835351, 'alpha_loss': -86.62004428929676, 'alpha': 4.981899411692096, 'critic_loss': 8946.264323857478, 'actor_loss': 37.32081337195601, 'time_step': 0.05341907732748572, 'td_error': 2.958172107143109, 'init_value': -38.65587615966797, 'ave_value': -38.6584446206597} step=14878
2022-04-21 22:47.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:48.19 [info     ] CQL_20220421223302: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003892541620772698, 'time_algorithm_update': 0.053348254606213875, 'temp_loss': 2.641043383262061, 'temp': 0.5228423564075735, 'alpha_loss': -90.02238155651644, 'alpha': 5.175836891108165, 'critic_loss': 9127.171762102602, 'actor_loss': 38.23377409697957, 'time_step': 0.05382855227916916, 'td_error': 3.0343080049074502, 'init_value': -39.575294494628906, 'ave_value': -39.5759661483222} step=15224
2022-04-21 22:48.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:48.39 [info     ] CQL_20220421223302: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003972239577012255, 'time_algorithm_update': 0.05659456266833179, 'temp_loss': 2.601894767987246, 'temp': 0.5151550547580499, 'alpha_loss': -93.50740549605706, 'alpha': 5.377337232490495, 'critic_loss': 7486.962515241149, 'actor_loss': 38.4286149195853, 'time_step': 0.05708450733581719, 'td_error': 3.0410750687942456, 'init_value': -39.621707916259766, 'ave_value': -39.62837359671233} step=15570
2022-04-21 22:48.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:48.59 [info     ] CQL_20220421223302: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00037121566044801923, 'time_algorithm_update': 0.05452488681484509, 'temp_loss': 2.5635840348425627, 'temp': 0.5075812038314136, 'alpha_loss': -97.14053421902518, 'alpha': 5.58664058260835, 'critic_loss': 6168.68628776418, 'actor_loss': 38.97033908601441, 'time_step': 0.054986738745187745, 'td_error': 3.0917191964190187, 'init_value': -40.239044189453125, 'ave_value': -40.24182438802923} step=15916
2022-04-21 22:48.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:49.20 [info     ] CQL_20220421223302: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003932301019657554, 'time_algorithm_update': 0.05611231147898415, 'temp_loss': 2.5260391979548285, 'temp': 0.5001188367605209, 'alpha_loss': -100.92984262367204, 'alpha': 5.804108928393767, 'critic_loss': 5250.038584097272, 'actor_loss': 39.61971166092536, 'time_step': 0.0565970888027566, 'td_error': 3.175882649642169, 'init_value': -41.265350341796875, 'ave_value': -41.252446421609214} step=16262
2022-04-21 22:49.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:49.40 [info     ] CQL_20220421223302: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003904180030602251, 'time_algorithm_update': 0.05572990806116534, 'temp_loss': 2.4886617922369454, 'temp': 0.49276653474810495, 'alpha_loss': -104.86070948253477, 'alpha': 6.0300351539788215, 'critic_loss': 5713.1044061032335, 'actor_loss': 40.864707913701935, 'time_step': 0.05620799037073389, 'td_error': 3.277895133119563, 'init_value': -42.397193908691406, 'ave_value': -42.388139975246176} step=16608
2022-04-21 22:49.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:50.01 [info     ] CQL_20220421223302: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00038539191891003206, 'time_algorithm_update': 0.057094228750019405, 'temp_loss': 2.452491338542431, 'temp': 0.48552203385127074, 'alpha_loss': -108.92796984986762, 'alpha': 6.264749491145845, 'critic_loss': 5930.27147308526, 'actor_loss': 41.8204972482141, 'time_step': 0.05756875131860634, 'td_error': 3.3604320699572865, 'init_value': -43.276424407958984, 'ave_value': -43.27390306869803} step=16954
2022-04-21 22:50.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:50.22 [info     ] CQL_20220421223302: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003938702489599327, 'time_algorithm_update': 0.0592550569876081, 'temp_loss': 2.415956298739924, 'temp': 0.4783837237971367, 'alpha_loss': -113.19711358836621, 'alpha': 6.508624192607196, 'critic_loss': 5943.776330495845, 'actor_loss': 42.710620902177226, 'time_step': 0.05974001760427662, 'td_error': 3.4219249410780654, 'init_value': -43.92951583862305, 'ave_value': -43.92855522622425} step=17300
2022-04-21 22:50.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421223302/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 22:50.24 [info     ] FQE_20220421225023: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015935122248638108, 'time_algorithm_update': 0.009782384677105639, 'loss': 0.007076957539363529, 'time_step': 0.010011348379663674, 'init_value': -0.1890489161014557, 'ave_value': -0.16020133453714955, 'soft_opc': nan} step=166




2022-04-21 22:50.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.26 [info     ] FQE_20220421225023: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016411959406841233, 'time_algorithm_update': 0.009184254221169346, 'loss': 0.004161613785762744, 'time_step': 0.009416380560541728, 'init_value': -0.2398911416530609, 'ave_value': -0.17957555177025833, 'soft_opc': nan} step=332




2022-04-21 22:50.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.28 [info     ] FQE_20220421225023: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015836307801396013, 'time_algorithm_update': 0.00993898834090635, 'loss': 0.003425225724202457, 'time_step': 0.010164806641728044, 'init_value': -0.26057541370391846, 'ave_value': -0.1941515929241841, 'soft_opc': nan} step=498




2022-04-21 22:50.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.30 [info     ] FQE_20220421225023: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016400038477886155, 'time_algorithm_update': 0.009960568094828042, 'loss': 0.003149738439908588, 'time_step': 0.010195740734238222, 'init_value': -0.29177871346473694, 'ave_value': -0.22021510608594966, 'soft_opc': nan} step=664




2022-04-21 22:50.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.31 [info     ] FQE_20220421225023: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016214474138007107, 'time_algorithm_update': 0.009632367685616735, 'loss': 0.0028563107753800878, 'time_step': 0.009865146085440394, 'init_value': -0.31225472688674927, 'ave_value': -0.24645453487136948, 'soft_opc': nan} step=830




2022-04-21 22:50.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.33 [info     ] FQE_20220421225023: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001598654023136001, 'time_algorithm_update': 0.009094420685825578, 'loss': 0.002467312770811504, 'time_step': 0.009321416716977775, 'init_value': -0.3398403525352478, 'ave_value': -0.2782937112088139, 'soft_opc': nan} step=996




2022-04-21 22:50.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.35 [info     ] FQE_20220421225023: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016269051885030357, 'time_algorithm_update': 0.009754766900855374, 'loss': 0.0022528152632336302, 'time_step': 0.009986580136310623, 'init_value': -0.3516143560409546, 'ave_value': -0.3029964873869274, 'soft_opc': nan} step=1162




2022-04-21 22:50.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.37 [info     ] FQE_20220421225023: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015967868896852057, 'time_algorithm_update': 0.009881634310067418, 'loss': 0.0019436355400409729, 'time_step': 0.01011770317353398, 'init_value': -0.36970606446266174, 'ave_value': -0.33130057751629, 'soft_opc': nan} step=1328




2022-04-21 22:50.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.39 [info     ] FQE_20220421225023: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015944745167192207, 'time_algorithm_update': 0.009698386651923857, 'loss': 0.0016861732334532518, 'time_step': 0.009927178003701818, 'init_value': -0.37332260608673096, 'ave_value': -0.3454891936937431, 'soft_opc': nan} step=1494




2022-04-21 22:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.40 [info     ] FQE_20220421225023: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016277525798383965, 'time_algorithm_update': 0.009847967021436576, 'loss': 0.0015276426011808664, 'time_step': 0.010081156190619412, 'init_value': -0.40501102805137634, 'ave_value': -0.3927040880487242, 'soft_opc': nan} step=1660




2022-04-21 22:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.42 [info     ] FQE_20220421225023: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015858713402805557, 'time_algorithm_update': 0.009383037865880024, 'loss': 0.0014391606501780495, 'time_step': 0.009610118636165756, 'init_value': -0.4626556932926178, 'ave_value': -0.4620051738088699, 'soft_opc': nan} step=1826




2022-04-21 22:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.44 [info     ] FQE_20220421225023: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001596111849129918, 'time_algorithm_update': 0.009573147957583508, 'loss': 0.0013549555089894165, 'time_step': 0.009828573249908814, 'init_value': -0.44975197315216064, 'ave_value': -0.45601388569585644, 'soft_opc': nan} step=1992




2022-04-21 22:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.46 [info     ] FQE_20220421225023: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016140363302575536, 'time_algorithm_update': 0.00992476509278079, 'loss': 0.0014332808727392904, 'time_step': 0.010156370071043452, 'init_value': -0.5223681926727295, 'ave_value': -0.5322360686529931, 'soft_opc': nan} step=2158




2022-04-21 22:50.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.47 [info     ] FQE_20220421225023: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.000160994299923081, 'time_algorithm_update': 0.009521115257079342, 'loss': 0.0015394616421839469, 'time_step': 0.009753247341477728, 'init_value': -0.5928250551223755, 'ave_value': -0.6123707420679296, 'soft_opc': nan} step=2324




2022-04-21 22:50.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.49 [info     ] FQE_20220421225023: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015701586941638626, 'time_algorithm_update': 0.00909780737865402, 'loss': 0.0015522339574134268, 'time_step': 0.009322330176112163, 'init_value': -0.6353824734687805, 'ave_value': -0.652939590600294, 'soft_opc': nan} step=2490




2022-04-21 22:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.51 [info     ] FQE_20220421225023: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.000158271157597921, 'time_algorithm_update': 0.009826382958745382, 'loss': 0.0016916288600824997, 'time_step': 0.010055147021649832, 'init_value': -0.7026281356811523, 'ave_value': -0.708878197969852, 'soft_opc': nan} step=2656




2022-04-21 22:50.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.53 [info     ] FQE_20220421225023: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015723274414797863, 'time_algorithm_update': 0.009897033852266979, 'loss': 0.0018295738274506461, 'time_step': 0.0101238546601261, 'init_value': -0.7326726317405701, 'ave_value': -0.7361940860123214, 'soft_opc': nan} step=2822




2022-04-21 22:50.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.55 [info     ] FQE_20220421225023: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016264312238578336, 'time_algorithm_update': 0.00987372771803155, 'loss': 0.002025170734044198, 'time_step': 0.010109502148915487, 'init_value': -0.7825809717178345, 'ave_value': -0.7695292174988133, 'soft_opc': nan} step=2988




2022-04-21 22:50.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.56 [info     ] FQE_20220421225023: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016077886144798923, 'time_algorithm_update': 0.00939530493265175, 'loss': 0.0022050319441120105, 'time_step': 0.009625095918954137, 'init_value': -0.8641945719718933, 'ave_value': -0.8437803276051004, 'soft_opc': nan} step=3154




2022-04-21 22:50.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:50.58 [info     ] FQE_20220421225023: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015558248542877566, 'time_algorithm_update': 0.00962479861385851, 'loss': 0.0024297533234923585, 'time_step': 0.009846384266772902, 'init_value': -0.9175677299499512, 'ave_value': -0.8805376049827549, 'soft_opc': nan} step=3320




2022-04-21 22:50.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.00 [info     ] FQE_20220421225023: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015774836023169827, 'time_algorithm_update': 0.009954396500644913, 'loss': 0.002497741873993213, 'time_step': 0.010181705635714244, 'init_value': -0.945339024066925, 'ave_value': -0.9016141697256784, 'soft_opc': nan} step=3486




2022-04-21 22:51.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.02 [info     ] FQE_20220421225023: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015965570886451077, 'time_algorithm_update': 0.009746801422302982, 'loss': 0.0025141498111454904, 'time_step': 0.009979427578937576, 'init_value': -1.0550471544265747, 'ave_value': -0.9846747552413863, 'soft_opc': nan} step=3652




2022-04-21 22:51.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.03 [info     ] FQE_20220421225023: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016026324536426957, 'time_algorithm_update': 0.009550834276590002, 'loss': 0.0029333923657211556, 'time_step': 0.00977809027016881, 'init_value': -1.1211178302764893, 'ave_value': -1.026934938632405, 'soft_opc': nan} step=3818




2022-04-21 22:51.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.05 [info     ] FQE_20220421225023: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001638021813817771, 'time_algorithm_update': 0.009384828877736288, 'loss': 0.003196762647502214, 'time_step': 0.00961872037634792, 'init_value': -1.1963624954223633, 'ave_value': -1.0869099408052527, 'soft_opc': nan} step=3984




2022-04-21 22:51.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.07 [info     ] FQE_20220421225023: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016376052994325938, 'time_algorithm_update': 0.009845693427396107, 'loss': 0.003177009989452795, 'time_step': 0.010079436991588179, 'init_value': -1.2912005186080933, 'ave_value': -1.1629153678881692, 'soft_opc': nan} step=4150




2022-04-21 22:51.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.09 [info     ] FQE_20220421225023: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016221368169210045, 'time_algorithm_update': 0.009639503007911774, 'loss': 0.0036663816305906906, 'time_step': 0.009871475667838591, 'init_value': -1.3556935787200928, 'ave_value': -1.2214124356165763, 'soft_opc': nan} step=4316




2022-04-21 22:51.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.11 [info     ] FQE_20220421225023: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016333539801907827, 'time_algorithm_update': 0.009924136012433523, 'loss': 0.0039017908438178993, 'time_step': 0.010155718010592174, 'init_value': -1.3422249555587769, 'ave_value': -1.1929844996422059, 'soft_opc': nan} step=4482




2022-04-21 22:51.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.12 [info     ] FQE_20220421225023: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001603264406502965, 'time_algorithm_update': 0.009377045803759471, 'loss': 0.004264984553376864, 'time_step': 0.00960751901189965, 'init_value': -1.4416369199752808, 'ave_value': -1.262799729875012, 'soft_opc': nan} step=4648




2022-04-21 22:51.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.14 [info     ] FQE_20220421225023: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016178136848541628, 'time_algorithm_update': 0.009689414357564536, 'loss': 0.004666408276008667, 'time_step': 0.009923041584980056, 'init_value': -1.5128782987594604, 'ave_value': -1.3195555338728333, 'soft_opc': nan} step=4814




2022-04-21 22:51.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.16 [info     ] FQE_20220421225023: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016069986734045557, 'time_algorithm_update': 0.009752743215446013, 'loss': 0.004720534761303037, 'time_step': 0.009984702949064323, 'init_value': -1.5597221851348877, 'ave_value': -1.349036982232235, 'soft_opc': nan} step=4980




2022-04-21 22:51.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.18 [info     ] FQE_20220421225023: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015834009790995033, 'time_algorithm_update': 0.009471338915537638, 'loss': 0.005255676038403356, 'time_step': 0.009698889341699072, 'init_value': -1.6536405086517334, 'ave_value': -1.4099153043114925, 'soft_opc': nan} step=5146




2022-04-21 22:51.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.19 [info     ] FQE_20220421225023: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.000164224440792957, 'time_algorithm_update': 0.009791302393717938, 'loss': 0.005455810921540181, 'time_step': 0.010025506996246705, 'init_value': -1.7400755882263184, 'ave_value': -1.4824507070814368, 'soft_opc': nan} step=5312




2022-04-21 22:51.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.21 [info     ] FQE_20220421225023: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016015121735722185, 'time_algorithm_update': 0.009117823049246547, 'loss': 0.00582313517680818, 'time_step': 0.009347059640539697, 'init_value': -1.7837802171707153, 'ave_value': -1.496121588886321, 'soft_opc': nan} step=5478




2022-04-21 22:51.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.23 [info     ] FQE_20220421225023: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016900573868349375, 'time_algorithm_update': 0.009930839021521878, 'loss': 0.006431679234872235, 'time_step': 0.01017564032451216, 'init_value': -1.875169277191162, 'ave_value': -1.5535331392576834, 'soft_opc': nan} step=5644




2022-04-21 22:51.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.25 [info     ] FQE_20220421225023: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015587548175490047, 'time_algorithm_update': 0.009865772293274662, 'loss': 0.006611427344123732, 'time_step': 0.010092219674443624, 'init_value': -1.9119161367416382, 'ave_value': -1.583889029632314, 'soft_opc': nan} step=5810




2022-04-21 22:51.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.27 [info     ] FQE_20220421225023: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015930095350885965, 'time_algorithm_update': 0.009879018886979804, 'loss': 0.006928062043643658, 'time_step': 0.010112840009022909, 'init_value': -1.9758985042572021, 'ave_value': -1.6265069075864278, 'soft_opc': nan} step=5976




2022-04-21 22:51.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.28 [info     ] FQE_20220421225023: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015861155038856598, 'time_algorithm_update': 0.009153952081519437, 'loss': 0.007303659009245078, 'time_step': 0.009384033191634947, 'init_value': -2.0502169132232666, 'ave_value': -1.6807211003576716, 'soft_opc': nan} step=6142




2022-04-21 22:51.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.30 [info     ] FQE_20220421225023: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016107616654361588, 'time_algorithm_update': 0.009863365127379635, 'loss': 0.00730229193780919, 'time_step': 0.01009535214987146, 'init_value': -2.0780792236328125, 'ave_value': -1.6850366965569723, 'soft_opc': nan} step=6308




2022-04-21 22:51.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.32 [info     ] FQE_20220421225023: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016300505902393754, 'time_algorithm_update': 0.00946535403469959, 'loss': 0.008106711118254834, 'time_step': 0.009697912687278655, 'init_value': -2.132321834564209, 'ave_value': -1.7252360821768709, 'soft_opc': nan} step=6474




2022-04-21 22:51.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.34 [info     ] FQE_20220421225023: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015899215836122813, 'time_algorithm_update': 0.00955395238945283, 'loss': 0.008523988322016828, 'time_step': 0.009782262595303088, 'init_value': -2.2336034774780273, 'ave_value': -1.7899031678667745, 'soft_opc': nan} step=6640




2022-04-21 22:51.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.35 [info     ] FQE_20220421225023: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001586244766970715, 'time_algorithm_update': 0.009497530489082796, 'loss': 0.008932035978278987, 'time_step': 0.00972586080252406, 'init_value': -2.2853381633758545, 'ave_value': -1.8093189180799265, 'soft_opc': nan} step=6806




2022-04-21 22:51.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.37 [info     ] FQE_20220421225023: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015873794096061983, 'time_algorithm_update': 0.009737916739590197, 'loss': 0.009449942860435358, 'time_step': 0.009964776326374835, 'init_value': -2.313429355621338, 'ave_value': -1.8027078239201055, 'soft_opc': nan} step=6972




2022-04-21 22:51.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.39 [info     ] FQE_20220421225023: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016235299857265978, 'time_algorithm_update': 0.009862309478851685, 'loss': 0.00929182229341984, 'time_step': 0.01009676255375506, 'init_value': -2.332486867904663, 'ave_value': -1.8013125554364813, 'soft_opc': nan} step=7138




2022-04-21 22:51.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.41 [info     ] FQE_20220421225023: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016013110976621328, 'time_algorithm_update': 0.009681825178215303, 'loss': 0.009631281024678218, 'time_step': 0.009910149746630565, 'init_value': -2.431309223175049, 'ave_value': -1.897891965364148, 'soft_opc': nan} step=7304




2022-04-21 22:51.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.43 [info     ] FQE_20220421225023: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016360972301069512, 'time_algorithm_update': 0.009761037596737045, 'loss': 0.010160218064844361, 'time_step': 0.009993879191846732, 'init_value': -2.4536051750183105, 'ave_value': -1.8974658996248535, 'soft_opc': nan} step=7470




2022-04-21 22:51.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.44 [info     ] FQE_20220421225023: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015846361596900295, 'time_algorithm_update': 0.009387638195451483, 'loss': 0.010596697949467743, 'time_step': 0.00961683744407562, 'init_value': -2.519017457962036, 'ave_value': -1.9295090896249456, 'soft_opc': nan} step=7636




2022-04-21 22:51.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.46 [info     ] FQE_20220421225023: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016134043773972844, 'time_algorithm_update': 0.00957223449844912, 'loss': 0.010956868417813521, 'time_step': 0.009804573403783592, 'init_value': -2.481867790222168, 'ave_value': -1.8856594453234359, 'soft_opc': nan} step=7802




2022-04-21 22:51.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.48 [info     ] FQE_20220421225023: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016043272363134176, 'time_algorithm_update': 0.0095799385783184, 'loss': 0.010951200253274068, 'time_step': 0.009811892566910708, 'init_value': -2.5489141941070557, 'ave_value': -1.9370152463746333, 'soft_opc': nan} step=7968




2022-04-21 22:51.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.50 [info     ] FQE_20220421225023: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016029340675078243, 'time_algorithm_update': 0.009897787886929799, 'loss': 0.010974163055933272, 'time_step': 0.010133300919130624, 'init_value': -2.587801933288574, 'ave_value': -1.9660389645809626, 'soft_opc': nan} step=8134




2022-04-21 22:51.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 22:51.51 [info     ] FQE_20220421225023: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001604384686573442, 'time_algorithm_update': 0.009106239640569112, 'loss': 0.0115399585501361, 'time_step': 0.00933481124510248, 'init_value': -2.599125862121582, 'ave_value': -1.9661320187839435, 'soft_opc': nan} step=8300




2022-04-21 22:51.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225023/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-21 22:51.52 [info     ] Directory is created at d3rlpy_logs/FQE_20220421225152
2022-04-21 22:51.52 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 22:51.52 [debug    ] Building models...
2022-04-21 22:51.52 [debug    ] Models have been built.
2022-04-21 22:51.52 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220421225152/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 22:51.55 [info     ] FQE_20220421225152: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00016303532560106733, 'time_algorithm_update': 0.009798633548575388, 'loss': 0.024747650897209074, 'time_step': 0.010031813634953028, 'init_value': -1.2425814867019653, 'ave_value': -1.2523666925411887, 'soft_opc': nan} step=355




2022-04-21 22:51.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:51.59 [info     ] FQE_20220421225152: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00016280966745295996, 'time_algorithm_update': 0.00946162653640962, 'loss': 0.023763425210097305, 'time_step': 0.00969445201712595, 'init_value': -2.4072606563568115, 'ave_value': -2.426411293188117, 'soft_opc': nan} step=710




2022-04-21 22:51.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.03 [info     ] FQE_20220421225152: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00016366394472793795, 'time_algorithm_update': 0.009667089623464666, 'loss': 0.024609777798325242, 'time_step': 0.009905212026246836, 'init_value': -2.9408209323883057, 'ave_value': -3.015273667118264, 'soft_opc': nan} step=1065




2022-04-21 22:52.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.07 [info     ] FQE_20220421225152: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00016460620181661258, 'time_algorithm_update': 0.00957000624965614, 'loss': 0.029584951024554985, 'time_step': 0.009804935186681614, 'init_value': -3.864910125732422, 'ave_value': -4.067493447803316, 'soft_opc': nan} step=1420




2022-04-21 22:52.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.10 [info     ] FQE_20220421225152: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00016686479810258033, 'time_algorithm_update': 0.009858268415424186, 'loss': 0.03511125984745966, 'time_step': 0.010097825359290755, 'init_value': -4.250349044799805, 'ave_value': -4.619703750260548, 'soft_opc': nan} step=1775




2022-04-21 22:52.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.14 [info     ] FQE_20220421225152: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00016298294067382813, 'time_algorithm_update': 0.009653964512784717, 'loss': 0.04388664626896801, 'time_step': 0.009886641569540534, 'init_value': -4.828465461730957, 'ave_value': -5.481777541072826, 'soft_opc': nan} step=2130




2022-04-21 22:52.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.18 [info     ] FQE_20220421225152: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00015470074935698173, 'time_algorithm_update': 0.009420938223180637, 'loss': 0.04942928307335562, 'time_step': 0.009643764227208957, 'init_value': -5.1314239501953125, 'ave_value': -6.084709361007621, 'soft_opc': nan} step=2485




2022-04-21 22:52.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.22 [info     ] FQE_20220421225152: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00016672913457306337, 'time_algorithm_update': 0.009735016755654777, 'loss': 0.05821484106946999, 'time_step': 0.00997376777756382, 'init_value': -5.296764373779297, 'ave_value': -6.699166149062079, 'soft_opc': nan} step=2840




2022-04-21 22:52.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.25 [info     ] FQE_20220421225152: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001694659112204968, 'time_algorithm_update': 0.009730962296606789, 'loss': 0.06221535340423735, 'time_step': 0.009973812103271484, 'init_value': -5.399174213409424, 'ave_value': -7.269907742163389, 'soft_opc': nan} step=3195




2022-04-21 22:52.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.29 [info     ] FQE_20220421225152: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00017230074170609594, 'time_algorithm_update': 0.0098184713175599, 'loss': 0.0684863206838638, 'time_step': 0.010065082093359719, 'init_value': -5.426340579986572, 'ave_value': -7.914776523800583, 'soft_opc': nan} step=3550




2022-04-21 22:52.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.33 [info     ] FQE_20220421225152: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00016920130017777563, 'time_algorithm_update': 0.009504600981591452, 'loss': 0.06983846193048315, 'time_step': 0.00974552933598908, 'init_value': -5.712468147277832, 'ave_value': -8.772668190613063, 'soft_opc': nan} step=3905




2022-04-21 22:52.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.37 [info     ] FQE_20220421225152: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00017017176453496368, 'time_algorithm_update': 0.009733102019404022, 'loss': 0.0753150099652334, 'time_step': 0.009980797431838344, 'init_value': -5.474693298339844, 'ave_value': -9.21142957639165, 'soft_opc': nan} step=4260




2022-04-21 22:52.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.40 [info     ] FQE_20220421225152: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00016954918982277454, 'time_algorithm_update': 0.009620036541576117, 'loss': 0.07813956329835132, 'time_step': 0.009862888363045707, 'init_value': -5.472324848175049, 'ave_value': -9.987686325139828, 'soft_opc': nan} step=4615




2022-04-21 22:52.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.44 [info     ] FQE_20220421225152: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001723880499181613, 'time_algorithm_update': 0.009805892890607806, 'loss': 0.07895535351315015, 'time_step': 0.010053865002914214, 'init_value': -5.384887218475342, 'ave_value': -10.442302620036502, 'soft_opc': nan} step=4970




2022-04-21 22:52.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.48 [info     ] FQE_20220421225152: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00016757871063662248, 'time_algorithm_update': 0.009409451820480991, 'loss': 0.08190944804482057, 'time_step': 0.009648115534177968, 'init_value': -5.360988616943359, 'ave_value': -11.189050845193414, 'soft_opc': nan} step=5325




2022-04-21 22:52.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.52 [info     ] FQE_20220421225152: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00017427726530692947, 'time_algorithm_update': 0.00987497114799392, 'loss': 0.08281775236549511, 'time_step': 0.010126950035632496, 'init_value': -5.396724224090576, 'ave_value': -11.841690305716321, 'soft_opc': nan} step=5680




2022-04-21 22:52.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.55 [info     ] FQE_20220421225152: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.0001710213405985228, 'time_algorithm_update': 0.009511504374759299, 'loss': 0.08707593633255488, 'time_step': 0.009753213130252462, 'init_value': -5.425675868988037, 'ave_value': -12.56697808376758, 'soft_opc': nan} step=6035




2022-04-21 22:52.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:52.59 [info     ] FQE_20220421225152: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00016936114136601837, 'time_algorithm_update': 0.010032704850317726, 'loss': 0.09090833768353496, 'time_step': 0.010275295418752751, 'init_value': -5.2610039710998535, 'ave_value': -12.973315576317287, 'soft_opc': nan} step=6390




2022-04-21 22:52.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.03 [info     ] FQE_20220421225152: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.0001685854414818992, 'time_algorithm_update': 0.009347637606338716, 'loss': 0.09235842348089521, 'time_step': 0.009595186609617421, 'init_value': -5.2498860359191895, 'ave_value': -13.581937481682242, 'soft_opc': nan} step=6745




2022-04-21 22:53.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.07 [info     ] FQE_20220421225152: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00016794137551750935, 'time_algorithm_update': 0.009886825588387502, 'loss': 0.09242530569014415, 'time_step': 0.01012952898589658, 'init_value': -5.178543567657471, 'ave_value': -13.887665975961937, 'soft_opc': nan} step=7100




2022-04-21 22:53.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.10 [info     ] FQE_20220421225152: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00016804413056709398, 'time_algorithm_update': 0.009522188213509574, 'loss': 0.09555761516356552, 'time_step': 0.009762709577318647, 'init_value': -5.303048610687256, 'ave_value': -14.466335638262574, 'soft_opc': nan} step=7455




2022-04-21 22:53.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.14 [info     ] FQE_20220421225152: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.0001672133593492105, 'time_algorithm_update': 0.009803971438340739, 'loss': 0.09938238081587872, 'time_step': 0.01004868829754037, 'init_value': -5.787963390350342, 'ave_value': -15.366989975033311, 'soft_opc': nan} step=7810




2022-04-21 22:53.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.18 [info     ] FQE_20220421225152: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001705001777326557, 'time_algorithm_update': 0.009524633515049034, 'loss': 0.10626699402928352, 'time_step': 0.009765385909819266, 'init_value': -5.708247661590576, 'ave_value': -15.538997382190477, 'soft_opc': nan} step=8165




2022-04-21 22:53.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.22 [info     ] FQE_20220421225152: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.0001722302235348124, 'time_algorithm_update': 0.009862417570302184, 'loss': 0.10890131011133043, 'time_step': 0.01010954480775645, 'init_value': -5.778066158294678, 'ave_value': -15.88845432260981, 'soft_opc': nan} step=8520




2022-04-21 22:53.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.25 [info     ] FQE_20220421225152: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.0001750126690931723, 'time_algorithm_update': 0.009692032908050107, 'loss': 0.11091600303918543, 'time_step': 0.00994458601508342, 'init_value': -6.0689520835876465, 'ave_value': -16.303858835367123, 'soft_opc': nan} step=8875




2022-04-21 22:53.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.29 [info     ] FQE_20220421225152: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00017142027196749836, 'time_algorithm_update': 0.009920752887994471, 'loss': 0.12155254890906139, 'time_step': 0.010161462971861933, 'init_value': -6.524725914001465, 'ave_value': -16.927143595608353, 'soft_opc': nan} step=9230




2022-04-21 22:53.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.33 [info     ] FQE_20220421225152: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00017072717908402565, 'time_algorithm_update': 0.009300886073582608, 'loss': 0.12635490727466597, 'time_step': 0.009543171734877036, 'init_value': -6.7311692237854, 'ave_value': -17.183935717918086, 'soft_opc': nan} step=9585




2022-04-21 22:53.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.37 [info     ] FQE_20220421225152: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00017118924100634078, 'time_algorithm_update': 0.009996103904616666, 'loss': 0.13862761742579685, 'time_step': 0.010240630028953014, 'init_value': -7.477802753448486, 'ave_value': -17.833465882060878, 'soft_opc': nan} step=9940




2022-04-21 22:53.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.40 [info     ] FQE_20220421225152: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00017352641468316735, 'time_algorithm_update': 0.009540344292009381, 'loss': 0.1446395830343097, 'time_step': 0.009786863729987346, 'init_value': -7.470990180969238, 'ave_value': -17.665292131063197, 'soft_opc': nan} step=10295




2022-04-21 22:53.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.44 [info     ] FQE_20220421225152: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.0001706009179773465, 'time_algorithm_update': 0.009914692354873873, 'loss': 0.1524186637960899, 'time_step': 0.010160001566712285, 'init_value': -8.434846878051758, 'ave_value': -18.473852817574937, 'soft_opc': nan} step=10650




2022-04-21 22:53.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.48 [info     ] FQE_20220421225152: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00017204754789110641, 'time_algorithm_update': 0.009427938326983385, 'loss': 0.16652682176463202, 'time_step': 0.009673621620930417, 'init_value': -8.74768352508545, 'ave_value': -18.755364968091072, 'soft_opc': nan} step=11005




2022-04-21 22:53.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.52 [info     ] FQE_20220421225152: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00016951359493631713, 'time_algorithm_update': 0.009832142440366074, 'loss': 0.17758758131309715, 'time_step': 0.010073203771886691, 'init_value': -9.31689167022705, 'ave_value': -19.158777998637845, 'soft_opc': nan} step=11360




2022-04-21 22:53.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.56 [info     ] FQE_20220421225152: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.0001707164334579253, 'time_algorithm_update': 0.009820159724060919, 'loss': 0.1899171019812495, 'time_step': 0.010065047841676524, 'init_value': -9.996260643005371, 'ave_value': -19.57285166198395, 'soft_opc': nan} step=11715




2022-04-21 22:53.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:53.59 [info     ] FQE_20220421225152: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.0001698164872720208, 'time_algorithm_update': 0.009760364344422246, 'loss': 0.20938986522525968, 'time_step': 0.010007778355773066, 'init_value': -10.66712760925293, 'ave_value': -20.034571474143803, 'soft_opc': nan} step=12070




2022-04-21 22:53.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.03 [info     ] FQE_20220421225152: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00016961366357937666, 'time_algorithm_update': 0.009607064556068098, 'loss': 0.22750272316149842, 'time_step': 0.009852232059962314, 'init_value': -11.179695129394531, 'ave_value': -20.25028964394303, 'soft_opc': nan} step=12425




2022-04-21 22:54.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.07 [info     ] FQE_20220421225152: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00017158548596879125, 'time_algorithm_update': 0.009651356012048855, 'loss': 0.24627293301615077, 'time_step': 0.00990048327916105, 'init_value': -11.709315299987793, 'ave_value': -20.501339803601315, 'soft_opc': nan} step=12780




2022-04-21 22:54.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.11 [info     ] FQE_20220421225152: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00017304353311028278, 'time_algorithm_update': 0.00976752630421813, 'loss': 0.2647866132183814, 'time_step': 0.010016057860683387, 'init_value': -12.399025917053223, 'ave_value': -21.111704336074535, 'soft_opc': nan} step=13135




2022-04-21 22:54.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.14 [info     ] FQE_20220421225152: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016976746035293793, 'time_algorithm_update': 0.00972124287779902, 'loss': 0.2785057390213642, 'time_step': 0.009964097385675134, 'init_value': -12.544677734375, 'ave_value': -21.10906229860304, 'soft_opc': nan} step=13490




2022-04-21 22:54.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.18 [info     ] FQE_20220421225152: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00016900116289165657, 'time_algorithm_update': 0.009552417674534757, 'loss': 0.28861306367940465, 'time_step': 0.009797168113815952, 'init_value': -13.038336753845215, 'ave_value': -21.127242412250443, 'soft_opc': nan} step=13845




2022-04-21 22:54.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.22 [info     ] FQE_20220421225152: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00016924562588543958, 'time_algorithm_update': 0.009686639946950994, 'loss': 0.30244969232401375, 'time_step': 0.009928405116981185, 'init_value': -13.479438781738281, 'ave_value': -21.395625507218362, 'soft_opc': nan} step=14200




2022-04-21 22:54.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.26 [info     ] FQE_20220421225152: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00017246394090249506, 'time_algorithm_update': 0.009881407778028031, 'loss': 0.3271274215528663, 'time_step': 0.010127310014106859, 'init_value': -13.7080078125, 'ave_value': -21.391926444672826, 'soft_opc': nan} step=14555




2022-04-21 22:54.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.29 [info     ] FQE_20220421225152: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.0001690555626237896, 'time_algorithm_update': 0.009520282879681654, 'loss': 0.33572874254429, 'time_step': 0.00976338117894992, 'init_value': -13.943236351013184, 'ave_value': -21.310174321751333, 'soft_opc': nan} step=14910




2022-04-21 22:54.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.33 [info     ] FQE_20220421225152: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00016908578469719684, 'time_algorithm_update': 0.009765932593547123, 'loss': 0.34943504060395586, 'time_step': 0.010007644707048444, 'init_value': -14.224042892456055, 'ave_value': -21.400340404226764, 'soft_opc': nan} step=15265




2022-04-21 22:54.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.37 [info     ] FQE_20220421225152: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.0001704189139352718, 'time_algorithm_update': 0.009535290489734059, 'loss': 0.3613360123157921, 'time_step': 0.009778112760731872, 'init_value': -14.64843463897705, 'ave_value': -21.42282841853603, 'soft_opc': nan} step=15620




2022-04-21 22:54.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.41 [info     ] FQE_20220421225152: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016876341591418628, 'time_algorithm_update': 0.009993705615191393, 'loss': 0.3744723862168235, 'time_step': 0.010236222307446977, 'init_value': -15.074325561523438, 'ave_value': -21.609662118390144, 'soft_opc': nan} step=15975




2022-04-21 22:54.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.44 [info     ] FQE_20220421225152: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00017024967032419123, 'time_algorithm_update': 0.009574953267272089, 'loss': 0.3990252800810505, 'time_step': 0.009819048894962795, 'init_value': -15.741042137145996, 'ave_value': -21.864417263059522, 'soft_opc': nan} step=16330




2022-04-21 22:54.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.48 [info     ] FQE_20220421225152: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.0001686868533282213, 'time_algorithm_update': 0.009454373238791882, 'loss': 0.4092440148133417, 'time_step': 0.009696002745292556, 'init_value': -16.308557510375977, 'ave_value': -22.133423513009728, 'soft_opc': nan} step=16685




2022-04-21 22:54.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.52 [info     ] FQE_20220421225152: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00016836985735826088, 'time_algorithm_update': 0.009609412475371025, 'loss': 0.4396881761711458, 'time_step': 0.009855296578205807, 'init_value': -16.65264320373535, 'ave_value': -22.07294349098432, 'soft_opc': nan} step=17040




2022-04-21 22:54.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.56 [info     ] FQE_20220421225152: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016813748319384077, 'time_algorithm_update': 0.009771962232992683, 'loss': 0.458821960339244, 'time_step': 0.010016317098913059, 'init_value': -17.201631546020508, 'ave_value': -22.280497378930498, 'soft_opc': nan} step=17395




2022-04-21 22:54.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 22:54.59 [info     ] FQE_20220421225152: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00017372520876602387, 'time_algorithm_update': 0.009684688272610516, 'loss': 0.4643620928460863, 'time_step': 0.009933246693141023, 'init_value': -17.06423568725586, 'ave_value': -22.086755383839087, 'soft_opc': nan} step=17750




2022-04-21 22:54.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421225152/model_17750.pt
search iteration:  3
using hyper params:  [0.005543794082722098, 0.004614710600356128, 9.534666699061333e-05, 3]
2022-04-21 22:55.00 [debug    ] RoundIterator is selected.
2022-04-21 22:55.00 [info     ] Directory is created at d3rlpy_logs/CQL_20220421225500
2022-04-21 22:55.00 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 22:55.00 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-21 22:55.00 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220421225500/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.005543794082722098, 'actor_optim_factory': {'optim_

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:55.21 [info     ] CQL_20220421225500: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003570476708384608, 'time_algorithm_update': 0.06008718261829001, 'temp_loss': 4.900047783906749, 'temp': 0.983180202156133, 'alpha_loss': -17.696587424746827, 'alpha': 1.0177276864906266, 'critic_loss': 60.25337053861232, 'actor_loss': 1.0857223222042955, 'time_step': 0.060534199538258456, 'td_error': 1.2398290789685504, 'init_value': -4.044351100921631, 'ave_value': -3.7585354845280214} step=346
2022-04-21 22:55.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:55.43 [info     ] CQL_20220421225500: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00036303569815751444, 'time_algorithm_update': 0.059368668264047256, 'temp_loss': 4.799379231612806, 'temp': 0.9513198135560648, 'alpha_loss': -18.361723800615078, 'alpha': 1.054198782568033, 'critic_loss': 96.65060272657803, 'actor_loss': 3.839851699812564, 'time_step': 0.05982678887471987, 'td_error': 1.2610068696171732, 'init_value': -6.119749069213867, 'ave_value': -5.632199893782296} step=692
2022-04-21 22:55.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:56.04 [info     ] CQL_20220421225500: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003489159435206066, 'time_algorithm_update': 0.058526923890747776, 'temp_loss': 4.649070909257569, 'temp': 0.9211578234771772, 'alpha_loss': -19.033251536374838, 'alpha': 1.0924903144037104, 'critic_loss': 190.4305668648957, 'actor_loss': 5.574343613806487, 'time_step': 0.05896819051290523, 'td_error': 1.2851776173420655, 'init_value': -7.036935806274414, 'ave_value': -6.518956031817404} step=1038
2022-04-21 22:56.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:56.26 [info     ] CQL_20220421225500: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00035544213532023346, 'time_algorithm_update': 0.058858659915152316, 'temp_loss': 4.5043430796937445, 'temp': 0.8923324077804654, 'alpha_loss': -19.721492761821416, 'alpha': 1.1326566810552785, 'critic_loss': 340.62048128161126, 'actor_loss': 5.689276109541082, 'time_step': 0.05930825396080237, 'td_error': 1.2656460131612872, 'init_value': -6.351736068725586, 'ave_value': -5.956634204226006} step=1384
2022-04-21 22:56.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:56.46 [info     ] CQL_20220421225500: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003600899194706382, 'time_algorithm_update': 0.05698417308013563, 'temp_loss': 4.366526399733703, 'temp': 0.8647095719513865, 'alpha_loss': -20.43744874413992, 'alpha': 1.1747362220907487, 'critic_loss': 555.0412109904206, 'actor_loss': 4.133972644805908, 'time_step': 0.05743936582796835, 'td_error': 1.264190344588102, 'init_value': -4.837518215179443, 'ave_value': -4.681395636310279} step=1730
2022-04-21 22:56.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:57.07 [info     ] CQL_20220421225500: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003534314260317411, 'time_algorithm_update': 0.05644183145093091, 'temp_loss': 4.23213223501437, 'temp': 0.8381726684942411, 'alpha_loss': -21.196418905533807, 'alpha': 1.2187736496070907, 'critic_loss': 810.5643855629629, 'actor_loss': 2.98718308851209, 'time_step': 0.056889990161609096, 'td_error': 1.269090693226604, 'init_value': -4.255093097686768, 'ave_value': -4.169545612407556} step=2076
2022-04-21 22:57.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:57.27 [info     ] CQL_20220421225500: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003619428314914593, 'time_algorithm_update': 0.05593658665012073, 'temp_loss': 4.103869982537507, 'temp': 0.8126388865743759, 'alpha_loss': -21.99859992341499, 'alpha': 1.2648234246783174, 'critic_loss': 1062.9026797961637, 'actor_loss': 2.791557094954342, 'time_step': 0.05638703307664463, 'td_error': 1.2758524671631493, 'init_value': -4.325113296508789, 'ave_value': -4.262003333978938} step=2422
2022-04-21 22:57.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:57.48 [info     ] CQL_20220421225500: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003611255932405505, 'time_algorithm_update': 0.05632385146411168, 'temp_loss': 3.9800047350756693, 'temp': 0.7880309398119161, 'alpha_loss': -22.832603989308968, 'alpha': 1.312925732893751, 'critic_loss': 1308.2726122905754, 'actor_loss': 2.8751820808201165, 'time_step': 0.05677381209555389, 'td_error': 1.281007139099301, 'init_value': -4.485599994659424, 'ave_value': -4.432399472041738} step=2768
2022-04-21 22:57.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:58.08 [info     ] CQL_20220421225500: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003681644538923495, 'time_algorithm_update': 0.056198796784946686, 'temp_loss': 3.8601042493919415, 'temp': 0.7642839097907778, 'alpha_loss': -23.706243399250713, 'alpha': 1.3631194895402545, 'critic_loss': 1559.4111977285042, 'actor_loss': 3.0733160896797402, 'time_step': 0.05665635029015514, 'td_error': 1.2849536971121538, 'init_value': -4.508633613586426, 'ave_value': -4.47121634879006} step=3114
2022-04-21 22:58.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:58.29 [info     ] CQL_20220421225500: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003615590189233681, 'time_algorithm_update': 0.055806781515220685, 'temp_loss': 3.745004664955801, 'temp': 0.7413402918101735, 'alpha_loss': -24.61380980607402, 'alpha': 1.415447328132012, 'critic_loss': 1804.5135007648798, 'actor_loss': 3.3452258302986277, 'time_step': 0.05625891823300047, 'td_error': 1.2906131042435696, 'init_value': -4.970277309417725, 'ave_value': -4.930191083698264} step=3460
2022-04-21 22:58.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:58.49 [info     ] CQL_20220421225500: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003704487243828746, 'time_algorithm_update': 0.05651803099351122, 'temp_loss': 3.6323312562325096, 'temp': 0.7191599328393881, 'alpha_loss': -25.563293826373325, 'alpha': 1.4699624215247313, 'critic_loss': 2054.1543437979813, 'actor_loss': 3.5890043050567537, 'time_step': 0.05698082626210472, 'td_error': 1.2952041904149367, 'init_value': -5.128799915313721, 'ave_value': -5.0985390596222455} step=3806
2022-04-21 22:58.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:59.10 [info     ] CQL_20220421225500: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003568912517128652, 'time_algorithm_update': 0.056340349202900264, 'temp_loss': 3.5237842694872374, 'temp': 0.6976999301097296, 'alpha_loss': -26.548487371102922, 'alpha': 1.5267237045861393, 'critic_loss': 2346.777701493633, 'actor_loss': 3.93446290837547, 'time_step': 0.05678760591958989, 'td_error': 1.302706425028021, 'init_value': -5.495940685272217, 'ave_value': -5.474564470188942} step=4152
2022-04-21 22:59.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:59.30 [info     ] CQL_20220421225500: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003540033549931697, 'time_algorithm_update': 0.055719138569914534, 'temp_loss': 3.4192832784156577, 'temp': 0.6769211357728594, 'alpha_loss': -27.57597596934765, 'alpha': 1.5857907047850548, 'critic_loss': 2700.064642228143, 'actor_loss': 4.407277629554616, 'time_step': 0.056163162165294496, 'td_error': 1.3109897929564023, 'init_value': -5.9995436668396, 'ave_value': -5.975961072640489} step=4498
2022-04-21 22:59.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 22:59.50 [info     ] CQL_20220421225500: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00035454427575789437, 'time_algorithm_update': 0.05616631742157688, 'temp_loss': 3.316883773472957, 'temp': 0.6567949456286568, 'alpha_loss': -28.646745328958325, 'alpha': 1.6472372443689776, 'critic_loss': 3028.5381148674583, 'actor_loss': 4.847639610312577, 'time_step': 0.05661299118416847, 'td_error': 1.3181798346010118, 'init_value': -6.335772514343262, 'ave_value': -6.316776938458764} step=4844
2022-04-21 22:59.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:00.11 [info     ] CQL_20220421225500: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003613936418742803, 'time_algorithm_update': 0.056175920315560575, 'temp_loss': 3.2200132725555775, 'temp': 0.637288320271266, 'alpha_loss': -29.756879646654074, 'alpha': 1.711143886767371, 'critic_loss': 3338.40321376558, 'actor_loss': 5.3088413676774575, 'time_step': 0.05663076301530606, 'td_error': 1.3262986015359535, 'init_value': -6.678651332855225, 'ave_value': -6.668679034331219} step=5190
2022-04-21 23:00.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:00.31 [info     ] CQL_20220421225500: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00035150547248090624, 'time_algorithm_update': 0.05629815462696759, 'temp_loss': 3.1229971619699732, 'temp': 0.6183804094446876, 'alpha_loss': -30.911241834563327, 'alpha': 1.777582075899047, 'critic_loss': 3667.938874525831, 'actor_loss': 5.7929315318951025, 'time_step': 0.05674224092781199, 'td_error': 1.3369866628795501, 'init_value': -7.3279266357421875, 'ave_value': -7.308457840602507} step=5536
2022-04-21 23:00.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:00.52 [info     ] CQL_20220421225500: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00035938224351475, 'time_algorithm_update': 0.057455412914298176, 'temp_loss': 3.0304835766037077, 'temp': 0.6000504462705182, 'alpha_loss': -32.11196628195702, 'alpha': 1.8466439477970145, 'critic_loss': 4033.25477626558, 'actor_loss': 6.323852326828621, 'time_step': 0.05790486570038547, 'td_error': 1.3461232606264442, 'init_value': -7.6977972984313965, 'ave_value': -7.685756413661908} step=5882
2022-04-21 23:00.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:01.12 [info     ] CQL_20220421225500: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0003761845517020694, 'time_algorithm_update': 0.05395053852500254, 'temp_loss': 2.9403072409547133, 'temp': 0.5822758991594259, 'alpha_loss': -33.35772243124901, 'alpha': 1.918421836946741, 'critic_loss': 4401.515633467305, 'actor_loss': 6.823632189304154, 'time_step': 0.05442075095424762, 'td_error': 1.3563359220656857, 'init_value': -8.130173683166504, 'ave_value': -8.122828619329779} step=6228
2022-04-21 23:01.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:01.31 [info     ] CQL_20220421225500: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003484404845044792, 'time_algorithm_update': 0.05246577097501369, 'temp_loss': 2.8542555391443947, 'temp': 0.5650325352056867, 'alpha_loss': -34.65951701257959, 'alpha': 1.993019346212376, 'critic_loss': 4771.137630396495, 'actor_loss': 7.364893130484344, 'time_step': 0.05290341377258301, 'td_error': 1.3679206762555964, 'init_value': -8.666691780090332, 'ave_value': -8.657412261836276} step=6574
2022-04-21 23:01.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:01.52 [info     ] CQL_20220421225500: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003594477052633473, 'time_algorithm_update': 0.05711891885437717, 'temp_loss': 2.769183632266315, 'temp': 0.5483049862302107, 'alpha_loss': -36.00345728438714, 'alpha': 2.0705390562211847, 'critic_loss': 5180.742632033508, 'actor_loss': 7.894214948477773, 'time_step': 0.057573876628985984, 'td_error': 1.3786793622547522, 'init_value': -9.077473640441895, 'ave_value': -9.073125074032982} step=6920
2022-04-21 23:01.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:02.13 [info     ] CQL_20220421225500: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003689038271159795, 'time_algorithm_update': 0.05691883329711209, 'temp_loss': 2.687098927580552, 'temp': 0.5320790656384705, 'alpha_loss': -37.406338079816344, 'alpha': 2.151090344941685, 'critic_loss': 5597.134445278631, 'actor_loss': 8.42349870218707, 'time_step': 0.05738049504384829, 'td_error': 1.3928081115113087, 'init_value': -9.6936674118042, 'ave_value': -9.685616655833577} step=7266
2022-04-21 23:02.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:02.33 [info     ] CQL_20220421225500: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003579083205647551, 'time_algorithm_update': 0.05686130757965793, 'temp_loss': 2.608150637907789, 'temp': 0.5163341788198218, 'alpha_loss': -38.86083070391175, 'alpha': 2.234784756781738, 'critic_loss': 5978.652544142883, 'actor_loss': 8.962362782803574, 'time_step': 0.05731055708978906, 'td_error': 1.4060227531839447, 'init_value': -10.209053039550781, 'ave_value': -10.201096437598022} step=7612
2022-04-21 23:02.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:02.54 [info     ] CQL_20220421225500: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00036572996591556966, 'time_algorithm_update': 0.05682959653049535, 'temp_loss': 2.530301481313099, 'temp': 0.5010580336082877, 'alpha_loss': -40.37135744921734, 'alpha': 2.3217488348139503, 'critic_loss': 6359.444804461706, 'actor_loss': 9.51145679551053, 'time_step': 0.05728954731384454, 'td_error': 1.4187616848445166, 'init_value': -10.652668952941895, 'ave_value': -10.649635486774617} step=7958
2022-04-21 23:02.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:03.15 [info     ] CQL_20220421225500: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0003600658019843129, 'time_algorithm_update': 0.056961878186705485, 'temp_loss': 2.456242577878037, 'temp': 0.4862357717373468, 'alpha_loss': -41.94251137661796, 'alpha': 2.412097890253012, 'critic_loss': 6823.922697739794, 'actor_loss': 10.028319948670491, 'time_step': 0.05741489140284544, 'td_error': 1.4319393105000384, 'init_value': -11.116189956665039, 'ave_value': -11.115300440076066} step=8304
2022-04-21 23:03.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:03.35 [info     ] CQL_20220421225500: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003581991085427345, 'time_algorithm_update': 0.057020013042957104, 'temp_loss': 2.3829035063010418, 'temp': 0.4718523012592613, 'alpha_loss': -43.573764756924845, 'alpha': 2.5059745587365474, 'critic_loss': 7159.368367277818, 'actor_loss': 10.570389053036022, 'time_step': 0.05746803118314357, 'td_error': 1.450060985343261, 'init_value': -11.824206352233887, 'ave_value': -11.813703951014903} step=8650
2022-04-21 23:03.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:03.56 [info     ] CQL_20220421225500: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003613364489781374, 'time_algorithm_update': 0.0565756318197085, 'temp_loss': 2.312688056444157, 'temp': 0.4578974568258131, 'alpha_loss': -45.275770672483944, 'alpha': 2.6035098526519156, 'critic_loss': 7517.858827447616, 'actor_loss': 11.087850540359586, 'time_step': 0.05702265287410317, 'td_error': 1.460346163613019, 'init_value': -12.066526412963867, 'ave_value': -12.070647710960246} step=8996
2022-04-21 23:03.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:04.17 [info     ] CQL_20220421225500: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0003714933560762791, 'time_algorithm_update': 0.05684473679933934, 'temp_loss': 2.244125852006019, 'temp': 0.4443555710288142, 'alpha_loss': -47.03182735332864, 'alpha': 2.704844208121989, 'critic_loss': 7934.0610930161665, 'actor_loss': 11.666816127093542, 'time_step': 0.05730893915099216, 'td_error': 1.4791457275005129, 'init_value': -12.727205276489258, 'ave_value': -12.725604765897907} step=9342
2022-04-21 23:04.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:04.37 [info     ] CQL_20220421225500: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00036681387465813256, 'time_algorithm_update': 0.05688480903647539, 'temp_loss': 2.1777071594502884, 'temp': 0.43121355832312147, 'alpha_loss': -48.85835712631314, 'alpha': 2.81011524710352, 'critic_loss': 8313.967776259935, 'actor_loss': 12.170145685273098, 'time_step': 0.057339546997423115, 'td_error': 1.495184036685907, 'init_value': -13.25839614868164, 'ave_value': -13.253679586078613} step=9688
2022-04-21 23:04.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:04.58 [info     ] CQL_20220421225500: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00037075536099472485, 'time_algorithm_update': 0.056810015887883356, 'temp_loss': 2.113458223425584, 'temp': 0.41846106972308517, 'alpha_loss': -50.768378197113215, 'alpha': 2.9194952159947745, 'critic_loss': 8544.187405448429, 'actor_loss': 12.677618718560721, 'time_step': 0.05727161355101304, 'td_error': 1.5113638207416835, 'init_value': -13.758101463317871, 'ave_value': -13.753327933574186} step=10034
2022-04-21 23:04.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:05.19 [info     ] CQL_20220421225500: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00035545247138580146, 'time_algorithm_update': 0.056513435578759694, 'temp_loss': 2.0507030990082407, 'temp': 0.40608698577550106, 'alpha_loss': -52.736904805795305, 'alpha': 3.033128461396763, 'critic_loss': 8911.973971504696, 'actor_loss': 13.26711159358824, 'time_step': 0.0569613179719517, 'td_error': 1.530748307062647, 'init_value': -14.328622817993164, 'ave_value': -14.32398427761579} step=10380
2022-04-21 23:05.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:05.40 [info     ] CQL_20220421225500: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00036569826864782785, 'time_algorithm_update': 0.05976650136054596, 'temp_loss': 1.9903903386496395, 'temp': 0.3940779449622755, 'alpha_loss': -54.80122181445877, 'alpha': 3.151195731466216, 'critic_loss': 9432.90173128161, 'actor_loss': 13.830006006825178, 'time_step': 0.0602242712340603, 'td_error': 1.5485544857655866, 'init_value': -14.835808753967285, 'ave_value': -14.83246971390834} step=10726
2022-04-21 23:05.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:06.02 [info     ] CQL_20220421225500: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00036615443367489503, 'time_algorithm_update': 0.05975373838678261, 'temp_loss': 1.9316925813007906, 'temp': 0.3824236652237831, 'alpha_loss': -56.929954583934276, 'alpha': 3.27385877736042, 'critic_loss': 9744.747044910586, 'actor_loss': 14.361701350680665, 'time_step': 0.06021342801220844, 'td_error': 1.5674773777504842, 'init_value': -15.351555824279785, 'ave_value': -15.35006724430408} step=11072
2022-04-21 23:06.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:06.24 [info     ] CQL_20220421225500: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003638267517089844, 'time_algorithm_update': 0.05915114507509794, 'temp_loss': 1.874684016828592, 'temp': 0.3711126935103036, 'alpha_loss': -59.14135738879959, 'alpha': 3.4012945924880187, 'critic_loss': 10184.324992097183, 'actor_loss': 14.933732746653474, 'time_step': 0.05960069639834365, 'td_error': 1.58707763334987, 'init_value': -15.876410484313965, 'ave_value': -15.875951061547227} step=11418
2022-04-21 23:06.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:06.45 [info     ] CQL_20220421225500: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003570580069040287, 'time_algorithm_update': 0.05869844056278295, 'temp_loss': 1.8189892606928169, 'temp': 0.36013667569684155, 'alpha_loss': -61.449182311923515, 'alpha': 3.533694901907375, 'critic_loss': 10646.265565728865, 'actor_loss': 15.512192693059845, 'time_step': 0.05914215752155105, 'td_error': 1.6134045984354173, 'init_value': -16.612842559814453, 'ave_value': -16.60601402217599} step=11764
2022-04-21 23:06.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:07.07 [info     ] CQL_20220421225500: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00036972037629585047, 'time_algorithm_update': 0.05914232703302637, 'temp_loss': 1.7654636109495438, 'temp': 0.3494853083625694, 'alpha_loss': -63.83988518246336, 'alpha': 3.6712507347151035, 'critic_loss': 11067.246522760115, 'actor_loss': 16.074572687204174, 'time_step': 0.05959837491801708, 'td_error': 1.633575475182657, 'init_value': -17.11848258972168, 'ave_value': -17.112502930389304} step=12110
2022-04-21 23:07.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:07.28 [info     ] CQL_20220421225500: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00036744437465777976, 'time_algorithm_update': 0.058931275599264685, 'temp_loss': 1.7132668564085327, 'temp': 0.3391495395257983, 'alpha_loss': -66.30849467812246, 'alpha': 3.8141357864258607, 'critic_loss': 11334.07325347724, 'actor_loss': 16.59253118768593, 'time_step': 0.05938545541267175, 'td_error': 1.6490835010140077, 'init_value': -17.4642391204834, 'ave_value': -17.46442989423525} step=12456
2022-04-21 23:07.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:07.49 [info     ] CQL_20220421225500: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00036542470744579513, 'time_algorithm_update': 0.0584554217454326, 'temp_loss': 1.6627279995493807, 'temp': 0.3291187729105095, 'alpha_loss': -68.91055079553858, 'alpha': 3.9625983961744806, 'critic_loss': 10932.008210463331, 'actor_loss': 16.975404221198463, 'time_step': 0.05891120709435788, 'td_error': 1.6552264954058042, 'init_value': -17.551172256469727, 'ave_value': -17.561258201002225} step=12802
2022-04-21 23:07.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:08.11 [info     ] CQL_20220421225500: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.000354479503080335, 'time_algorithm_update': 0.05899471423529476, 'temp_loss': 1.6128474835715543, 'temp': 0.3193871546859686, 'alpha_loss': -71.59182013803824, 'alpha': 4.11686024224827, 'critic_loss': 9077.355375609646, 'actor_loss': 17.190849127797033, 'time_step': 0.05944347174870485, 'td_error': 1.671190732026779, 'init_value': -17.955291748046875, 'ave_value': -17.96281489810428} step=13148
2022-04-21 23:08.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:08.32 [info     ] CQL_20220421225500: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003592306478864196, 'time_algorithm_update': 0.05706741010522567, 'temp_loss': 1.5652402070905431, 'temp': 0.3099443490450093, 'alpha_loss': -74.37587473433831, 'alpha': 4.277124253311598, 'critic_loss': 8006.325188256413, 'actor_loss': 17.644048729383876, 'time_step': 0.05752163953174745, 'td_error': 1.6924957863315724, 'init_value': -18.491792678833008, 'ave_value': -18.494905813840283} step=13494
2022-04-21 23:08.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:08.52 [info     ] CQL_20220421225500: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00036739820689824274, 'time_algorithm_update': 0.05609968425221526, 'temp_loss': 1.5187680421537058, 'temp': 0.3007797371445364, 'alpha_loss': -77.26836082149792, 'alpha': 4.443613232904776, 'critic_loss': 7138.551896111813, 'actor_loss': 18.094347612017152, 'time_step': 0.056560551500044805, 'td_error': 1.710264793017547, 'init_value': -18.895570755004883, 'ave_value': -18.899726025331297} step=13840
2022-04-21 23:08.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:09.13 [info     ] CQL_20220421225500: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00036206893149138875, 'time_algorithm_update': 0.056392147361887676, 'temp_loss': 1.4745598931533064, 'temp': 0.2918851478079151, 'alpha_loss': -80.26919654890293, 'alpha': 4.6165886448986955, 'critic_loss': 6264.498083566655, 'actor_loss': 18.554616525683098, 'time_step': 0.05685080338075671, 'td_error': 1.7329588485546392, 'init_value': -19.432846069335938, 'ave_value': -19.43433227575238} step=14186
2022-04-21 23:09.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:09.33 [info     ] CQL_20220421225500: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003599810462466554, 'time_algorithm_update': 0.05670033989614145, 'temp_loss': 1.4304989776170323, 'temp': 0.28325335509170685, 'alpha_loss': -83.38974091634584, 'alpha': 4.796294365315079, 'critic_loss': 5586.90137847724, 'actor_loss': 19.076741808411704, 'time_step': 0.05715733180845404, 'td_error': 1.752445830001559, 'init_value': -19.841350555419922, 'ave_value': -19.84716584642898} step=14532
2022-04-21 23:09.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:09.54 [info     ] CQL_20220421225500: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003662047358606592, 'time_algorithm_update': 0.05683513046000045, 'temp_loss': 1.3882483705619857, 'temp': 0.2748778669992623, 'alpha_loss': -86.65250901679772, 'alpha': 4.983003286957052, 'critic_loss': 5261.301210260116, 'actor_loss': 19.71652630160999, 'time_step': 0.057295899170671585, 'td_error': 1.7861357820861967, 'init_value': -20.618350982666016, 'ave_value': -20.61602771934484} step=14878
2022-04-21 23:09.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:10.14 [info     ] CQL_20220421225500: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.000353036588327044, 'time_algorithm_update': 0.05650721120007465, 'temp_loss': 1.3471640210620242, 'temp': 0.26674907477949394, 'alpha_loss': -90.02669068705829, 'alpha': 5.176993011739213, 'critic_loss': 5161.095278348537, 'actor_loss': 20.414308663737568, 'time_step': 0.05695246134190201, 'td_error': 1.8194773722148856, 'init_value': -21.328874588012695, 'ave_value': -21.325327882816463} step=15224
2022-04-21 23:10.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:10.35 [info     ] CQL_20220421225500: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00035814742821489454, 'time_algorithm_update': 0.0567892858747802, 'temp_loss': 1.3074008219503943, 'temp': 0.2588608663722959, 'alpha_loss': -93.51523477631497, 'alpha': 5.3785121261728985, 'critic_loss': 5407.337975298049, 'actor_loss': 21.195152448091893, 'time_step': 0.05724205653791483, 'td_error': 1.854432800314645, 'init_value': -22.05872344970703, 'ave_value': -22.052054669745107} step=15570
2022-04-21 23:10.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:10.56 [info     ] CQL_20220421225500: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00036633359214474013, 'time_algorithm_update': 0.057134972142346335, 'temp_loss': 1.2689412825369422, 'temp': 0.2512050193513749, 'alpha_loss': -97.1790577221468, 'alpha': 5.587887692313663, 'critic_loss': 5520.37792404263, 'actor_loss': 21.854896953340212, 'time_step': 0.05759611777487518, 'td_error': 1.8871878171621637, 'init_value': -22.69747543334961, 'ave_value': -22.69360945680346} step=15916
2022-04-21 23:10.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:11.17 [info     ] CQL_20220421225500: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003535430555398754, 'time_algorithm_update': 0.057767133492265824, 'temp_loss': 1.2313691188145235, 'temp': 0.24377586761478745, 'alpha_loss': -100.96368176675256, 'alpha': 5.805419429878279, 'critic_loss': 5717.598614466673, 'actor_loss': 22.5674230211732, 'time_step': 0.05821025027015995, 'td_error': 1.9229617647120103, 'init_value': -23.3983154296875, 'ave_value': -23.393195330808375} step=16262
2022-04-21 23:11.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:11.38 [info     ] CQL_20220421225500: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00035964340143810117, 'time_algorithm_update': 0.05762233417158182, 'temp_loss': 1.1948640866775733, 'temp': 0.23656623082698425, 'alpha_loss': -104.876274858596, 'alpha': 6.031408535951824, 'critic_loss': 5852.245692964234, 'actor_loss': 23.181695348265542, 'time_step': 0.05807077677952761, 'td_error': 1.955729661331497, 'init_value': -24.02021026611328, 'ave_value': -24.014541395991376} step=16608
2022-04-21 23:11.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:11.59 [info     ] CQL_20220421225500: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003575982385977155, 'time_algorithm_update': 0.05776945290537928, 'temp_loss': 1.1595334007560862, 'temp': 0.22957116132871264, 'alpha_loss': -108.96347954369693, 'alpha': 6.26618864357127, 'critic_loss': 5777.720959966582, 'actor_loss': 23.751333071317287, 'time_step': 0.058217768724254104, 'td_error': 1.979611767077151, 'init_value': -24.43927574157715, 'ave_value': -24.436973773093516} step=16954
2022-04-21 23:11.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:12.20 [info     ] CQL_20220421225500: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.000364229169195098, 'time_algorithm_update': 0.05811535416310922, 'temp_loss': 1.1251785172892443, 'temp': 0.2227823011189527, 'alpha_loss': -113.18864193954909, 'alpha': 6.5100979432894315, 'critic_loss': 5303.407242085892, 'actor_loss': 24.220054819404734, 'time_step': 0.058571409627881356, 'td_error': 2.003625365017211, 'init_value': -24.854564666748047, 'ave_value': -24.855922146847373} step=17300
2022-04-21 23:12.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421225500/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 23:12.22 [info     ] FQE_20220421231220: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001591602003717997, 'time_algorithm_update': 0.008897097713976022, 'loss': 0.0061032501111053915, 'time_step': 0.00912872136357319, 'init_value': -0.26476019620895386, 'ave_value': -0.22790765944901886, 'soft_opc': nan} step=166




2022-04-21 23:12.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.24 [info     ] FQE_20220421231220: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016395298831434134, 'time_algorithm_update': 0.00875330976693027, 'loss': 0.0039261340034609455, 'time_step': 0.008990530508110323, 'init_value': -0.3095080554485321, 'ave_value': -0.24890481481278265, 'soft_opc': nan} step=332




2022-04-21 23:12.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.25 [info     ] FQE_20220421231220: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016403485493487622, 'time_algorithm_update': 0.00913377124142934, 'loss': 0.0033541378580561154, 'time_step': 0.009368785892624453, 'init_value': -0.31561407446861267, 'ave_value': -0.25913023708035815, 'soft_opc': nan} step=498




2022-04-21 23:12.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.27 [info     ] FQE_20220421231220: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016058353056390602, 'time_algorithm_update': 0.008742173034024525, 'loss': 0.0030330352797958418, 'time_step': 0.008971586284867251, 'init_value': -0.3175506293773651, 'ave_value': -0.2532670114810268, 'soft_opc': nan} step=664




2022-04-21 23:12.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.29 [info     ] FQE_20220421231220: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016197382685649827, 'time_algorithm_update': 0.00926924182708005, 'loss': 0.002784012918534453, 'time_step': 0.00949247055743114, 'init_value': -0.34910809993743896, 'ave_value': -0.271599913254246, 'soft_opc': nan} step=830




2022-04-21 23:12.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.30 [info     ] FQE_20220421231220: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016624094491981598, 'time_algorithm_update': 0.009190609656184553, 'loss': 0.002437842938998512, 'time_step': 0.00942823973046728, 'init_value': -0.35039710998535156, 'ave_value': -0.27775941575197755, 'soft_opc': nan} step=996




2022-04-21 23:12.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.32 [info     ] FQE_20220421231220: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001596341650170016, 'time_algorithm_update': 0.00907724449433476, 'loss': 0.0022240602348213004, 'time_step': 0.009304799229265696, 'init_value': -0.38372570276260376, 'ave_value': -0.3080473854273625, 'soft_opc': nan} step=1162




2022-04-21 23:12.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.34 [info     ] FQE_20220421231220: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016289446727339043, 'time_algorithm_update': 0.008877261575446072, 'loss': 0.0018915423154915087, 'time_step': 0.009111450379153332, 'init_value': -0.3919851779937744, 'ave_value': -0.31656187590786367, 'soft_opc': nan} step=1328




2022-04-21 23:12.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.35 [info     ] FQE_20220421231220: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016481905098421028, 'time_algorithm_update': 0.009012571300368711, 'loss': 0.0016350471923066057, 'time_step': 0.009250217173472944, 'init_value': -0.40163832902908325, 'ave_value': -0.32694397154746596, 'soft_opc': nan} step=1494




2022-04-21 23:12.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.37 [info     ] FQE_20220421231220: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015788767711225762, 'time_algorithm_update': 0.009135919881154256, 'loss': 0.0016172949728943094, 'time_step': 0.009363598134144243, 'init_value': -0.42705777287483215, 'ave_value': -0.35504057541623846, 'soft_opc': nan} step=1660




2022-04-21 23:12.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.38 [info     ] FQE_20220421231220: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001631099057484822, 'time_algorithm_update': 0.008334987134818572, 'loss': 0.0015190898083818022, 'time_step': 0.008572230856102633, 'init_value': -0.43229979276657104, 'ave_value': -0.3613008059745839, 'soft_opc': nan} step=1826




2022-04-21 23:12.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.40 [info     ] FQE_20220421231220: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016013685479221573, 'time_algorithm_update': 0.008957814021282885, 'loss': 0.0015364124705097694, 'time_step': 0.009190437305404479, 'init_value': -0.4843020439147949, 'ave_value': -0.4040358271382682, 'soft_opc': nan} step=1992




2022-04-21 23:12.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.42 [info     ] FQE_20220421231220: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016837809459272637, 'time_algorithm_update': 0.00910937786102295, 'loss': 0.001697389512738011, 'time_step': 0.009347480463694376, 'init_value': -0.5421708822250366, 'ave_value': -0.45276923753774245, 'soft_opc': nan} step=2158




2022-04-21 23:12.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.44 [info     ] FQE_20220421231220: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016805350062358812, 'time_algorithm_update': 0.009173608687986811, 'loss': 0.0018114658719186772, 'time_step': 0.009412432291421545, 'init_value': -0.5928993821144104, 'ave_value': -0.48538420633182167, 'soft_opc': nan} step=2324




2022-04-21 23:12.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.45 [info     ] FQE_20220421231220: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001653921173279544, 'time_algorithm_update': 0.00894613294716341, 'loss': 0.0018708120077057372, 'time_step': 0.009185571268380406, 'init_value': -0.6142523288726807, 'ave_value': -0.4989043082832149, 'soft_opc': nan} step=2490




2022-04-21 23:12.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.47 [info     ] FQE_20220421231220: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016486644744873047, 'time_algorithm_update': 0.009216028523732381, 'loss': 0.0020305274697146884, 'time_step': 0.009452111749763948, 'init_value': -0.6744624376296997, 'ave_value': -0.545888176505026, 'soft_opc': nan} step=2656




2022-04-21 23:12.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.49 [info     ] FQE_20220421231220: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016076737139598433, 'time_algorithm_update': 0.00910177575536521, 'loss': 0.002151961623675594, 'time_step': 0.00933905825557479, 'init_value': -0.7340037226676941, 'ave_value': -0.5840249429324742, 'soft_opc': nan} step=2822




2022-04-21 23:12.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.50 [info     ] FQE_20220421231220: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016911920294704208, 'time_algorithm_update': 0.008816286741969097, 'loss': 0.0023009555848421954, 'time_step': 0.009058237075805664, 'init_value': -0.7473663091659546, 'ave_value': -0.5900390901007094, 'soft_opc': nan} step=2988




2022-04-21 23:12.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.52 [info     ] FQE_20220421231220: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016386968543730587, 'time_algorithm_update': 0.008657115051545292, 'loss': 0.0026303778244014158, 'time_step': 0.008893403662256447, 'init_value': -0.8260059356689453, 'ave_value': -0.646370362273946, 'soft_opc': nan} step=3154




2022-04-21 23:12.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.53 [info     ] FQE_20220421231220: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015966289014701383, 'time_algorithm_update': 0.009142071367746377, 'loss': 0.002684724047648193, 'time_step': 0.009375089622405639, 'init_value': -0.8570326566696167, 'ave_value': -0.671881174761802, 'soft_opc': nan} step=3320




2022-04-21 23:12.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.55 [info     ] FQE_20220421231220: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001654050436364599, 'time_algorithm_update': 0.009258294680032385, 'loss': 0.0029759183533052094, 'time_step': 0.009495858686516085, 'init_value': -0.9223495721817017, 'ave_value': -0.722920184473398, 'soft_opc': nan} step=3486




2022-04-21 23:12.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.57 [info     ] FQE_20220421231220: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016421869576695454, 'time_algorithm_update': 0.008787045995873141, 'loss': 0.0031656085768453665, 'time_step': 0.009022439818784415, 'init_value': -0.9605427384376526, 'ave_value': -0.7529237348355587, 'soft_opc': nan} step=3652




2022-04-21 23:12.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:12.58 [info     ] FQE_20220421231220: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016035660203680936, 'time_algorithm_update': 0.009120369532022131, 'loss': 0.003518903161278443, 'time_step': 0.00935372961572854, 'init_value': -0.9866300225257874, 'ave_value': -0.7666355388274685, 'soft_opc': nan} step=3818




2022-04-21 23:12.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.00 [info     ] FQE_20220421231220: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016410092273390437, 'time_algorithm_update': 0.00908168109066515, 'loss': 0.003617007199096976, 'time_step': 0.009320404156144843, 'init_value': -0.9851298332214355, 'ave_value': -0.7681570249032816, 'soft_opc': nan} step=3984




2022-04-21 23:13.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.02 [info     ] FQE_20220421231220: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016278818429234517, 'time_algorithm_update': 0.008777219128895956, 'loss': 0.003666811898464326, 'time_step': 0.009016126035207725, 'init_value': -1.035863995552063, 'ave_value': -0.7950830967937437, 'soft_opc': nan} step=4150




2022-04-21 23:13.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.03 [info     ] FQE_20220421231220: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016577847032661898, 'time_algorithm_update': 0.009221958826823407, 'loss': 0.003872484621993008, 'time_step': 0.009463893361838466, 'init_value': -1.0633147954940796, 'ave_value': -0.8023683251745932, 'soft_opc': nan} step=4316




2022-04-21 23:13.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.05 [info     ] FQE_20220421231220: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016383377902479056, 'time_algorithm_update': 0.009224670479096562, 'loss': 0.004050133400947327, 'time_step': 0.00946129804634186, 'init_value': -1.087661623954773, 'ave_value': -0.8266595309071646, 'soft_opc': nan} step=4482




2022-04-21 23:13.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.07 [info     ] FQE_20220421231220: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001599487051906356, 'time_algorithm_update': 0.008683873946408191, 'loss': 0.004254001163907703, 'time_step': 0.008913159370422363, 'init_value': -1.1074535846710205, 'ave_value': -0.8370246126133999, 'soft_opc': nan} step=4648




2022-04-21 23:13.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.08 [info     ] FQE_20220421231220: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016645351088190652, 'time_algorithm_update': 0.008773776422063988, 'loss': 0.004509306961077221, 'time_step': 0.009013029466192406, 'init_value': -1.1482905149459839, 'ave_value': -0.8770654161964115, 'soft_opc': nan} step=4814




2022-04-21 23:13.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.10 [info     ] FQE_20220421231220: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001609555209975645, 'time_algorithm_update': 0.009009131466049746, 'loss': 0.00467789237329972, 'time_step': 0.009241148649928081, 'init_value': -1.1898833513259888, 'ave_value': -0.9044447519380163, 'soft_opc': nan} step=4980




2022-04-21 23:13.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.12 [info     ] FQE_20220421231220: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016198818942150437, 'time_algorithm_update': 0.009123322475387389, 'loss': 0.004849995540964125, 'time_step': 0.00935784018183329, 'init_value': -1.1981921195983887, 'ave_value': -0.9028433825745171, 'soft_opc': nan} step=5146




2022-04-21 23:13.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.13 [info     ] FQE_20220421231220: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016134187399622905, 'time_algorithm_update': 0.00880889863852995, 'loss': 0.004894576346289916, 'time_step': 0.009043170745114246, 'init_value': -1.1955126523971558, 'ave_value': -0.9004045965888344, 'soft_opc': nan} step=5312




2022-04-21 23:13.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.15 [info     ] FQE_20220421231220: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001644930207585714, 'time_algorithm_update': 0.009165278400283262, 'loss': 0.0044565218562507124, 'time_step': 0.009398506348391613, 'init_value': -1.186233639717102, 'ave_value': -0.9123418559383198, 'soft_opc': nan} step=5478




2022-04-21 23:13.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.17 [info     ] FQE_20220421231220: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016246071781020566, 'time_algorithm_update': 0.009257384093410998, 'loss': 0.005003792554257909, 'time_step': 0.009493465883186064, 'init_value': -1.2476871013641357, 'ave_value': -0.9670744122517807, 'soft_opc': nan} step=5644




2022-04-21 23:13.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.18 [info     ] FQE_20220421231220: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.000162792493061847, 'time_algorithm_update': 0.009155188698366463, 'loss': 0.005296500550312304, 'time_step': 0.009392262941383454, 'init_value': -1.2400039434432983, 'ave_value': -0.9654411301839056, 'soft_opc': nan} step=5810




2022-04-21 23:13.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.20 [info     ] FQE_20220421231220: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001634359359741211, 'time_algorithm_update': 0.008676231625568435, 'loss': 0.005160667321641918, 'time_step': 0.008908869272255036, 'init_value': -1.2317824363708496, 'ave_value': -0.9485376869429003, 'soft_opc': nan} step=5976




2022-04-21 23:13.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.22 [info     ] FQE_20220421231220: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016627110630632882, 'time_algorithm_update': 0.010064018778054112, 'loss': 0.00567886722537111, 'time_step': 0.010304178100034415, 'init_value': -1.2397310733795166, 'ave_value': -0.9654032010014529, 'soft_opc': nan} step=6142




2022-04-21 23:13.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.24 [info     ] FQE_20220421231220: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016170668314738446, 'time_algorithm_update': 0.009868325957332749, 'loss': 0.005607341213680591, 'time_step': 0.010102111172963339, 'init_value': -1.2466793060302734, 'ave_value': -0.981164198632243, 'soft_opc': nan} step=6308




2022-04-21 23:13.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.26 [info     ] FQE_20220421231220: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016369302588773062, 'time_algorithm_update': 0.009604571813560394, 'loss': 0.005567524806913325, 'time_step': 0.009840409439730358, 'init_value': -1.2739503383636475, 'ave_value': -1.0063516225636442, 'soft_opc': nan} step=6474




2022-04-21 23:13.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.27 [info     ] FQE_20220421231220: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016090525202004305, 'time_algorithm_update': 0.00928680437156953, 'loss': 0.005877885013553651, 'time_step': 0.009519365896661598, 'init_value': -1.312699556350708, 'ave_value': -1.0265815582898286, 'soft_opc': nan} step=6640




2022-04-21 23:13.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.29 [info     ] FQE_20220421231220: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015870490706110574, 'time_algorithm_update': 0.009796482970915645, 'loss': 0.0059101866007325275, 'time_step': 0.01002208870577525, 'init_value': -1.3067259788513184, 'ave_value': -1.0256317890064606, 'soft_opc': nan} step=6806




2022-04-21 23:13.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.31 [info     ] FQE_20220421231220: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014523569359836807, 'time_algorithm_update': 0.009809324540287614, 'loss': 0.006109487664912468, 'time_step': 0.0100196240896202, 'init_value': -1.272816777229309, 'ave_value': -0.9761397350243889, 'soft_opc': nan} step=6972




2022-04-21 23:13.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.33 [info     ] FQE_20220421231220: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001509448131883001, 'time_algorithm_update': 0.009567886950021767, 'loss': 0.0063763265655774354, 'time_step': 0.009782531175268701, 'init_value': -1.3443905115127563, 'ave_value': -1.022496827762272, 'soft_opc': nan} step=7138




2022-04-21 23:13.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.34 [info     ] FQE_20220421231220: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014976133783179592, 'time_algorithm_update': 0.008922023945544139, 'loss': 0.006619975856029855, 'time_step': 0.009135356868606016, 'init_value': -1.3497788906097412, 'ave_value': -1.0480076369955331, 'soft_opc': nan} step=7304




2022-04-21 23:13.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.36 [info     ] FQE_20220421231220: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015214695987931216, 'time_algorithm_update': 0.009831908237503236, 'loss': 0.006470796011152666, 'time_step': 0.010051226041403162, 'init_value': -1.3347172737121582, 'ave_value': -1.0160129943510166, 'soft_opc': nan} step=7470




2022-04-21 23:13.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.38 [info     ] FQE_20220421231220: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016080471406500023, 'time_algorithm_update': 0.009613900299531868, 'loss': 0.006798982806959616, 'time_step': 0.009844693792871681, 'init_value': -1.3943182229995728, 'ave_value': -1.0674527825783469, 'soft_opc': nan} step=7636




2022-04-21 23:13.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.40 [info     ] FQE_20220421231220: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016512928238834244, 'time_algorithm_update': 0.009924008185604969, 'loss': 0.006918344916165694, 'time_step': 0.01016343501676996, 'init_value': -1.4533872604370117, 'ave_value': -1.136744082300412, 'soft_opc': nan} step=7802




2022-04-21 23:13.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.41 [info     ] FQE_20220421231220: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016007796827569064, 'time_algorithm_update': 0.009522738226925034, 'loss': 0.007203011005606323, 'time_step': 0.009752310902239329, 'init_value': -1.4925649166107178, 'ave_value': -1.15234845735569, 'soft_opc': nan} step=7968




2022-04-21 23:13.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.43 [info     ] FQE_20220421231220: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016072715621396718, 'time_algorithm_update': 0.009697865290814135, 'loss': 0.007441868922106518, 'time_step': 0.009929537773132324, 'init_value': -1.5220867395401, 'ave_value': -1.198853682663275, 'soft_opc': nan} step=8134




2022-04-21 23:13.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:13.45 [info     ] FQE_20220421231220: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00015011034816144462, 'time_algorithm_update': 0.009574663208191654, 'loss': 0.007587295092736281, 'time_step': 0.009792658219854516, 'init_value': -1.5510737895965576, 'ave_value': -1.2234640671611503, 'soft_opc': nan} step=8300




2022-04-21 23:13.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231220/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-21 23:13.45 [info     ] Directory is created at d3rlpy_logs/FQE_20220421231345
2022-04-21 23:13.45 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 23:13.45 [debug    ] Building models...
2022-04-21 23:13.45 [debug    ] Models have been built.
2022-04-21 23:13.45 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220421231345/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 23:13.49 [info     ] FQE_20220421231345: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016135185263877692, 'time_algorithm_update': 0.009741035311721092, 'loss': 0.023659260816287334, 'time_step': 0.009974429773729901, 'init_value': -1.0154865980148315, 'ave_value': -1.0098711355379573, 'soft_opc': nan} step=344




2022-04-21 23:13.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:13.53 [info     ] FQE_20220421231345: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001677205396253009, 'time_algorithm_update': 0.00950183909992839, 'loss': 0.021429122574519108, 'time_step': 0.009743013354234917, 'init_value': -1.607008695602417, 'ave_value': -1.6510543664766324, 'soft_opc': nan} step=688




2022-04-21 23:13.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:13.56 [info     ] FQE_20220421231345: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016899788102438284, 'time_algorithm_update': 0.009838316329689912, 'loss': 0.025045453034731192, 'time_step': 0.01007867829744206, 'init_value': -2.3632454872131348, 'ave_value': -2.5434995577142043, 'soft_opc': nan} step=1032




2022-04-21 23:13.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.00 [info     ] FQE_20220421231345: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016561566397201184, 'time_algorithm_update': 0.009607875069906546, 'loss': 0.028853756822320786, 'time_step': 0.00984401550403861, 'init_value': -2.701035499572754, 'ave_value': -3.0934599741420765, 'soft_opc': nan} step=1376




2022-04-21 23:14.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.04 [info     ] FQE_20220421231345: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016785014507382414, 'time_algorithm_update': 0.00974506416986155, 'loss': 0.03525521564513965, 'time_step': 0.009984363650166712, 'init_value': -3.1342592239379883, 'ave_value': -3.8512613465992716, 'soft_opc': nan} step=1720




2022-04-21 23:14.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.07 [info     ] FQE_20220421231345: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.000167267267094102, 'time_algorithm_update': 0.009530441705570665, 'loss': 0.04287141409085327, 'time_step': 0.009770883377208266, 'init_value': -3.470241069793701, 'ave_value': -4.573425173836651, 'soft_opc': nan} step=2064




2022-04-21 23:14.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.11 [info     ] FQE_20220421231345: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016835193301356115, 'time_algorithm_update': 0.00974746914797051, 'loss': 0.05407316992492523, 'time_step': 0.009992846915888231, 'init_value': -3.8029160499572754, 'ave_value': -5.410750915996126, 'soft_opc': nan} step=2408




2022-04-21 23:14.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.14 [info     ] FQE_20220421231345: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017481210619904274, 'time_algorithm_update': 0.009633022685383642, 'loss': 0.0652029161493099, 'time_step': 0.009880395822746808, 'init_value': -3.9120256900787354, 'ave_value': -6.12645993620023, 'soft_opc': nan} step=2752




2022-04-21 23:14.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.18 [info     ] FQE_20220421231345: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016775588656580724, 'time_algorithm_update': 0.00990764762079993, 'loss': 0.07320195483585257, 'time_step': 0.010150754867598068, 'init_value': -4.154047012329102, 'ave_value': -6.965291156445269, 'soft_opc': nan} step=3096




2022-04-21 23:14.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.22 [info     ] FQE_20220421231345: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016808648442113124, 'time_algorithm_update': 0.00903936458188434, 'loss': 0.08642566466682353, 'time_step': 0.009283860062443933, 'init_value': -4.3505449295043945, 'ave_value': -7.849011350758709, 'soft_opc': nan} step=3440




2022-04-21 23:14.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.25 [info     ] FQE_20220421231345: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001688038193902304, 'time_algorithm_update': 0.00990027813024299, 'loss': 0.09223181373158167, 'time_step': 0.010143431120140607, 'init_value': -4.475156784057617, 'ave_value': -8.495745779883519, 'soft_opc': nan} step=3784




2022-04-21 23:14.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.29 [info     ] FQE_20220421231345: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016697409541107888, 'time_algorithm_update': 0.009429428466530733, 'loss': 0.10404600739630676, 'time_step': 0.009672534327174341, 'init_value': -4.6236114501953125, 'ave_value': -9.258974062209226, 'soft_opc': nan} step=4128




2022-04-21 23:14.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.33 [info     ] FQE_20220421231345: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017122543135354685, 'time_algorithm_update': 0.009980634894481925, 'loss': 0.10914628315435419, 'time_step': 0.010224850371826527, 'init_value': -4.607564449310303, 'ave_value': -9.747451674630279, 'soft_opc': nan} step=4472




2022-04-21 23:14.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.36 [info     ] FQE_20220421231345: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001687941163085228, 'time_algorithm_update': 0.009458504443944887, 'loss': 0.11856820277848043, 'time_step': 0.00970004117766092, 'init_value': -4.778444290161133, 'ave_value': -10.465326639581555, 'soft_opc': nan} step=4816




2022-04-21 23:14.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.40 [info     ] FQE_20220421231345: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017052195792974426, 'time_algorithm_update': 0.009835177382757498, 'loss': 0.12833669688281885, 'time_step': 0.010079543257868567, 'init_value': -5.003600120544434, 'ave_value': -11.041405371131443, 'soft_opc': nan} step=5160




2022-04-21 23:14.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.44 [info     ] FQE_20220421231345: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.000166949144629545, 'time_algorithm_update': 0.009886729162792826, 'loss': 0.13972551327978455, 'time_step': 0.010121220766111862, 'init_value': -5.7367329597473145, 'ave_value': -12.137887383665184, 'soft_opc': nan} step=5504




2022-04-21 23:14.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.47 [info     ] FQE_20220421231345: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016717993935873342, 'time_algorithm_update': 0.009576927783877351, 'loss': 0.15323180081268642, 'time_step': 0.009817912828090579, 'init_value': -5.674556732177734, 'ave_value': -12.309119560182733, 'soft_opc': nan} step=5848




2022-04-21 23:14.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.51 [info     ] FQE_20220421231345: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017098562661991564, 'time_algorithm_update': 0.009767849778020105, 'loss': 0.1561014786508844, 'time_step': 0.010013057742007943, 'init_value': -5.945372581481934, 'ave_value': -12.869391658782186, 'soft_opc': nan} step=6192




2022-04-21 23:14.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.55 [info     ] FQE_20220421231345: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001679347005001334, 'time_algorithm_update': 0.009449630282646003, 'loss': 0.1734256516562688, 'time_step': 0.009690413641375165, 'init_value': -6.367223739624023, 'ave_value': -13.475962754047837, 'soft_opc': nan} step=6536




2022-04-21 23:14.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:14.59 [info     ] FQE_20220421231345: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017084423885789027, 'time_algorithm_update': 0.009968226039132406, 'loss': 0.18035400173962549, 'time_step': 0.01021408341651739, 'init_value': -6.658753395080566, 'ave_value': -13.899245451018214, 'soft_opc': nan} step=6880




2022-04-21 23:14.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.02 [info     ] FQE_20220421231345: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016694845155228015, 'time_algorithm_update': 0.009513454381809679, 'loss': 0.19427723527972607, 'time_step': 0.009754522595294687, 'init_value': -7.194891929626465, 'ave_value': -14.592749918703628, 'soft_opc': nan} step=7224




2022-04-21 23:15.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.06 [info     ] FQE_20220421231345: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00016910808030949084, 'time_algorithm_update': 0.009567740351654763, 'loss': 0.20371244254056364, 'time_step': 0.009808823119762331, 'init_value': -7.709219932556152, 'ave_value': -15.21383454184308, 'soft_opc': nan} step=7568




2022-04-21 23:15.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.09 [info     ] FQE_20220421231345: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016772816347521404, 'time_algorithm_update': 0.009610997382984605, 'loss': 0.2098190813476956, 'time_step': 0.009853008181549782, 'init_value': -8.214139938354492, 'ave_value': -15.844665739032532, 'soft_opc': nan} step=7912




2022-04-21 23:15.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.13 [info     ] FQE_20220421231345: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016913303109102471, 'time_algorithm_update': 0.009750578985657803, 'loss': 0.2193308391521663, 'time_step': 0.009993698014769443, 'init_value': -8.758444786071777, 'ave_value': -16.410506047647413, 'soft_opc': nan} step=8256




2022-04-21 23:15.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.17 [info     ] FQE_20220421231345: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017156850459963777, 'time_algorithm_update': 0.009649988523749419, 'loss': 0.23100920924700277, 'time_step': 0.009896247885948005, 'init_value': -8.969804763793945, 'ave_value': -16.76569229561735, 'soft_opc': nan} step=8600




2022-04-21 23:15.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.20 [info     ] FQE_20220421231345: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016924045806707337, 'time_algorithm_update': 0.009499897097432337, 'loss': 0.2473975234545854, 'time_step': 0.009744406439537225, 'init_value': -9.614852905273438, 'ave_value': -17.51841847046807, 'soft_opc': nan} step=8944




2022-04-21 23:15.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.24 [info     ] FQE_20220421231345: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017317575077677882, 'time_algorithm_update': 0.009627233410990515, 'loss': 0.25358996747148244, 'time_step': 0.009874626647594363, 'init_value': -9.584741592407227, 'ave_value': -17.46268908516378, 'soft_opc': nan} step=9288




2022-04-21 23:15.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.28 [info     ] FQE_20220421231345: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016844965690790222, 'time_algorithm_update': 0.009769085534783296, 'loss': 0.27093045943860655, 'time_step': 0.010011905847593795, 'init_value': -10.311447143554688, 'ave_value': -18.264412935467455, 'soft_opc': nan} step=9632




2022-04-21 23:15.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.31 [info     ] FQE_20220421231345: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017054829486580782, 'time_algorithm_update': 0.00958201289176941, 'loss': 0.2901064236188134, 'time_step': 0.009830139404119448, 'init_value': -10.690914154052734, 'ave_value': -18.595763102415447, 'soft_opc': nan} step=9976




2022-04-21 23:15.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.35 [info     ] FQE_20220421231345: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001692016457402429, 'time_algorithm_update': 0.009661980839662774, 'loss': 0.2997027507355047, 'time_step': 0.009907572768455329, 'init_value': -11.031566619873047, 'ave_value': -18.705704160491088, 'soft_opc': nan} step=10320




2022-04-21 23:15.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.39 [info     ] FQE_20220421231345: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016958907593128293, 'time_algorithm_update': 0.00975569112356319, 'loss': 0.300137733350248, 'time_step': 0.01000229841054872, 'init_value': -11.072301864624023, 'ave_value': -18.746381540690457, 'soft_opc': nan} step=10664




2022-04-21 23:15.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.42 [info     ] FQE_20220421231345: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017021145931510039, 'time_algorithm_update': 0.009812686332436494, 'loss': 0.31747316231214723, 'time_step': 0.010058759949928107, 'init_value': -12.026909828186035, 'ave_value': -19.640707828279012, 'soft_opc': nan} step=11008




2022-04-21 23:15.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.46 [info     ] FQE_20220421231345: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017058086949725483, 'time_algorithm_update': 0.009799898363823114, 'loss': 0.33761539551981745, 'time_step': 0.010047353284303532, 'init_value': -12.354965209960938, 'ave_value': -19.929371449190217, 'soft_opc': nan} step=11352




2022-04-21 23:15.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.50 [info     ] FQE_20220421231345: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001803601897040079, 'time_algorithm_update': 0.00928246489790983, 'loss': 0.34326910785789233, 'time_step': 0.009538443282593128, 'init_value': -13.076796531677246, 'ave_value': -20.562018134357693, 'soft_opc': nan} step=11696




2022-04-21 23:15.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.53 [info     ] FQE_20220421231345: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017411972201147744, 'time_algorithm_update': 0.009924593359925026, 'loss': 0.3631350161632263, 'time_step': 0.010175158118092737, 'init_value': -13.549795150756836, 'ave_value': -21.005717968618548, 'soft_opc': nan} step=12040




2022-04-21 23:15.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:15.57 [info     ] FQE_20220421231345: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016813985137052314, 'time_algorithm_update': 0.009486215059147325, 'loss': 0.37461468473152626, 'time_step': 0.009731446587762167, 'init_value': -14.092781066894531, 'ave_value': -21.519777380399876, 'soft_opc': nan} step=12384




2022-04-21 23:15.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.01 [info     ] FQE_20220421231345: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017065918722818064, 'time_algorithm_update': 0.009970962308173957, 'loss': 0.37888421863046756, 'time_step': 0.010217269492703815, 'init_value': -14.297212600708008, 'ave_value': -21.67407982671583, 'soft_opc': nan} step=12728




2022-04-21 23:16.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.04 [info     ] FQE_20220421231345: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016898956409720488, 'time_algorithm_update': 0.008843191141305967, 'loss': 0.3917782087783264, 'time_step': 0.009086175020350966, 'init_value': -14.429178237915039, 'ave_value': -21.810482481256262, 'soft_opc': nan} step=13072




2022-04-21 23:16.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.08 [info     ] FQE_20220421231345: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017036255015883336, 'time_algorithm_update': 0.009520554958387863, 'loss': 0.4138224054978034, 'time_step': 0.009767361851625665, 'init_value': -15.090351104736328, 'ave_value': -22.505280213474155, 'soft_opc': nan} step=13416




2022-04-21 23:16.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.11 [info     ] FQE_20220421231345: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017100849816965502, 'time_algorithm_update': 0.009193307438562082, 'loss': 0.42204639528401544, 'time_step': 0.009438520254090775, 'init_value': -15.593191146850586, 'ave_value': -23.015824028017285, 'soft_opc': nan} step=13760




2022-04-21 23:16.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.15 [info     ] FQE_20220421231345: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017006868539854537, 'time_algorithm_update': 0.009538119615510453, 'loss': 0.4398048462340782, 'time_step': 0.009781457657037779, 'init_value': -15.700456619262695, 'ave_value': -23.028252120544245, 'soft_opc': nan} step=14104




2022-04-21 23:16.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.18 [info     ] FQE_20220421231345: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016751123029132222, 'time_algorithm_update': 0.009003801401271376, 'loss': 0.4492797266453678, 'time_step': 0.00924411416053772, 'init_value': -16.093425750732422, 'ave_value': -23.42750383690671, 'soft_opc': nan} step=14448




2022-04-21 23:16.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.22 [info     ] FQE_20220421231345: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016859589621078138, 'time_algorithm_update': 0.009472684804783311, 'loss': 0.47153383009623046, 'time_step': 0.009715757397718208, 'init_value': -16.417316436767578, 'ave_value': -23.924701203097094, 'soft_opc': nan} step=14792




2022-04-21 23:16.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.25 [info     ] FQE_20220421231345: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016863401546034703, 'time_algorithm_update': 0.009402683307958204, 'loss': 0.48803210002911646, 'time_step': 0.009646561256674834, 'init_value': -16.740478515625, 'ave_value': -24.19865853931453, 'soft_opc': nan} step=15136




2022-04-21 23:16.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.29 [info     ] FQE_20220421231345: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016743221948313159, 'time_algorithm_update': 0.00926781185837679, 'loss': 0.5136576654026718, 'time_step': 0.009511727233265722, 'init_value': -17.066373825073242, 'ave_value': -24.36435944937371, 'soft_opc': nan} step=15480




2022-04-21 23:16.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.32 [info     ] FQE_20220421231345: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001698898714642192, 'time_algorithm_update': 0.009195988261422445, 'loss': 0.5293031540688474, 'time_step': 0.00943851678870445, 'init_value': -17.11656951904297, 'ave_value': -24.52711114426991, 'soft_opc': nan} step=15824




2022-04-21 23:16.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.36 [info     ] FQE_20220421231345: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016834777454997218, 'time_algorithm_update': 0.009213158557581346, 'loss': 0.5526540136820268, 'time_step': 0.009456482737563377, 'init_value': -17.603445053100586, 'ave_value': -24.96674670985153, 'soft_opc': nan} step=16168




2022-04-21 23:16.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.40 [info     ] FQE_20220421231345: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001705981964288756, 'time_algorithm_update': 0.009401245865710946, 'loss': 0.5611735718584684, 'time_step': 0.009647666714912238, 'init_value': -17.854185104370117, 'ave_value': -25.0376679829649, 'soft_opc': nan} step=16512




2022-04-21 23:16.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.43 [info     ] FQE_20220421231345: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016934303350226823, 'time_algorithm_update': 0.009161942920019461, 'loss': 0.5806797251652189, 'time_step': 0.009404902541360189, 'init_value': -17.422733306884766, 'ave_value': -24.641482918488013, 'soft_opc': nan} step=16856




2022-04-21 23:16.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:16.47 [info     ] FQE_20220421231345: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017031611398208972, 'time_algorithm_update': 0.009287597828133161, 'loss': 0.5930714876185236, 'time_step': 0.00953529255334721, 'init_value': -17.55489158630371, 'ave_value': -24.831502176942053, 'soft_opc': nan} step=17200




2022-04-21 23:16.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421231345/model_17200.pt
search iteration:  4
using hyper params:  [0.0023171377286123626, 0.0044517498285527295, 2.133534195243116e-05, 3]
2022-04-21 23:16.47 [debug    ] RoundIterator is selected.
2022-04-21 23:16.47 [info     ] Directory is created at d3rlpy_logs/CQL_20220421231647
2022-04-21 23:16.47 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 23:16.47 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-21 23:16.47 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220421231647/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0023171377286123626, 'actor_optim_factory': {'opt

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:17.08 [info     ] CQL_20220421231647: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00035880962548228356, 'time_algorithm_update': 0.058360518747671494, 'temp_loss': 4.861398892595589, 'temp': 0.9961240298486169, 'alpha_loss': -17.6473845718913, 'alpha': 1.0177601885933407, 'critic_loss': 62.193858427808465, 'actor_loss': 1.00135763510631, 'time_step': 0.058811722463266006, 'td_error': 1.2326636532708988, 'init_value': -3.6553351879119873, 'ave_value': -3.2895247382083146} step=346
2022-04-21 23:17.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:17.29 [info     ] CQL_20220421231647: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003566480096364986, 'time_algorithm_update': 0.05827416505427719, 'temp_loss': 4.98946144677311, 'temp': 0.9885966136965448, 'alpha_loss': -18.36386899451989, 'alpha': 1.0543193372687854, 'critic_loss': 100.05385410992396, 'actor_loss': 3.6546635992954233, 'time_step': 0.05872447091030937, 'td_error': 1.2767485755249808, 'init_value': -5.895442008972168, 'ave_value': -5.261224315979289} step=692
2022-04-21 23:17.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:17.50 [info     ] CQL_20220421231647: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00036275180088991377, 'time_algorithm_update': 0.05864615867592696, 'temp_loss': 4.9561033855283885, 'temp': 0.9812812152280973, 'alpha_loss': -19.031263963335512, 'alpha': 1.0926513241205602, 'critic_loss': 196.1133577203475, 'actor_loss': 5.354006685962567, 'time_step': 0.05910551961446773, 'td_error': 1.2783626312820273, 'init_value': -6.789315700531006, 'ave_value': -6.216129739943586} step=1038
2022-04-21 23:17.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:18.11 [info     ] CQL_20220421231647: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003565026156475089, 'time_algorithm_update': 0.05744352506075291, 'temp_loss': 4.918142048609739, 'temp': 0.9740795775645041, 'alpha_loss': -19.714699546725765, 'alpha': 1.1328411388259403, 'critic_loss': 349.3939858585424, 'actor_loss': 5.375885053866171, 'time_step': 0.05789391705066482, 'td_error': 1.2651732229569324, 'init_value': -5.945420742034912, 'ave_value': -5.586169413188556} step=1384
2022-04-21 23:18.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:18.31 [info     ] CQL_20220421231647: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00035677273149435233, 'time_algorithm_update': 0.054859331577499476, 'temp_loss': 4.881512797636793, 'temp': 0.9669686033891115, 'alpha_loss': -20.43809066618109, 'alpha': 1.1749376465130403, 'critic_loss': 566.208811500858, 'actor_loss': 3.673159371910757, 'time_step': 0.05530930047779414, 'td_error': 1.2567694442459652, 'init_value': -4.416015625, 'ave_value': -4.256651838100482} step=1730
2022-04-21 23:18.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:18.51 [info     ] CQL_20220421231647: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00036356559378563323, 'time_algorithm_update': 0.05490840859495835, 'temp_loss': 4.846782440395025, 'temp': 0.9599312840169565, 'alpha_loss': -21.202407792813517, 'alpha': 1.219000983100406, 'critic_loss': 824.024152810863, 'actor_loss': 2.327607951412311, 'time_step': 0.05536523719743497, 'td_error': 1.264312539776505, 'init_value': -3.845973253250122, 'ave_value': -3.7449100912662976} step=2076
2022-04-21 23:18.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:19.11 [info     ] CQL_20220421231647: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003589474396898567, 'time_algorithm_update': 0.055049847316190684, 'temp_loss': 4.811648395020149, 'temp': 0.9529609836939442, 'alpha_loss': -21.998793050732917, 'alpha': 1.2650712435645175, 'critic_loss': 1082.8558527775583, 'actor_loss': 2.0151238854909908, 'time_step': 0.0554982175716775, 'td_error': 1.2681698125696277, 'init_value': -3.743955135345459, 'ave_value': -3.6821607692369414} step=2422
2022-04-21 23:19.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:19.31 [info     ] CQL_20220421231647: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00035702768777836263, 'time_algorithm_update': 0.05453318736456722, 'temp_loss': 4.776151335997389, 'temp': 0.946055964066114, 'alpha_loss': -22.837201399610223, 'alpha': 1.3131894307329475, 'critic_loss': 1339.7693029855718, 'actor_loss': 2.006176736658019, 'time_step': 0.05497416182060462, 'td_error': 1.2722437699461764, 'init_value': -3.8946404457092285, 'ave_value': -3.8491630640568015} step=2768
2022-04-21 23:19.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:19.51 [info     ] CQL_20220421231647: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00035648814515571375, 'time_algorithm_update': 0.05519317684835092, 'temp_loss': 4.742282240376996, 'temp': 0.9392106775948078, 'alpha_loss': -23.707055400561735, 'alpha': 1.3633966466594982, 'critic_loss': 1607.3091141364478, 'actor_loss': 2.0742413039841403, 'time_step': 0.05563795566558838, 'td_error': 1.2765859383917568, 'init_value': -4.033670902252197, 'ave_value': -3.997983894940517} step=3114
2022-04-21 23:19.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:20.10 [info     ] CQL_20220421231647: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003567975380517155, 'time_algorithm_update': 0.05254270989081763, 'temp_loss': 4.707905623265084, 'temp': 0.9324216949457378, 'alpha_loss': -24.61958265580194, 'alpha': 1.415739273749335, 'critic_loss': 1890.5636473198158, 'actor_loss': 2.265667878134402, 'time_step': 0.05298674520040523, 'td_error': 1.280947506881777, 'init_value': -4.2093825340271, 'ave_value': -4.179277161909766} step=3460
2022-04-21 23:20.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:20.30 [info     ] CQL_20220421231647: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003596110150993215, 'time_algorithm_update': 0.052787934424560194, 'temp_loss': 4.6740716440829235, 'temp': 0.9256873356469105, 'alpha_loss': -25.56532444154596, 'alpha': 1.4702680565718282, 'critic_loss': 2196.0955658840994, 'actor_loss': 2.529094986143829, 'time_step': 0.053236762223216154, 'td_error': 1.28593665456971, 'init_value': -4.515296459197998, 'ave_value': -4.491296940632147} step=3806
2022-04-21 23:20.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:20.49 [info     ] CQL_20220421231647: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00035932780690275864, 'time_algorithm_update': 0.05239733243953286, 'temp_loss': 4.64001060496865, 'temp': 0.9190065678144466, 'alpha_loss': -26.552984673163795, 'alpha': 1.527040803019022, 'critic_loss': 2545.392150526102, 'actor_loss': 2.841116229233714, 'time_step': 0.052844217057862033, 'td_error': 1.2920507155095633, 'init_value': -4.890493869781494, 'ave_value': -4.868589363984442} step=4152
2022-04-21 23:20.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:21.08 [info     ] CQL_20220421231647: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003548288620965329, 'time_algorithm_update': 0.05362907517163051, 'temp_loss': 4.607627383546333, 'temp': 0.9123770735856426, 'alpha_loss': -27.58575855100775, 'alpha': 1.5861274538012597, 'critic_loss': 2930.0475869592215, 'actor_loss': 3.214352466467488, 'time_step': 0.05407292856646411, 'td_error': 1.2992645264415437, 'init_value': -5.366751194000244, 'ave_value': -5.342797412709525} step=4498
2022-04-21 23:21.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:21.28 [info     ] CQL_20220421231647: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00036239072766607207, 'time_algorithm_update': 0.05417417721941292, 'temp_loss': 4.574086543452533, 'temp': 0.9057970975520294, 'alpha_loss': -28.65120161751102, 'alpha': 1.6475951582021107, 'critic_loss': 3321.493664339099, 'actor_loss': 3.642400897996274, 'time_step': 0.054632179309867024, 'td_error': 1.306269496091217, 'init_value': -5.619850158691406, 'ave_value': -5.6070970775844815} step=4844
2022-04-21 23:21.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:21.47 [info     ] CQL_20220421231647: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00036012368395149365, 'time_algorithm_update': 0.05380200512836434, 'temp_loss': 4.541833228458559, 'temp': 0.8992670882988527, 'alpha_loss': -29.760510929747124, 'alpha': 1.7115086964789155, 'critic_loss': 3725.6833129177203, 'actor_loss': 4.0870067404873796, 'time_step': 0.0542555461729193, 'td_error': 1.314999284247412, 'init_value': -6.153067111968994, 'ave_value': -6.139837619850227} step=5190
2022-04-21 23:21.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:22.07 [info     ] CQL_20220421231647: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00035901290143845396, 'time_algorithm_update': 0.05361773099513412, 'temp_loss': 4.509730831047014, 'temp': 0.892784882659857, 'alpha_loss': -30.919796662523567, 'alpha': 1.7779630333012928, 'critic_loss': 4128.293298269283, 'actor_loss': 4.575255413275922, 'time_step': 0.05407112595662905, 'td_error': 1.3253388157964665, 'init_value': -6.780569076538086, 'ave_value': -6.76273454821692} step=5536
2022-04-21 23:22.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:22.27 [info     ] CQL_20220421231647: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00036263052438724935, 'time_algorithm_update': 0.05604771864896565, 'temp_loss': 4.476305600535663, 'temp': 0.8863514413723367, 'alpha_loss': -32.117472940786726, 'alpha': 1.8470397188484324, 'critic_loss': 4562.292978628522, 'actor_loss': 5.061208008341707, 'time_step': 0.05650133893668996, 'td_error': 1.3326661472634527, 'init_value': -7.0135040283203125, 'ave_value': -7.005647927271127} step=5882
2022-04-21 23:22.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:22.48 [info     ] CQL_20220421231647: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.000355003197069113, 'time_algorithm_update': 0.057895845071428774, 'temp_loss': 4.445704471169179, 'temp': 0.8799645638879324, 'alpha_loss': -33.36836397027694, 'alpha': 1.918837856695142, 'critic_loss': 4905.876497301752, 'actor_loss': 5.524999053492023, 'time_step': 0.05834170504112464, 'td_error': 1.3437512104023168, 'init_value': -7.639193058013916, 'ave_value': -7.625678483520768} step=6228
2022-04-21 23:22.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:23.10 [info     ] CQL_20220421231647: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00036378334023359885, 'time_algorithm_update': 0.0584597911448837, 'temp_loss': 4.4136035456133715, 'temp': 0.8736240960269994, 'alpha_loss': -34.663553778146735, 'alpha': 1.9934486698553053, 'critic_loss': 5147.661043905799, 'actor_loss': 6.005058052911924, 'time_step': 0.058917160668125045, 'td_error': 1.3536302106760096, 'init_value': -8.057233810424805, 'ave_value': -8.047662019707005} step=6574
2022-04-21 23:23.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:23.31 [info     ] CQL_20220421231647: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003637902309439775, 'time_algorithm_update': 0.0581287400570908, 'temp_loss': 4.381074060594416, 'temp': 0.8673306980918598, 'alpha_loss': -36.01034046459749, 'alpha': 2.070984579924214, 'critic_loss': 5391.995766347543, 'actor_loss': 6.529684426467543, 'time_step': 0.05858424495410368, 'td_error': 1.364647113791722, 'init_value': -8.50329303741455, 'ave_value': -8.497240837607242} step=6920
2022-04-21 23:23.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:23.52 [info     ] CQL_20220421231647: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003534334932448547, 'time_algorithm_update': 0.05791945333425709, 'temp_loss': 4.3492351584351825, 'temp': 0.8610831499099731, 'alpha_loss': -37.41377870195863, 'alpha': 2.1515523519130113, 'critic_loss': 5707.571733596008, 'actor_loss': 7.090486392809477, 'time_step': 0.0583653904799092, 'td_error': 1.3780303563888174, 'init_value': -9.116813659667969, 'ave_value': -9.108689027445301} step=7266
2022-04-21 23:23.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:24.13 [info     ] CQL_20220421231647: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003636813577199947, 'time_algorithm_update': 0.0573508911739195, 'temp_loss': 4.317325146901125, 'temp': 0.8548822769884429, 'alpha_loss': -38.873612210929736, 'alpha': 2.235271942408788, 'critic_loss': 6138.747393481304, 'actor_loss': 7.678106387915639, 'time_step': 0.05780886707967416, 'td_error': 1.3913488088599797, 'init_value': -9.616117477416992, 'ave_value': -9.611788474309495} step=7612
2022-04-21 23:24.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:24.33 [info     ] CQL_20220421231647: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00035085498942116094, 'time_algorithm_update': 0.05679557364800073, 'temp_loss': 4.287422116781245, 'temp': 0.8487250441416151, 'alpha_loss': -40.382381670736855, 'alpha': 2.322258005252463, 'critic_loss': 6582.511042776824, 'actor_loss': 8.243899922839478, 'time_step': 0.05723635378600545, 'td_error': 1.4062164405998194, 'init_value': -10.224839210510254, 'ave_value': -10.218383205281778} step=7958
2022-04-21 23:24.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:24.55 [info     ] CQL_20220421231647: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0003680672948760104, 'time_algorithm_update': 0.05798390421564179, 'temp_loss': 4.255622513721444, 'temp': 0.8426125335555545, 'alpha_loss': -41.94967140903363, 'alpha': 2.4126250578488917, 'critic_loss': 6675.036059429191, 'actor_loss': 8.695046400059166, 'time_step': 0.05844706400281432, 'td_error': 1.4166856466931512, 'init_value': -10.540285110473633, 'ave_value': -10.541344211699556} step=8304
2022-04-21 23:24.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:25.16 [info     ] CQL_20220421231647: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003614315407813629, 'time_algorithm_update': 0.05991480047303128, 'temp_loss': 4.225002480380108, 'temp': 0.8365444699463817, 'alpha_loss': -43.59119902594241, 'alpha': 2.5065254944597366, 'critic_loss': 6792.370348627168, 'actor_loss': 9.30003135328348, 'time_step': 0.06037285837823945, 'td_error': 1.435667969276739, 'init_value': -11.319756507873535, 'ave_value': -11.312570626705165} step=8650
2022-04-21 23:25.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:25.38 [info     ] CQL_20220421231647: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00035620355881707516, 'time_algorithm_update': 0.05965183167099264, 'temp_loss': 4.19458492505068, 'temp': 0.8305200410716106, 'alpha_loss': -45.277946185514416, 'alpha': 2.604079029463619, 'critic_loss': 7296.264954671695, 'actor_loss': 9.907957156958608, 'time_step': 0.06009674554615352, 'td_error': 1.4532237819096194, 'init_value': -11.952402114868164, 'ave_value': -11.943524508795052} step=8996
2022-04-21 23:25.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:26.00 [info     ] CQL_20220421231647: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0003572206276689651, 'time_algorithm_update': 0.059808177065987116, 'temp_loss': 4.164879123599543, 'temp': 0.8245387411530997, 'alpha_loss': -47.0387558027499, 'alpha': 2.705424409381227, 'critic_loss': 7720.692274148753, 'actor_loss': 10.48558221938293, 'time_step': 0.06025452214169365, 'td_error': 1.4695299171741492, 'init_value': -12.444409370422363, 'ave_value': -12.441080614581047} step=9342
2022-04-21 23:26.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:26.22 [info     ] CQL_20220421231647: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00037097172930061474, 'time_algorithm_update': 0.05977214967584334, 'temp_loss': 4.134087058161035, 'temp': 0.8186009831166681, 'alpha_loss': -48.87763945077885, 'alpha': 2.8107244465392447, 'critic_loss': 7903.338700663837, 'actor_loss': 10.982837632901406, 'time_step': 0.060232716488700384, 'td_error': 1.482606536048368, 'init_value': -12.840888977050781, 'ave_value': -12.839563285462265} step=9688
2022-04-21 23:26.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:26.43 [info     ] CQL_20220421231647: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00036491548394881234, 'time_algorithm_update': 0.058772678320118454, 'temp_loss': 4.104334967673858, 'temp': 0.8127065500772068, 'alpha_loss': -50.77774469011781, 'alpha': 2.920140886582391, 'critic_loss': 8110.532381796424, 'actor_loss': 11.599855684820627, 'time_step': 0.0592313109105722, 'td_error': 1.5060323949066619, 'init_value': -13.650197982788086, 'ave_value': -13.641412719882117} step=10034
2022-04-21 23:26.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:27.05 [info     ] CQL_20220421231647: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00036492168558815313, 'time_algorithm_update': 0.05922135865757231, 'temp_loss': 4.074765300475104, 'temp': 0.8068535886058917, 'alpha_loss': -52.7509751182071, 'alpha': 3.0338082465133227, 'critic_loss': 8778.613346166005, 'actor_loss': 12.221275197288204, 'time_step': 0.05968122123982865, 'td_error': 1.520176162676947, 'init_value': -13.960660934448242, 'ave_value': -13.962975576987702} step=10380
2022-04-21 23:27.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:27.26 [info     ] CQL_20220421231647: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00036763593640630644, 'time_algorithm_update': 0.05907434948606987, 'temp_loss': 4.045963933702149, 'temp': 0.8010429584911104, 'alpha_loss': -54.80883974284795, 'alpha': 3.1518844090445195, 'critic_loss': 9329.60249390354, 'actor_loss': 12.814832888586674, 'time_step': 0.05953680848799689, 'td_error': 1.5410060365548746, 'init_value': -14.595341682434082, 'ave_value': -14.595712269239936} step=10726
2022-04-21 23:27.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:27.48 [info     ] CQL_20220421231647: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00037042943039381437, 'time_algorithm_update': 0.05936797368044109, 'temp_loss': 4.018003327998123, 'temp': 0.7952728216358692, 'alpha_loss': -56.9376765565376, 'alpha': 3.274573494933244, 'critic_loss': 9713.46580902276, 'actor_loss': 13.32696169373617, 'time_step': 0.0598365378517636, 'td_error': 1.5618304581787823, 'init_value': -15.23166561126709, 'ave_value': -15.226296498030495} step=11072
2022-04-21 23:27.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:28.09 [info     ] CQL_20220421231647: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003669668484285388, 'time_algorithm_update': 0.05822009296086482, 'temp_loss': 3.9867321266604296, 'temp': 0.7895457203677624, 'alpha_loss': -59.16096969560392, 'alpha': 3.402039938579405, 'critic_loss': 9765.00342361362, 'actor_loss': 13.829710502845014, 'time_step': 0.05867957655405034, 'td_error': 1.5800533991517798, 'init_value': -15.699835777282715, 'ave_value': -15.698686157667902} step=11418
2022-04-21 23:28.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:28.30 [info     ] CQL_20220421231647: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003663074074453012, 'time_algorithm_update': 0.05840901418917441, 'temp_loss': 3.960069743195021, 'temp': 0.7838596303683485, 'alpha_loss': -61.46078597052249, 'alpha': 3.534461716006946, 'critic_loss': 10174.568565412754, 'actor_loss': 14.376210331227737, 'time_step': 0.05885993883099859, 'td_error': 1.5976166467975974, 'init_value': -16.161033630371094, 'ave_value': -16.160262118792183} step=11764
2022-04-21 23:28.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:28.52 [info     ] CQL_20220421231647: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003674216353135302, 'time_algorithm_update': 0.05935804830121167, 'temp_loss': 3.931208413460351, 'temp': 0.7782136200135843, 'alpha_loss': -63.85873918037194, 'alpha': 3.6720559300714837, 'critic_loss': 10509.346010770412, 'actor_loss': 14.884607535566209, 'time_step': 0.059811250322816, 'td_error': 1.617188781384762, 'init_value': -16.678340911865234, 'ave_value': -16.6768268427842} step=12110
2022-04-21 23:28.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:29.13 [info     ] CQL_20220421231647: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00036032075826832326, 'time_algorithm_update': 0.05903010423472851, 'temp_loss': 3.9018530060101106, 'temp': 0.7726097365335233, 'alpha_loss': -66.34677397446825, 'alpha': 3.814991156489863, 'critic_loss': 10604.879806606756, 'actor_loss': 15.377498466844504, 'time_step': 0.059480769785842456, 'td_error': 1.6361383609391165, 'init_value': -17.128393173217773, 'ave_value': -17.130156409316516} step=12456
2022-04-21 23:29.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:29.35 [info     ] CQL_20220421231647: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003606515123664988, 'time_algorithm_update': 0.059155781834111736, 'temp_loss': 3.873949027474905, 'temp': 0.7670466724158711, 'alpha_loss': -68.91881611994926, 'alpha': 3.9634983456892776, 'critic_loss': 11112.314057984104, 'actor_loss': 15.975275373183234, 'time_step': 0.059602730536047435, 'td_error': 1.6623692743808414, 'init_value': -17.80327033996582, 'ave_value': -17.802176670058184} step=12802
2022-04-21 23:29.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:29.56 [info     ] CQL_20220421231647: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0003522138375078323, 'time_algorithm_update': 0.05897582404186271, 'temp_loss': 3.8451005702762933, 'temp': 0.7615234884568033, 'alpha_loss': -71.61034393310547, 'alpha': 4.117771403637924, 'critic_loss': 11663.376270095738, 'actor_loss': 16.588428320912268, 'time_step': 0.05941868103997556, 'td_error': 1.6834518726879462, 'init_value': -18.259536743164062, 'ave_value': -18.264278461063796} step=13148
2022-04-21 23:29.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:30.18 [info     ] CQL_20220421231647: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003625285418736452, 'time_algorithm_update': 0.05907118940629022, 'temp_loss': 3.818926831890393, 'temp': 0.7560395669041341, 'alpha_loss': -74.39773127384957, 'alpha': 4.27808091268374, 'critic_loss': 10598.512669910586, 'actor_loss': 16.88408286585284, 'time_step': 0.059520143994017144, 'td_error': 1.6940022717425247, 'init_value': -18.512287139892578, 'ave_value': -18.515731639147596} step=13494
2022-04-21 23:30.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:30.40 [info     ] CQL_20220421231647: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00036549085826543024, 'time_algorithm_update': 0.05924836266247523, 'temp_loss': 3.7907231357056284, 'temp': 0.7505943068879188, 'alpha_loss': -77.2855634523954, 'alpha': 4.444606115363237, 'critic_loss': 8812.581960689125, 'actor_loss': 17.216173045208, 'time_step': 0.05971182564090442, 'td_error': 1.7123963194942458, 'init_value': -18.936565399169922, 'ave_value': -18.940247607152006} step=13840
2022-04-21 23:30.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:31.01 [info     ] CQL_20220421231647: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00036535648941304644, 'time_algorithm_update': 0.058919673021129104, 'temp_loss': 3.7639586994413694, 'temp': 0.7451888472703151, 'alpha_loss': -80.29647681616635, 'alpha': 4.617611891961511, 'critic_loss': 8229.86890495168, 'actor_loss': 17.775628393096042, 'time_step': 0.05938043828644504, 'td_error': 1.7338544745847178, 'init_value': -19.428329467773438, 'ave_value': -19.431934619051635} step=14186
2022-04-21 23:31.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:31.22 [info     ] CQL_20220421231647: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00036437111782889833, 'time_algorithm_update': 0.059099610141247, 'temp_loss': 3.736916112761966, 'temp': 0.7398222817506405, 'alpha_loss': -83.42691767698078, 'alpha': 4.79736018043033, 'critic_loss': 7925.927529748465, 'actor_loss': 18.378258820903095, 'time_step': 0.05955793678415993, 'td_error': 1.7644945470094497, 'init_value': -20.146621704101562, 'ave_value': -20.145489453297195} step=14532
2022-04-21 23:31.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:31.44 [info     ] CQL_20220421231647: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003497207784928338, 'time_algorithm_update': 0.05914222987401003, 'temp_loss': 3.70961414665156, 'temp': 0.7344938118678297, 'alpha_loss': -86.6750894888288, 'alpha': 4.9841074447411335, 'critic_loss': 7505.014449455834, 'actor_loss': 18.963521510879428, 'time_step': 0.05958344550491068, 'td_error': 1.7891105094151831, 'init_value': -20.64621353149414, 'ave_value': -20.64873541392891} step=14878
2022-04-21 23:31.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:32.05 [info     ] CQL_20220421231647: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003563413730246483, 'time_algorithm_update': 0.05781314552174827, 'temp_loss': 3.6830590956472937, 'temp': 0.7292047439283029, 'alpha_loss': -90.04739684176583, 'alpha': 5.178120606207434, 'critic_loss': 7101.776155504877, 'actor_loss': 19.572334471465535, 'time_step': 0.05826202775701622, 'td_error': 1.8208569962789296, 'init_value': -21.33884620666504, 'ave_value': -21.338405443899664} step=15224
2022-04-21 23:32.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:32.26 [info     ] CQL_20220421231647: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.000351701857726698, 'time_algorithm_update': 0.05710803566640512, 'temp_loss': 3.6568397600526756, 'temp': 0.723953188671542, 'alpha_loss': -93.5432477962075, 'alpha': 5.379692332593002, 'critic_loss': 6725.074265320177, 'actor_loss': 20.202812073547715, 'time_step': 0.0575544544727127, 'td_error': 1.8478977721437857, 'init_value': -21.858797073364258, 'ave_value': -21.863141774882518} step=15570
2022-04-21 23:32.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:32.47 [info     ] CQL_20220421231647: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003515564637377083, 'time_algorithm_update': 0.05700052542493522, 'temp_loss': 3.6301828464331654, 'temp': 0.7187390134513723, 'alpha_loss': -97.18655624830654, 'alpha': 5.589094226759983, 'critic_loss': 6356.213068438403, 'actor_loss': 20.846499790346, 'time_step': 0.05744497004271932, 'td_error': 1.884296297458258, 'init_value': -22.63932228088379, 'ave_value': -22.636607830215823} step=15916
2022-04-21 23:32.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:33.08 [info     ] CQL_20220421231647: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00036527104460435105, 'time_algorithm_update': 0.057603195912576136, 'temp_loss': 3.6038670091959784, 'temp': 0.7135632167317275, 'alpha_loss': -100.95048911034027, 'alpha': 5.806636197029511, 'critic_loss': 5988.916462980943, 'actor_loss': 21.495938339674403, 'time_step': 0.05806410243745484, 'td_error': 1.9148349680875962, 'init_value': -23.213382720947266, 'ave_value': -23.2129292645009} step=16262
2022-04-21 23:33.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:33.29 [info     ] CQL_20220421231647: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00035758170089280673, 'time_algorithm_update': 0.05781746117365843, 'temp_loss': 3.5785267401292833, 'temp': 0.7084239535249037, 'alpha_loss': -104.90058607862174, 'alpha': 6.032668801401392, 'critic_loss': 5644.1968196802745, 'actor_loss': 22.129125490353974, 'time_step': 0.05826728054553787, 'td_error': 1.9481326146461428, 'init_value': -23.841564178466797, 'ave_value': -23.842043180723447} step=16608
2022-04-21 23:33.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:33.50 [info     ] CQL_20220421231647: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00035940015936173454, 'time_algorithm_update': 0.05785485568074133, 'temp_loss': 3.5515107612389363, 'temp': 0.7033228619250259, 'alpha_loss': -108.99641729365884, 'alpha': 6.267519451979267, 'critic_loss': 5325.70778907379, 'actor_loss': 22.771975991353823, 'time_step': 0.058306494200160736, 'td_error': 1.9816021791993235, 'init_value': -24.460906982421875, 'ave_value': -24.461830876324626} step=16954
2022-04-21 23:33.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:34.11 [info     ] CQL_20220421231647: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003643518238398381, 'time_algorithm_update': 0.057783227435426215, 'temp_loss': 3.527018467814936, 'temp': 0.6982580478480785, 'alpha_loss': -113.21107392504037, 'alpha': 6.511502059208865, 'critic_loss': 5036.4281520953755, 'actor_loss': 23.43388028227525, 'time_step': 0.05823787030457072, 'td_error': 2.018946945015519, 'init_value': -25.143884658813477, 'ave_value': -25.14364053473873} step=17300
2022-04-21 23:34.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421231647/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 23:34.13 [info     ] FQE_20220421233411: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.000161456774516278, 'time_algorithm_update': 0.00893197863934988, 'loss': 0.006427926753355886, 'time_step': 0.009166227765830166, 'init_value': -0.19482526183128357, 'ave_value': -0.1858187519970375, 'soft_opc': nan} step=166




2022-04-21 23:34.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.14 [info     ] FQE_20220421233411: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016911202166453902, 'time_algorithm_update': 0.008704020316342273, 'loss': 0.00410439325103828, 'time_step': 0.008945318589727563, 'init_value': -0.2382189780473709, 'ave_value': -0.2033196042409284, 'soft_opc': nan} step=332




2022-04-21 23:34.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.16 [info     ] FQE_20220421233411: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016622658235480986, 'time_algorithm_update': 0.009268779352486852, 'loss': 0.0032773659072244114, 'time_step': 0.009505905300737864, 'init_value': -0.2537751793861389, 'ave_value': -0.21588236925502619, 'soft_opc': nan} step=498




2022-04-21 23:34.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.18 [info     ] FQE_20220421233411: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016875295753938607, 'time_algorithm_update': 0.008839286953569895, 'loss': 0.0027657652719799116, 'time_step': 0.00907975363444133, 'init_value': -0.2905915677547455, 'ave_value': -0.24485832464184848, 'soft_opc': nan} step=664




2022-04-21 23:34.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.19 [info     ] FQE_20220421233411: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001645432897360928, 'time_algorithm_update': 0.009149304355483457, 'loss': 0.0023768156583151632, 'time_step': 0.00938748738851892, 'init_value': -0.3009938597679138, 'ave_value': -0.2599540665544368, 'soft_opc': nan} step=830




2022-04-21 23:34.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.21 [info     ] FQE_20220421233411: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016633573784885635, 'time_algorithm_update': 0.009163939809224692, 'loss': 0.002072911187781986, 'time_step': 0.009399530399276549, 'init_value': -0.3200303614139557, 'ave_value': -0.27747550190485143, 'soft_opc': nan} step=996




2022-04-21 23:34.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.23 [info     ] FQE_20220421233411: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016466824405164603, 'time_algorithm_update': 0.00912003632051399, 'loss': 0.001898031976320837, 'time_step': 0.009356870708695376, 'init_value': -0.3416910171508789, 'ave_value': -0.30796699725628435, 'soft_opc': nan} step=1162




2022-04-21 23:34.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.24 [info     ] FQE_20220421233411: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016888078436794052, 'time_algorithm_update': 0.008887109986270767, 'loss': 0.0016503347857456923, 'time_step': 0.009127850992133818, 'init_value': -0.3617456555366516, 'ave_value': -0.3356575581001806, 'soft_opc': nan} step=1328




2022-04-21 23:34.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.26 [info     ] FQE_20220421233411: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001630941069269755, 'time_algorithm_update': 0.009150528046021978, 'loss': 0.0014540911191044915, 'time_step': 0.009384353476834584, 'init_value': -0.3612491488456726, 'ave_value': -0.33405625499650704, 'soft_opc': nan} step=1494




2022-04-21 23:34.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.28 [info     ] FQE_20220421233411: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015706182962440583, 'time_algorithm_update': 0.00900930525308632, 'loss': 0.0014914081308146354, 'time_step': 0.009230921067387224, 'init_value': -0.38210171461105347, 'ave_value': -0.3586106996899387, 'soft_opc': nan} step=1660




2022-04-21 23:34.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.29 [info     ] FQE_20220421233411: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001599501414471362, 'time_algorithm_update': 0.008535846170172634, 'loss': 0.0014310573967221383, 'time_step': 0.008764687790928117, 'init_value': -0.41995295882225037, 'ave_value': -0.4002057696323473, 'soft_opc': nan} step=1826




2022-04-21 23:34.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.31 [info     ] FQE_20220421233411: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016552425292601068, 'time_algorithm_update': 0.009206082447465644, 'loss': 0.0014716231121109373, 'time_step': 0.009446819144559193, 'init_value': -0.43894147872924805, 'ave_value': -0.41064999412392844, 'soft_opc': nan} step=1992




2022-04-21 23:34.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.33 [info     ] FQE_20220421233411: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016588188079466303, 'time_algorithm_update': 0.009218034974063736, 'loss': 0.0014973997782932662, 'time_step': 0.009454870798501623, 'init_value': -0.4954519271850586, 'ave_value': -0.46508501508516503, 'soft_opc': nan} step=2158




2022-04-21 23:34.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.34 [info     ] FQE_20220421233411: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016345029853912722, 'time_algorithm_update': 0.009169581424759096, 'loss': 0.001549635525703071, 'time_step': 0.009402257850371212, 'init_value': -0.5369381904602051, 'ave_value': -0.4866336583101132, 'soft_opc': nan} step=2324




2022-04-21 23:34.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.36 [info     ] FQE_20220421233411: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001635364739291639, 'time_algorithm_update': 0.008967958300946707, 'loss': 0.001736462601013089, 'time_step': 0.009204146373702819, 'init_value': -0.5716646909713745, 'ave_value': -0.5180435400691118, 'soft_opc': nan} step=2490




2022-04-21 23:34.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.38 [info     ] FQE_20220421233411: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001640190561133695, 'time_algorithm_update': 0.009163704263158592, 'loss': 0.0018221640779499345, 'time_step': 0.009397996477333897, 'init_value': -0.6607599258422852, 'ave_value': -0.588671284722718, 'soft_opc': nan} step=2656




2022-04-21 23:34.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.39 [info     ] FQE_20220421233411: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001658545919211514, 'time_algorithm_update': 0.009211950991527143, 'loss': 0.0018592162735561043, 'time_step': 0.009452064353299428, 'init_value': -0.7034651041030884, 'ave_value': -0.6139110652998359, 'soft_opc': nan} step=2822




2022-04-21 23:34.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.41 [info     ] FQE_20220421233411: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016502730817679898, 'time_algorithm_update': 0.00884101333388363, 'loss': 0.0020788660588890247, 'time_step': 0.00907806746930961, 'init_value': -0.7428296804428101, 'ave_value': -0.6493978808759837, 'soft_opc': nan} step=2988




2022-04-21 23:34.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.43 [info     ] FQE_20220421233411: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016484921237072312, 'time_algorithm_update': 0.008706493550036326, 'loss': 0.0021859814441995695, 'time_step': 0.008943109627229622, 'init_value': -0.7780250310897827, 'ave_value': -0.6849716048561842, 'soft_opc': nan} step=3154




2022-04-21 23:34.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.44 [info     ] FQE_20220421233411: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001612987863012107, 'time_algorithm_update': 0.009067727858761707, 'loss': 0.002281288307087204, 'time_step': 0.009299867124442595, 'init_value': -0.8400206565856934, 'ave_value': -0.7233872392424592, 'soft_opc': nan} step=3320




2022-04-21 23:34.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.46 [info     ] FQE_20220421233411: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001619206853659756, 'time_algorithm_update': 0.009123858199062118, 'loss': 0.0023930680448734985, 'time_step': 0.009356294769838631, 'init_value': -0.8541417121887207, 'ave_value': -0.7368023151145862, 'soft_opc': nan} step=3486




2022-04-21 23:34.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.48 [info     ] FQE_20220421233411: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016401761985686888, 'time_algorithm_update': 0.008812641522970545, 'loss': 0.002632259236349225, 'time_step': 0.009047529783593604, 'init_value': -0.9013146758079529, 'ave_value': -0.769141244099618, 'soft_opc': nan} step=3652




2022-04-21 23:34.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.49 [info     ] FQE_20220421233411: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016336699566209172, 'time_algorithm_update': 0.009211281695997858, 'loss': 0.002722094513101402, 'time_step': 0.009448067251458225, 'init_value': -0.9553216695785522, 'ave_value': -0.8067863673986951, 'soft_opc': nan} step=3818




2022-04-21 23:34.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.51 [info     ] FQE_20220421233411: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016390128308031932, 'time_algorithm_update': 0.009185341467340308, 'loss': 0.0028980584949793585, 'time_step': 0.009428417826273355, 'init_value': -0.9883476495742798, 'ave_value': -0.8332554611335466, 'soft_opc': nan} step=3984




2022-04-21 23:34.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.53 [info     ] FQE_20220421233411: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016101153500108835, 'time_algorithm_update': 0.008825154189603874, 'loss': 0.003231880250134427, 'time_step': 0.009056308183325342, 'init_value': -1.048675298690796, 'ave_value': -0.8797753772220096, 'soft_opc': nan} step=4150




2022-04-21 23:34.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.54 [info     ] FQE_20220421233411: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016557739441653332, 'time_algorithm_update': 0.009114963462553829, 'loss': 0.0032232969980728983, 'time_step': 0.009352754397564623, 'init_value': -1.1155924797058105, 'ave_value': -0.9305646044445467, 'soft_opc': nan} step=4316




2022-04-21 23:34.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.56 [info     ] FQE_20220421233411: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016460935753512094, 'time_algorithm_update': 0.00874190158154591, 'loss': 0.003262502855129669, 'time_step': 0.008984337370079684, 'init_value': -1.1210105419158936, 'ave_value': -0.9237193458856227, 'soft_opc': nan} step=4482




2022-04-21 23:34.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.58 [info     ] FQE_20220421233411: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001689999936574913, 'time_algorithm_update': 0.009167911058448884, 'loss': 0.0035117994856295534, 'time_step': 0.009410896933222392, 'init_value': -1.1935837268829346, 'ave_value': -0.9787112057779555, 'soft_opc': nan} step=4648




2022-04-21 23:34.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:34.59 [info     ] FQE_20220421233411: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016005642442818148, 'time_algorithm_update': 0.008917034390461013, 'loss': 0.003560061394713864, 'time_step': 0.009147147098219538, 'init_value': -1.209691047668457, 'ave_value': -0.977701079744745, 'soft_opc': nan} step=4814




2022-04-21 23:34.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.01 [info     ] FQE_20220421233411: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001630739993359669, 'time_algorithm_update': 0.009223504238818065, 'loss': 0.003836696938203412, 'time_step': 0.009461233414799333, 'init_value': -1.2623467445373535, 'ave_value': -1.0188409130676306, 'soft_opc': nan} step=4980




2022-04-21 23:35.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.03 [info     ] FQE_20220421233411: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001611752682421581, 'time_algorithm_update': 0.009096228932759848, 'loss': 0.004031980177036947, 'time_step': 0.009328014879341585, 'init_value': -1.3522437810897827, 'ave_value': -1.1030589421959343, 'soft_opc': nan} step=5146




2022-04-21 23:35.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.04 [info     ] FQE_20220421233411: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015152506081454726, 'time_algorithm_update': 0.008772727954818541, 'loss': 0.004289126855037904, 'time_step': 0.008994113968079349, 'init_value': -1.3663980960845947, 'ave_value': -1.1046943954519324, 'soft_opc': nan} step=5312




2022-04-21 23:35.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.06 [info     ] FQE_20220421233411: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015888013035418038, 'time_algorithm_update': 0.009008353015026414, 'loss': 0.0044470741445092055, 'time_step': 0.00923635873449854, 'init_value': -1.4483448266983032, 'ave_value': -1.1787174510391982, 'soft_opc': nan} step=5478




2022-04-21 23:35.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.08 [info     ] FQE_20220421233411: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.000160487301378365, 'time_algorithm_update': 0.009148143860230962, 'loss': 0.004779356166072392, 'time_step': 0.009379797671214643, 'init_value': -1.5149786472320557, 'ave_value': -1.248532825126103, 'soft_opc': nan} step=5644




2022-04-21 23:35.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.09 [info     ] FQE_20220421233411: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00017027395317353397, 'time_algorithm_update': 0.009155103959232927, 'loss': 0.004859313428056909, 'time_step': 0.009401413331548852, 'init_value': -1.503640055656433, 'ave_value': -1.219764833289895, 'soft_opc': nan} step=5810




2022-04-21 23:35.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.11 [info     ] FQE_20220421233411: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001640535262693842, 'time_algorithm_update': 0.008474011019051793, 'loss': 0.005110947951286644, 'time_step': 0.008712329060198313, 'init_value': -1.595469355583191, 'ave_value': -1.308256508010599, 'soft_opc': nan} step=5976




2022-04-21 23:35.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.13 [info     ] FQE_20220421233411: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016835655074521718, 'time_algorithm_update': 0.009106334433498153, 'loss': 0.005542081195499232, 'time_step': 0.009350164827094021, 'init_value': -1.620341420173645, 'ave_value': -1.3193902786200244, 'soft_opc': nan} step=6142




2022-04-21 23:35.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.14 [info     ] FQE_20220421233411: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016828761043318784, 'time_algorithm_update': 0.00917269379259592, 'loss': 0.005525046277157289, 'time_step': 0.009417380195066154, 'init_value': -1.6640079021453857, 'ave_value': -1.3520448910500351, 'soft_opc': nan} step=6308




2022-04-21 23:35.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.16 [info     ] FQE_20220421233411: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016375622117375754, 'time_algorithm_update': 0.008873194097036338, 'loss': 0.00549385755663704, 'time_step': 0.00911344103066318, 'init_value': -1.6764142513275146, 'ave_value': -1.3541998313639212, 'soft_opc': nan} step=6474




2022-04-21 23:35.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.18 [info     ] FQE_20220421233411: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016620072973779886, 'time_algorithm_update': 0.00906874616462064, 'loss': 0.0057228714205234885, 'time_step': 0.009304493306631065, 'init_value': -1.7514042854309082, 'ave_value': -1.4172894096371156, 'soft_opc': nan} step=6640




2022-04-21 23:35.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.19 [info     ] FQE_20220421233411: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001677375241934535, 'time_algorithm_update': 0.009218303554029349, 'loss': 0.00604363870337123, 'time_step': 0.009462218686758754, 'init_value': -1.7782707214355469, 'ave_value': -1.430659132137861, 'soft_opc': nan} step=6806




2022-04-21 23:35.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.21 [info     ] FQE_20220421233411: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016541078866246235, 'time_algorithm_update': 0.009116291999816895, 'loss': 0.0061303791903865425, 'time_step': 0.00935297988983522, 'init_value': -1.7934370040893555, 'ave_value': -1.4301717172838277, 'soft_opc': nan} step=6972




2022-04-21 23:35.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.23 [info     ] FQE_20220421233411: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016803195677607893, 'time_algorithm_update': 0.008903158716408604, 'loss': 0.006402322426732877, 'time_step': 0.00914656397808029, 'init_value': -1.8322982788085938, 'ave_value': -1.4558685458391099, 'soft_opc': nan} step=7138




2022-04-21 23:35.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.24 [info     ] FQE_20220421233411: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016446429562855917, 'time_algorithm_update': 0.00816070315349533, 'loss': 0.006498607763030331, 'time_step': 0.008401971265494105, 'init_value': -1.9082775115966797, 'ave_value': -1.5210789308623152, 'soft_opc': nan} step=7304




2022-04-21 23:35.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.26 [info     ] FQE_20220421233411: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016746319920183663, 'time_algorithm_update': 0.008158517171101397, 'loss': 0.006710641772496088, 'time_step': 0.00839982262576919, 'init_value': -1.9581226110458374, 'ave_value': -1.5492963249149085, 'soft_opc': nan} step=7470




2022-04-21 23:35.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.27 [info     ] FQE_20220421233411: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001667048557695136, 'time_algorithm_update': 0.008011241993272161, 'loss': 0.007055798911030898, 'time_step': 0.008250388754419533, 'init_value': -1.9990746974945068, 'ave_value': -1.5776568856828652, 'soft_opc': nan} step=7636




2022-04-21 23:35.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.29 [info     ] FQE_20220421233411: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001640707613473915, 'time_algorithm_update': 0.008258345615432924, 'loss': 0.007501539768453362, 'time_step': 0.008495431348501918, 'init_value': -2.079594850540161, 'ave_value': -1.631537839226626, 'soft_opc': nan} step=7802




2022-04-21 23:35.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.30 [info     ] FQE_20220421233411: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016625817999782333, 'time_algorithm_update': 0.00825373666832246, 'loss': 0.007725642644098948, 'time_step': 0.008491412702813205, 'init_value': -2.1275393962860107, 'ave_value': -1.6501891854042943, 'soft_opc': nan} step=7968




2022-04-21 23:35.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.32 [info     ] FQE_20220421233411: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016746750797133847, 'time_algorithm_update': 0.008266740534678999, 'loss': 0.007875934276338498, 'time_step': 0.008504629135131836, 'init_value': -2.1525721549987793, 'ave_value': -1.6641994904491815, 'soft_opc': nan} step=8134




2022-04-21 23:35.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-21 23:35.33 [info     ] FQE_20220421233411: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001663285565663533, 'time_algorithm_update': 0.007997914969202984, 'loss': 0.008074248065303798, 'time_step': 0.008233475397868329, 'init_value': -2.18495774269104, 'ave_value': -1.683037186575097, 'soft_opc': nan} step=8300




2022-04-21 23:35.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233411/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-21 23:35.33 [info     ] Directory is created at d3rlpy_logs/FQE_20220421233533
2022-04-21 23:35.33 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 23:35.33 [debug    ] Building models...
2022-04-21 23:35.33 [debug    ] Models have been built.
2022-04-21 23:35.33 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220421233533/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 23:35.37 [info     ] FQE_20220421233533: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00016946188161070917, 'time_algorithm_update': 0.00829083885945065, 'loss': 0.02246497595079348, 'time_step': 0.008533661802050093, 'init_value': -1.1356685161590576, 'ave_value': -1.1273968961324778, 'soft_opc': nan} step=355




2022-04-21 23:35.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:35.40 [info     ] FQE_20220421233533: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00016485805242833957, 'time_algorithm_update': 0.008058038899596309, 'loss': 0.020578584145807044, 'time_step': 0.008295435972616706, 'init_value': -2.3485991954803467, 'ave_value': -2.360783824773369, 'soft_opc': nan} step=710




2022-04-21 23:35.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:35.43 [info     ] FQE_20220421233533: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00016371565805354589, 'time_algorithm_update': 0.008129125917461557, 'loss': 0.022166225912285523, 'time_step': 0.00836724899184536, 'init_value': -2.967236280441284, 'ave_value': -3.012449713087757, 'soft_opc': nan} step=1065




2022-04-21 23:35.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:35.46 [info     ] FQE_20220421233533: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.000171486088927363, 'time_algorithm_update': 0.008244005391295528, 'loss': 0.027547371731152837, 'time_step': 0.00848765104589328, 'init_value': -4.0445332527160645, 'ave_value': -4.174751841024696, 'soft_opc': nan} step=1420




2022-04-21 23:35.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:35.49 [info     ] FQE_20220421233533: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.0001694088250818387, 'time_algorithm_update': 0.008044557840051786, 'loss': 0.03270365313272661, 'time_step': 0.008287299518853845, 'init_value': -4.529839992523193, 'ave_value': -4.767528535347034, 'soft_opc': nan} step=1775




2022-04-21 23:35.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:35.53 [info     ] FQE_20220421233533: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00016999781971246424, 'time_algorithm_update': 0.008145111379489093, 'loss': 0.041419276230456964, 'time_step': 0.008389520645141601, 'init_value': -5.324107646942139, 'ave_value': -5.748244926214525, 'soft_opc': nan} step=2130




2022-04-21 23:35.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:35.56 [info     ] FQE_20220421233533: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00016694001748528278, 'time_algorithm_update': 0.007974767684936523, 'loss': 0.049623458546546984, 'time_step': 0.008215983484832334, 'init_value': -5.769686222076416, 'ave_value': -6.386344974627059, 'soft_opc': nan} step=2485




2022-04-21 23:35.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:35.59 [info     ] FQE_20220421233533: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00017285212664537028, 'time_algorithm_update': 0.008193021425059144, 'loss': 0.0628817347967079, 'time_step': 0.008442930436470139, 'init_value': -6.298365592956543, 'ave_value': -7.286807951672504, 'soft_opc': nan} step=2840




2022-04-21 23:35.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.02 [info     ] FQE_20220421233533: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00017028056399922975, 'time_algorithm_update': 0.008139544473567479, 'loss': 0.07150546979211586, 'time_step': 0.008384793241259078, 'init_value': -6.6956987380981445, 'ave_value': -8.011585701294386, 'soft_opc': nan} step=3195




2022-04-21 23:36.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.05 [info     ] FQE_20220421233533: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00016862573757977554, 'time_algorithm_update': 0.007900675249771333, 'loss': 0.08413139947388373, 'time_step': 0.008145075113001004, 'init_value': -7.0150837898254395, 'ave_value': -8.770298803236187, 'soft_opc': nan} step=3550




2022-04-21 23:36.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.08 [info     ] FQE_20220421233533: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00016782451683366802, 'time_algorithm_update': 0.00816133526009573, 'loss': 0.09315470889420577, 'time_step': 0.00840446713944556, 'init_value': -7.835047245025635, 'ave_value': -9.9155474284134, 'soft_opc': nan} step=3905




2022-04-21 23:36.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.11 [info     ] FQE_20220421233533: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00017002199737119002, 'time_algorithm_update': 0.008022390285008391, 'loss': 0.10453498020348415, 'time_step': 0.008263206481933594, 'init_value': -8.147137641906738, 'ave_value': -10.568533483343234, 'soft_opc': nan} step=4260




2022-04-21 23:36.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.15 [info     ] FQE_20220421233533: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00016853641456281634, 'time_algorithm_update': 0.008186527037284744, 'loss': 0.11386909871558908, 'time_step': 0.008429808683798347, 'init_value': -8.910338401794434, 'ave_value': -11.650757314032722, 'soft_opc': nan} step=4615




2022-04-21 23:36.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.18 [info     ] FQE_20220421233533: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001714048251299791, 'time_algorithm_update': 0.00814846267162914, 'loss': 0.12042148964295925, 'time_step': 0.008393945828290053, 'init_value': -9.263198852539062, 'ave_value': -12.319164997208839, 'soft_opc': nan} step=4970




2022-04-21 23:36.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.21 [info     ] FQE_20220421233533: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00016826508750378245, 'time_algorithm_update': 0.008006760771845428, 'loss': 0.12939571760804722, 'time_step': 0.008252347355157556, 'init_value': -9.765259742736816, 'ave_value': -13.190998184726965, 'soft_opc': nan} step=5325




2022-04-21 23:36.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.24 [info     ] FQE_20220421233533: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00016810726112043354, 'time_algorithm_update': 0.008222741140446193, 'loss': 0.1414884090423584, 'time_step': 0.008464472058793188, 'init_value': -10.4826021194458, 'ave_value': -14.182431266175888, 'soft_opc': nan} step=5680




2022-04-21 23:36.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.27 [info     ] FQE_20220421233533: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00016893803233831702, 'time_algorithm_update': 0.008050353762129663, 'loss': 0.14888040140061312, 'time_step': 0.00829044865890288, 'init_value': -11.09360408782959, 'ave_value': -15.100131965021061, 'soft_opc': nan} step=6035




2022-04-21 23:36.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.30 [info     ] FQE_20220421233533: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00017299383458956865, 'time_algorithm_update': 0.008211549570862677, 'loss': 0.15301432425917033, 'time_step': 0.008458793667000784, 'init_value': -11.518418312072754, 'ave_value': -15.80990953973208, 'soft_opc': nan} step=6390




2022-04-21 23:36.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.33 [info     ] FQE_20220421233533: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.0001670232960875605, 'time_algorithm_update': 0.007442580478292116, 'loss': 0.16343473532355168, 'time_step': 0.007683602185316489, 'init_value': -12.176538467407227, 'ave_value': -16.820880798759607, 'soft_opc': nan} step=6745




2022-04-21 23:36.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.37 [info     ] FQE_20220421233533: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00016743968909894917, 'time_algorithm_update': 0.008263852562702878, 'loss': 0.1711089226036844, 'time_step': 0.008503891044939068, 'init_value': -12.465119361877441, 'ave_value': -17.35801300603604, 'soft_opc': nan} step=7100




2022-04-21 23:36.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.40 [info     ] FQE_20220421233533: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00017120535944549131, 'time_algorithm_update': 0.008451160914461377, 'loss': 0.18042212580920944, 'time_step': 0.008699207574548855, 'init_value': -12.847073554992676, 'ave_value': -18.13555947939555, 'soft_opc': nan} step=7455




2022-04-21 23:36.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.43 [info     ] FQE_20220421233533: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00016955321943256218, 'time_algorithm_update': 0.008330708490291112, 'loss': 0.1853926087926391, 'time_step': 0.008574894784202038, 'init_value': -13.275808334350586, 'ave_value': -18.906772487166613, 'soft_opc': nan} step=7810




2022-04-21 23:36.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.46 [info     ] FQE_20220421233533: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001689648964035679, 'time_algorithm_update': 0.008537270317614918, 'loss': 0.18992871294244074, 'time_step': 0.008781273264280508, 'init_value': -13.375494956970215, 'ave_value': -19.44440456504527, 'soft_opc': nan} step=8165




2022-04-21 23:36.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.50 [info     ] FQE_20220421233533: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.0001673832745619223, 'time_algorithm_update': 0.008196781722592635, 'loss': 0.195563349757396, 'time_step': 0.00843787260458503, 'init_value': -13.64172649383545, 'ave_value': -19.969637461886904, 'soft_opc': nan} step=8520




2022-04-21 23:36.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.53 [info     ] FQE_20220421233533: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00016961567838427047, 'time_algorithm_update': 0.00851778849749498, 'loss': 0.20238669006757334, 'time_step': 0.00876320650879766, 'init_value': -13.80311393737793, 'ave_value': -20.456144929453828, 'soft_opc': nan} step=8875




2022-04-21 23:36.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.56 [info     ] FQE_20220421233533: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00017468358429384903, 'time_algorithm_update': 0.008245054433043573, 'loss': 0.2067610714951871, 'time_step': 0.008494959414844782, 'init_value': -13.892045974731445, 'ave_value': -20.95904646410752, 'soft_opc': nan} step=9230




2022-04-21 23:36.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:36.59 [info     ] FQE_20220421233533: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.000167419541050011, 'time_algorithm_update': 0.008533953948759696, 'loss': 0.20780364322725317, 'time_step': 0.008776847409530424, 'init_value': -14.094154357910156, 'ave_value': -21.323512360365388, 'soft_opc': nan} step=9585




2022-04-21 23:36.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.03 [info     ] FQE_20220421233533: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.0001677815343292666, 'time_algorithm_update': 0.00823790389047542, 'loss': 0.21777215129367902, 'time_step': 0.008480767129172742, 'init_value': -14.578691482543945, 'ave_value': -22.000276227960867, 'soft_opc': nan} step=9940




2022-04-21 23:37.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.06 [info     ] FQE_20220421233533: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00015218022843481788, 'time_algorithm_update': 0.008461523056030273, 'loss': 0.2186983722590015, 'time_step': 0.008678514856687735, 'init_value': -14.701422691345215, 'ave_value': -22.26609075189097, 'soft_opc': nan} step=10295




2022-04-21 23:37.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.09 [info     ] FQE_20220421233533: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00015004587845063547, 'time_algorithm_update': 0.008055512334259463, 'loss': 0.23188931000169735, 'time_step': 0.008269332160412425, 'init_value': -15.075579643249512, 'ave_value': -22.770727201594347, 'soft_opc': nan} step=10650




2022-04-21 23:37.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.12 [info     ] FQE_20220421233533: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00016314882627675232, 'time_algorithm_update': 0.00846640559988962, 'loss': 0.2358396447198072, 'time_step': 0.008700113565149442, 'init_value': -15.179047584533691, 'ave_value': -23.04997258462501, 'soft_opc': nan} step=11005




2022-04-21 23:37.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.16 [info     ] FQE_20220421233533: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00016862305117325044, 'time_algorithm_update': 0.008232445784018074, 'loss': 0.23871837475822424, 'time_step': 0.008479096855915767, 'init_value': -15.031608581542969, 'ave_value': -23.053894100655615, 'soft_opc': nan} step=11360




2022-04-21 23:37.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.19 [info     ] FQE_20220421233533: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017022549266546545, 'time_algorithm_update': 0.008585340875974843, 'loss': 0.24896870594419224, 'time_step': 0.008829308899355607, 'init_value': -15.583253860473633, 'ave_value': -23.442098589569444, 'soft_opc': nan} step=11715




2022-04-21 23:37.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.22 [info     ] FQE_20220421233533: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.0001682449394548443, 'time_algorithm_update': 0.008300916913529516, 'loss': 0.25572213458133414, 'time_step': 0.008544604879030039, 'init_value': -15.77576732635498, 'ave_value': -23.725112431389945, 'soft_opc': nan} step=12070




2022-04-21 23:37.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.25 [info     ] FQE_20220421233533: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.0001677311642069212, 'time_algorithm_update': 0.008512897222814425, 'loss': 0.2604397705165853, 'time_step': 0.008753686555674378, 'init_value': -16.140275955200195, 'ave_value': -24.1497682795408, 'soft_opc': nan} step=12425




2022-04-21 23:37.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.29 [info     ] FQE_20220421233533: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00017213351289990922, 'time_algorithm_update': 0.00843434602441922, 'loss': 0.27659684842268767, 'time_step': 0.008680281168977979, 'init_value': -16.418485641479492, 'ave_value': -24.35537961452931, 'soft_opc': nan} step=12780




2022-04-21 23:37.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.32 [info     ] FQE_20220421233533: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.0001691301104048608, 'time_algorithm_update': 0.008368877625801194, 'loss': 0.28839956971121505, 'time_step': 0.008612585067749024, 'init_value': -16.76735496520996, 'ave_value': -24.756230410154874, 'soft_opc': nan} step=13135




2022-04-21 23:37.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.35 [info     ] FQE_20220421233533: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00016821471738143705, 'time_algorithm_update': 0.008516189414010921, 'loss': 0.29331738842834887, 'time_step': 0.008760299816937513, 'init_value': -16.96929359436035, 'ave_value': -25.05995588946987, 'soft_opc': nan} step=13490




2022-04-21 23:37.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.39 [info     ] FQE_20220421233533: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00016854380218076035, 'time_algorithm_update': 0.008226661950769558, 'loss': 0.305886932362763, 'time_step': 0.008469785099298181, 'init_value': -17.067670822143555, 'ave_value': -25.275253181752092, 'soft_opc': nan} step=13845




2022-04-21 23:37.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.42 [info     ] FQE_20220421233533: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00016833291926854094, 'time_algorithm_update': 0.008501901760907241, 'loss': 0.321898646442823, 'time_step': 0.008744726718311578, 'init_value': -17.519550323486328, 'ave_value': -25.586784763962147, 'soft_opc': nan} step=14200




2022-04-21 23:37.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.45 [info     ] FQE_20220421233533: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00016954448861135563, 'time_algorithm_update': 0.008260160096934144, 'loss': 0.3262598728341326, 'time_step': 0.008503148925136513, 'init_value': -17.656484603881836, 'ave_value': -25.630502570948853, 'soft_opc': nan} step=14555




2022-04-21 23:37.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.48 [info     ] FQE_20220421233533: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00016934502292686784, 'time_algorithm_update': 0.008515461397842622, 'loss': 0.33029068305196474, 'time_step': 0.008761610111720126, 'init_value': -17.662893295288086, 'ave_value': -25.763027785889122, 'soft_opc': nan} step=14910




2022-04-21 23:37.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.52 [info     ] FQE_20220421233533: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00016621334452024647, 'time_algorithm_update': 0.008062282078702685, 'loss': 0.33994610926897173, 'time_step': 0.00830261808046153, 'init_value': -17.487518310546875, 'ave_value': -25.7386600351395, 'soft_opc': nan} step=15265




2022-04-21 23:37.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.55 [info     ] FQE_20220421233533: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00016687554372868068, 'time_algorithm_update': 0.008449154840388768, 'loss': 0.3489351595371542, 'time_step': 0.008688453217627297, 'init_value': -17.710790634155273, 'ave_value': -25.920094684222796, 'soft_opc': nan} step=15620




2022-04-21 23:37.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:37.58 [info     ] FQE_20220421233533: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00016516631757709343, 'time_algorithm_update': 0.008198286781848316, 'loss': 0.35966516533000786, 'time_step': 0.008438122440391863, 'init_value': -17.85515022277832, 'ave_value': -25.99254419843509, 'soft_opc': nan} step=15975




2022-04-21 23:37.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:38.01 [info     ] FQE_20220421233533: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.0001705458466435822, 'time_algorithm_update': 0.008573443453076859, 'loss': 0.3685442395141007, 'time_step': 0.008817395358018473, 'init_value': -17.9910888671875, 'ave_value': -26.16528308253491, 'soft_opc': nan} step=16330




2022-04-21 23:38.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:38.05 [info     ] FQE_20220421233533: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00017084470936949825, 'time_algorithm_update': 0.008243916068278567, 'loss': 0.3852354282775605, 'time_step': 0.008488521441607408, 'init_value': -18.1917667388916, 'ave_value': -26.31218208812532, 'soft_opc': nan} step=16685




2022-04-21 23:38.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:38.08 [info     ] FQE_20220421233533: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.0001668654697042116, 'time_algorithm_update': 0.008480607287984498, 'loss': 0.39336616970651167, 'time_step': 0.008722081654508348, 'init_value': -17.9101505279541, 'ave_value': -26.339051447074233, 'soft_opc': nan} step=17040




2022-04-21 23:38.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:38.11 [info     ] FQE_20220421233533: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00016914488564074879, 'time_algorithm_update': 0.008362739858492998, 'loss': 0.38834560037341337, 'time_step': 0.00860575152115083, 'init_value': -17.936532974243164, 'ave_value': -26.10583783098169, 'soft_opc': nan} step=17395




2022-04-21 23:38.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-21 23:38.14 [info     ] FQE_20220421233533: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00016921473221040109, 'time_algorithm_update': 0.00843326877540266, 'loss': 0.4019943233763038, 'time_step': 0.008673774692374216, 'init_value': -18.540817260742188, 'ave_value': -26.670165085884594, 'soft_opc': nan} step=17750




2022-04-21 23:38.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421233533/model_17750.pt
search iteration:  5
using hyper params:  [0.0029606638451320244, 0.004498437280523096, 5.296004229703394e-05, 3]
2022-04-21 23:38.14 [debug    ] RoundIterator is selected.
2022-04-21 23:38.14 [info     ] Directory is created at d3rlpy_logs/CQL_20220421233814
2022-04-21 23:38.14 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 23:38.14 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-21 23:38.14 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220421233814/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0029606638451320244, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:38.34 [info     ] CQL_20220421233814: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00035231857630558784, 'time_algorithm_update': 0.054527294429051396, 'temp_loss': 4.860173532728515, 'temp': 0.9903920840665784, 'alpha_loss': -17.67286254353606, 'alpha': 1.017736463188436, 'critic_loss': 60.82972824228981, 'actor_loss': 1.2804083814682987, 'time_step': 0.054970235493830864, 'td_error': 1.2490767379881804, 'init_value': -4.299127101898193, 'ave_value': -3.857677393267439} step=346
2022-04-21 23:38.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:38.54 [info     ] CQL_20220421233814: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003502293129187788, 'time_algorithm_update': 0.054705066487968315, 'temp_loss': 4.907582123155539, 'temp': 0.972066694429155, 'alpha_loss': -18.359792025792117, 'alpha': 1.0542521535316645, 'critic_loss': 95.4713294409603, 'actor_loss': 4.021427503900032, 'time_step': 0.05514779049537085, 'td_error': 1.2715155675412684, 'init_value': -6.280937194824219, 'ave_value': -5.6137497484655725} step=692
2022-04-21 23:38.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:39.14 [info     ] CQL_20220421233814: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.000365820923292568, 'time_algorithm_update': 0.055156933778972295, 'temp_loss': 4.819049778701253, 'temp': 0.9545163520843307, 'alpha_loss': -19.038915005722487, 'alpha': 1.0925606916405561, 'critic_loss': 184.15966347049425, 'actor_loss': 5.985600780200407, 'time_step': 0.055617504037184524, 'td_error': 1.3022426353228016, 'init_value': -7.428341388702393, 'ave_value': -6.778774172991811} step=1038
2022-04-21 23:39.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:39.36 [info     ] CQL_20220421233814: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003693393200119107, 'time_algorithm_update': 0.05853919900221632, 'temp_loss': 4.73332615532627, 'temp': 0.9374705697759728, 'alpha_loss': -19.726048938111763, 'alpha': 1.1327561618964797, 'critic_loss': 327.23370083494683, 'actor_loss': 6.334282562222784, 'time_step': 0.059003770006874394, 'td_error': 1.2847704737888503, 'init_value': -7.051659107208252, 'ave_value': -6.617035389093496} step=1384
2022-04-21 23:39.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:39.57 [info     ] CQL_20220421233814: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00036417955608037165, 'time_algorithm_update': 0.05853619258527811, 'temp_loss': 4.648922466818308, 'temp': 0.9208566961605424, 'alpha_loss': -20.440354242490205, 'alpha': 1.1748491139770243, 'critic_loss': 534.0902575895276, 'actor_loss': 4.657185085247018, 'time_step': 0.0589951407702672, 'td_error': 1.267329749636447, 'init_value': -5.126580715179443, 'ave_value': -4.927992609995358} step=1730
2022-04-21 23:39.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:40.18 [info     ] CQL_20220421233814: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00036556321072440614, 'time_algorithm_update': 0.05859212585956375, 'temp_loss': 4.5681375106635125, 'temp': 0.904632377142162, 'alpha_loss': -21.196286598381967, 'alpha': 1.2188893280966433, 'critic_loss': 788.7945129747336, 'actor_loss': 3.022403250539923, 'time_step': 0.059051304883350524, 'td_error': 1.2698089022106192, 'init_value': -4.353285312652588, 'ave_value': -4.257529401507859} step=2076
2022-04-21 23:40.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:40.39 [info     ] CQL_20220421233814: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00035297112657844677, 'time_algorithm_update': 0.0577934925266773, 'temp_loss': 4.487398986871532, 'temp': 0.8887659282009036, 'alpha_loss': -21.997529051896464, 'alpha': 1.2649402160175962, 'critic_loss': 1046.711029581941, 'actor_loss': 2.6115148618731197, 'time_step': 0.05823605253517283, 'td_error': 1.2745634304769855, 'init_value': -4.15170431137085, 'ave_value': -4.092832710631033} step=2422
2022-04-21 23:40.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:41.01 [info     ] CQL_20220421233814: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00035722269488207867, 'time_algorithm_update': 0.05831368190015672, 'temp_loss': 4.408398617209727, 'temp': 0.8732359331811783, 'alpha_loss': -22.83358522646689, 'alpha': 1.3130454886166347, 'critic_loss': 1296.3124322615606, 'actor_loss': 2.5986099477448215, 'time_step': 0.05875932687968877, 'td_error': 1.278244123939726, 'init_value': -4.190249919891357, 'ave_value': -4.144900618983197} step=2768
2022-04-21 23:41.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:41.22 [info     ] CQL_20220421233814: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00036026218723010466, 'time_algorithm_update': 0.05904171508171655, 'temp_loss': 4.333870157341048, 'temp': 0.858021530285047, 'alpha_loss': -23.705599266669655, 'alpha': 1.3632430644393656, 'critic_loss': 1542.6597501721685, 'actor_loss': 2.7134729616903845, 'time_step': 0.059490533233377975, 'td_error': 1.282490607756729, 'init_value': -4.4529500007629395, 'ave_value': -4.408086340435312} step=3114
2022-04-21 23:41.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:41.44 [info     ] CQL_20220421233814: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003561725506203712, 'time_algorithm_update': 0.059751844819570556, 'temp_loss': 4.257532258943327, 'temp': 0.8431031957182581, 'alpha_loss': -24.615146190444857, 'alpha': 1.4155745592420501, 'critic_loss': 1786.7514715470331, 'actor_loss': 2.8885443141694704, 'time_step': 0.06020017579800821, 'td_error': 1.2859566328125145, 'init_value': -4.558835029602051, 'ave_value': -4.526131010598173} step=3460
2022-04-21 23:41.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:42.06 [info     ] CQL_20220421233814: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003568278571773816, 'time_algorithm_update': 0.06022806663733686, 'temp_loss': 4.184490837802777, 'temp': 0.8284733925940674, 'alpha_loss': -25.563963950713934, 'alpha': 1.4700975704055301, 'critic_loss': 2027.3324740194862, 'actor_loss': 3.1218914303476413, 'time_step': 0.06067617986932655, 'td_error': 1.2897722171074182, 'init_value': -4.666558742523193, 'ave_value': -4.643112001740124} step=3806
2022-04-21 23:42.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:42.28 [info     ] CQL_20220421233814: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00035910041346026294, 'time_algorithm_update': 0.059710980839811995, 'temp_loss': 4.11127144477271, 'temp': 0.8141197190119353, 'alpha_loss': -26.551630698187502, 'alpha': 1.5268636766196675, 'critic_loss': 2262.777908942603, 'actor_loss': 3.398406058377613, 'time_step': 0.06016108066360385, 'td_error': 1.2960967731875799, 'init_value': -5.081642150878906, 'ave_value': -5.059251880057902} step=4152
2022-04-21 23:42.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:42.49 [info     ] CQL_20220421233814: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00036482245935870044, 'time_algorithm_update': 0.05997201128502112, 'temp_loss': 4.040574543048881, 'temp': 0.8000314342493267, 'alpha_loss': -27.580066713983612, 'alpha': 1.585940775154643, 'critic_loss': 2480.4331047631413, 'actor_loss': 3.7576454540208584, 'time_step': 0.06043066661481913, 'td_error': 1.301878475553704, 'init_value': -5.370345592498779, 'ave_value': -5.354345205080006} step=4498
2022-04-21 23:42.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:43.11 [info     ] CQL_20220421233814: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003595283265747776, 'time_algorithm_update': 0.059940958298699706, 'temp_loss': 3.9705787766186487, 'temp': 0.7861983557312475, 'alpha_loss': -28.65034622126232, 'alpha': 1.6473992184407449, 'critic_loss': 2673.235743880961, 'actor_loss': 4.167443178292644, 'time_step': 0.06039561770554912, 'td_error': 1.309782251732117, 'init_value': -5.861094951629639, 'ave_value': -5.8457621647657065} step=4844
2022-04-21 23:43.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:43.32 [info     ] CQL_20220421233814: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003598535681046502, 'time_algorithm_update': 0.05713586862376659, 'temp_loss': 3.902010216878329, 'temp': 0.772614972095269, 'alpha_loss': -29.75629938820194, 'alpha': 1.7113090953385899, 'critic_loss': 2823.6884892634575, 'actor_loss': 4.601136334369637, 'time_step': 0.057588217575426044, 'td_error': 1.3174468944948479, 'init_value': -6.1818342208862305, 'ave_value': -6.174754392642893} step=5190
2022-04-21 23:43.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:43.53 [info     ] CQL_20220421233814: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00035137868340993895, 'time_algorithm_update': 0.056886572369261285, 'temp_loss': 3.8344736492013656, 'temp': 0.7592752612050558, 'alpha_loss': -30.91354469343417, 'alpha': 1.7777492004322868, 'critic_loss': 2984.927298308797, 'actor_loss': 5.107291012141057, 'time_step': 0.057328712733494755, 'td_error': 1.3285251958715465, 'init_value': -6.875204086303711, 'ave_value': -6.861713098496156} step=5536
2022-04-21 23:43.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:44.14 [info     ] CQL_20220421233814: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003492480757608579, 'time_algorithm_update': 0.05744067230665615, 'temp_loss': 3.7686625691507594, 'temp': 0.7461703057923069, 'alpha_loss': -32.118639097048366, 'alpha': 1.8468197418775172, 'critic_loss': 3145.4617969314486, 'actor_loss': 5.651231231027945, 'time_step': 0.057880128050126094, 'td_error': 1.3380000518616355, 'init_value': -7.2748122215271, 'ave_value': -7.268064202262983} step=5882
2022-04-21 23:44.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:44.34 [info     ] CQL_20220421233814: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00035841340963551076, 'time_algorithm_update': 0.05694637684463766, 'temp_loss': 3.703305745400445, 'temp': 0.7332947882958231, 'alpha_loss': -33.360553068921746, 'alpha': 1.9186089249015543, 'critic_loss': 3247.6766237468387, 'actor_loss': 6.175527006215443, 'time_step': 0.057395261147118716, 'td_error': 1.3510538642178693, 'init_value': -7.946661472320557, 'ave_value': -7.937086242496713} step=6228
2022-04-21 23:44.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:44.55 [info     ] CQL_20220421233814: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00035900118723081024, 'time_algorithm_update': 0.056810604354549696, 'temp_loss': 3.63911127079429, 'temp': 0.7206452185707974, 'alpha_loss': -34.660752908342836, 'alpha': 1.993209483995603, 'critic_loss': 3284.889818489207, 'actor_loss': 6.734334608033903, 'time_step': 0.05726036928981715, 'td_error': 1.361615400265937, 'init_value': -8.35912036895752, 'ave_value': -8.354467589168992} step=6574
2022-04-21 23:44.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:45.16 [info     ] CQL_20220421233814: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003684848719249571, 'time_algorithm_update': 0.05742421866841399, 'temp_loss': 3.577790889436799, 'temp': 0.7082161150571239, 'alpha_loss': -36.010076379500376, 'alpha': 2.070740767297028, 'critic_loss': 3314.845905634709, 'actor_loss': 7.324268098511448, 'time_step': 0.05788531055340188, 'td_error': 1.3752981301944578, 'init_value': -8.940391540527344, 'ave_value': -8.937104307276877} step=6920
2022-04-21 23:45.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:45.37 [info     ] CQL_20220421233814: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00036529309487756277, 'time_algorithm_update': 0.05757230072352239, 'temp_loss': 3.5162067199718057, 'temp': 0.6960001868664185, 'alpha_loss': -37.40505171097772, 'alpha': 2.151297474183099, 'critic_loss': 3320.2763050939307, 'actor_loss': 7.95269175209751, 'time_step': 0.05802905215004276, 'td_error': 1.3902035439175422, 'init_value': -9.564192771911621, 'ave_value': -9.561123798039356} step=7266
2022-04-21 23:45.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:45.58 [info     ] CQL_20220421233814: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00035490741619484964, 'time_algorithm_update': 0.05654945125469583, 'temp_loss': 3.4546267800248427, 'temp': 0.6839973387346102, 'alpha_loss': -38.857864401933085, 'alpha': 2.234994044193643, 'critic_loss': 3398.473589064758, 'actor_loss': 8.593452153178308, 'time_step': 0.056993251591059516, 'td_error': 1.407060163269718, 'init_value': -10.29077434539795, 'ave_value': -10.282399524654354} step=7612
2022-04-21 23:45.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:46.18 [info     ] CQL_20220421233814: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003714361631801363, 'time_algorithm_update': 0.057497323592963244, 'temp_loss': 3.3948010426725266, 'temp': 0.6722059416977656, 'alpha_loss': -40.374471234448386, 'alpha': 2.321961353279952, 'critic_loss': 3611.839956647399, 'actor_loss': 9.26556968137708, 'time_step': 0.05796198968942455, 'td_error': 1.422629380453308, 'init_value': -10.84749698638916, 'ave_value': -10.844143701899725} step=7958
2022-04-21 23:46.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:46.39 [info     ] CQL_20220421233814: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00036660646427573494, 'time_algorithm_update': 0.05750570062957058, 'temp_loss': 3.3369385679333194, 'temp': 0.6606175954631298, 'alpha_loss': -41.94641477110758, 'alpha': 2.4123226941665474, 'critic_loss': 3702.6093601822163, 'actor_loss': 9.872872909369496, 'time_step': 0.057966718094886384, 'td_error': 1.4389426912031271, 'init_value': -11.36926555633545, 'ave_value': -11.368200874283394} step=8304
2022-04-21 23:46.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:47.00 [info     ] CQL_20220421233814: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003651539025279139, 'time_algorithm_update': 0.05631352986903549, 'temp_loss': 3.278358416061181, 'temp': 0.649229483797371, 'alpha_loss': -43.57938285783536, 'alpha': 2.506210248594339, 'critic_loss': 3592.301095951499, 'actor_loss': 10.479271522147117, 'time_step': 0.05676403969009488, 'td_error': 1.45775221488618, 'init_value': -12.063115119934082, 'ave_value': -12.059310339027357} step=8650
2022-04-21 23:47.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:47.20 [info     ] CQL_20220421233814: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003627318178298156, 'time_algorithm_update': 0.05626834748108263, 'temp_loss': 3.223158377443435, 'temp': 0.6380368412574592, 'alpha_loss': -45.270558869907624, 'alpha': 2.603750673332655, 'critic_loss': 3699.7033042246208, 'actor_loss': 11.236085938580464, 'time_step': 0.056717804401596156, 'td_error': 1.4793124983523873, 'init_value': -12.744911193847656, 'ave_value': -12.744140247826104} step=8996
2022-04-21 23:47.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:47.41 [info     ] CQL_20220421233814: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00035293253860032626, 'time_algorithm_update': 0.05682208221082743, 'temp_loss': 3.167198297605349, 'temp': 0.6270367399805543, 'alpha_loss': -47.04218458715891, 'alpha': 2.7050966961535416, 'critic_loss': 3801.0054959864974, 'actor_loss': 11.881816290706569, 'time_step': 0.05726123269582759, 'td_error': 1.4977038538541902, 'init_value': -13.304880142211914, 'ave_value': -13.304267310734437} step=9342
2022-04-21 23:47.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:48.02 [info     ] CQL_20220421233814: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0003635841987036556, 'time_algorithm_update': 0.05612508409974203, 'temp_loss': 3.112694483271913, 'temp': 0.6162271380769035, 'alpha_loss': -48.86845720985721, 'alpha': 2.8104041422033585, 'critic_loss': 3635.683315740155, 'actor_loss': 12.43905098865487, 'time_step': 0.0565781048956634, 'td_error': 1.5165144667956043, 'init_value': -13.857304573059082, 'ave_value': -13.858253813671693} step=9688
2022-04-21 23:48.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:48.22 [info     ] CQL_20220421233814: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003538214402391731, 'time_algorithm_update': 0.056117099833626276, 'temp_loss': 3.0582667279105653, 'temp': 0.6056059392201418, 'alpha_loss': -50.76834057934711, 'alpha': 2.919801170426297, 'critic_loss': 3467.1125890478233, 'actor_loss': 13.076158338888533, 'time_step': 0.0565581383732702, 'td_error': 1.5372067064331785, 'init_value': -14.467726707458496, 'ave_value': -14.467439723523547} step=10034
2022-04-21 23:48.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:48.43 [info     ] CQL_20220421233814: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00036025185116453666, 'time_algorithm_update': 0.058808479694961814, 'temp_loss': 3.0066544796001016, 'temp': 0.5951654916209292, 'alpha_loss': -52.75530645337408, 'alpha': 3.0334662212801806, 'critic_loss': 3330.622377957912, 'actor_loss': 13.722099739692114, 'time_step': 0.05925731920782541, 'td_error': 1.5597976914201863, 'init_value': -15.120277404785156, 'ave_value': -15.120158811562883} step=10380
2022-04-21 23:48.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:49.04 [info     ] CQL_20220421233814: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003520760233002591, 'time_algorithm_update': 0.05756440741478363, 'temp_loss': 2.953798739896344, 'temp': 0.5849066473156042, 'alpha_loss': -54.804096166798146, 'alpha': 3.1515557421425173, 'critic_loss': 3194.2949931414832, 'actor_loss': 14.356211800106689, 'time_step': 0.05800115855442995, 'td_error': 1.583890959973017, 'init_value': -15.829347610473633, 'ave_value': -15.827453928549971} step=10726
2022-04-21 23:49.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:49.25 [info     ] CQL_20220421233814: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00035438923477437457, 'time_algorithm_update': 0.0580394019970315, 'temp_loss': 2.9036007368495698, 'temp': 0.5748249307877755, 'alpha_loss': -56.93436858557552, 'alpha': 3.2742341040186798, 'critic_loss': 3136.410839984872, 'actor_loss': 15.069193506516473, 'time_step': 0.058478677892960566, 'td_error': 1.6121687678577288, 'init_value': -16.56346321105957, 'ave_value': -16.56227630924534} step=11072
2022-04-21 23:49.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:49.47 [info     ] CQL_20220421233814: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003662908697403924, 'time_algorithm_update': 0.05910783144779977, 'temp_loss': 2.8531151355346505, 'temp': 0.5649164238072544, 'alpha_loss': -59.15353664773048, 'alpha': 3.40169975040965, 'critic_loss': 3262.1732036612625, 'actor_loss': 15.795708909889177, 'time_step': 0.05956461594972996, 'td_error': 1.6341828813932229, 'init_value': -17.096969604492188, 'ave_value': -17.0968165388148} step=11418
2022-04-21 23:49.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:50.08 [info     ] CQL_20220421233814: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003706016981532808, 'time_algorithm_update': 0.05875680281247707, 'temp_loss': 2.8038506618124903, 'temp': 0.5551795046453532, 'alpha_loss': -61.44777152441829, 'alpha': 3.534125405239921, 'critic_loss': 3068.4288682882498, 'actor_loss': 16.361624717712402, 'time_step': 0.05921596392041686, 'td_error': 1.661871639718049, 'init_value': -17.801607131958008, 'ave_value': -17.8005682000621} step=11764
2022-04-21 23:50.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:50.30 [info     ] CQL_20220421233814: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00036960805771667834, 'time_algorithm_update': 0.05857412870219677, 'temp_loss': 2.755703399636153, 'temp': 0.5456098033858172, 'alpha_loss': -63.84572219848633, 'alpha': 3.67170529282851, 'critic_loss': 2956.593687200822, 'actor_loss': 17.03450676471512, 'time_step': 0.05903504900849624, 'td_error': 1.6859369735868375, 'init_value': -18.349166870117188, 'ave_value': -18.350212571156312} step=12110
2022-04-21 23:50.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:50.51 [info     ] CQL_20220421233814: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003602993970661494, 'time_algorithm_update': 0.058446515502268176, 'temp_loss': 2.708311060949557, 'temp': 0.5362052731431288, 'alpha_loss': -66.33589842691588, 'alpha': 3.8146364743998973, 'critic_loss': 2850.8564685975884, 'actor_loss': 17.65951887582768, 'time_step': 0.05889346351513284, 'td_error': 1.7122669043823395, 'init_value': -18.971708297729492, 'ave_value': -18.972042399008956} step=12456
2022-04-21 23:50.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:51.12 [info     ] CQL_20220421233814: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00036010438996243336, 'time_algorithm_update': 0.059151600551053965, 'temp_loss': 2.6610851467000267, 'temp': 0.5269628876895573, 'alpha_loss': -68.90426776863936, 'alpha': 3.96313036383921, 'critic_loss': 2730.437705332144, 'actor_loss': 18.28939505119544, 'time_step': 0.05959803176064023, 'td_error': 1.7394061426785754, 'init_value': -19.60382843017578, 'ave_value': -19.603173658462314} step=12802
2022-04-21 23:51.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:51.34 [info     ] CQL_20220421233814: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00035682441182219223, 'time_algorithm_update': 0.05886981321897121, 'temp_loss': 2.614839578639565, 'temp': 0.5178810824893113, 'alpha_loss': -71.59427259147512, 'alpha': 4.117412079276377, 'critic_loss': 2735.747404065435, 'actor_loss': 18.987619918205834, 'time_step': 0.059316794307245686, 'td_error': 1.7687051006518566, 'init_value': -20.25193977355957, 'ave_value': -20.250676717798875} step=13148
2022-04-21 23:51.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:51.55 [info     ] CQL_20220421233814: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003584623336791992, 'time_algorithm_update': 0.0586126829158364, 'temp_loss': 2.5704062935933902, 'temp': 0.5089545243048255, 'alpha_loss': -74.39182890081682, 'alpha': 4.277709034826025, 'critic_loss': 2614.068656436281, 'actor_loss': 19.557462879688064, 'time_step': 0.059060496401924616, 'td_error': 1.7964262682461687, 'init_value': -20.851165771484375, 'ave_value': -20.849093304431058} step=13494
2022-04-21 23:51.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:52.17 [info     ] CQL_20220421233814: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00035883856646587393, 'time_algorithm_update': 0.06009767992648086, 'temp_loss': 2.5260079403144085, 'temp': 0.5001818297398573, 'alpha_loss': -77.28968222292862, 'alpha': 4.444241709791856, 'critic_loss': 2583.234386995349, 'actor_loss': 20.20038931080372, 'time_step': 0.06054638644863415, 'td_error': 1.8265119869758808, 'init_value': -21.512956619262695, 'ave_value': -21.50846239897582} step=13840
2022-04-21 23:52.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:52.39 [info     ] CQL_20220421233814: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003624306937862683, 'time_algorithm_update': 0.05890646077304906, 'temp_loss': 2.4827247678889015, 'temp': 0.4915600660219358, 'alpha_loss': -80.27626763051644, 'alpha': 4.617241321960625, 'critic_loss': 2491.2730077842757, 'actor_loss': 20.73556868051518, 'time_step': 0.05936034704219399, 'td_error': 1.8509497639128785, 'init_value': -21.9473819732666, 'ave_value': -21.948036835056847} step=14186
2022-04-21 23:52.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:52.59 [info     ] CQL_20220421233814: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00035211047685215237, 'time_algorithm_update': 0.05616323107239828, 'temp_loss': 2.440054650940647, 'temp': 0.4830860911593961, 'alpha_loss': -83.41310260750654, 'alpha': 4.796979117255679, 'critic_loss': 2442.8677470477332, 'actor_loss': 21.334048794873187, 'time_step': 0.056603801043736454, 'td_error': 1.8757942399795382, 'init_value': -22.447206497192383, 'ave_value': -22.447407859035327} step=14532
2022-04-21 23:52.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:53.20 [info     ] CQL_20220421233814: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003601546921481976, 'time_algorithm_update': 0.05689329908073293, 'temp_loss': 2.3983254825448714, 'temp': 0.4747579955641245, 'alpha_loss': -86.6593758467305, 'alpha': 4.9837199456429895, 'critic_loss': 2348.433300922372, 'actor_loss': 21.865833310033544, 'time_step': 0.057342192341137486, 'td_error': 1.906205627248566, 'init_value': -23.07821273803711, 'ave_value': -23.077282537132035} step=14878
2022-04-21 23:53.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:53.40 [info     ] CQL_20220421233814: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003613199112732286, 'time_algorithm_update': 0.05671675219012134, 'temp_loss': 2.3565577654480245, 'temp': 0.46657336757362233, 'alpha_loss': -90.03219708128472, 'alpha': 5.177736320936611, 'critic_loss': 2371.8700445097993, 'actor_loss': 22.46553364241054, 'time_step': 0.05716925687183534, 'td_error': 1.9339293369879507, 'init_value': -23.57000160217285, 'ave_value': -23.569477420985955} step=15224
2022-04-21 23:53.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:54.01 [info     ] CQL_20220421233814: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00036390186045211177, 'time_algorithm_update': 0.05670968369941491, 'temp_loss': 2.3156222318638267, 'temp': 0.4585321201926711, 'alpha_loss': -93.54341958988608, 'alpha': 5.379307844735294, 'critic_loss': 2296.0396220477332, 'actor_loss': 22.970308243194758, 'time_step': 0.05716835970134404, 'td_error': 1.9603782765816948, 'init_value': -24.07085609436035, 'ave_value': -24.071758512910254} step=15570
2022-04-21 23:54.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:54.22 [info     ] CQL_20220421233814: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00036295369870400845, 'time_algorithm_update': 0.05651677412793815, 'temp_loss': 2.2758537689385387, 'temp': 0.45062840544764016, 'alpha_loss': -97.18685364034134, 'alpha': 5.588732452061824, 'critic_loss': 2241.8799639575054, 'actor_loss': 23.48467599174191, 'time_step': 0.056967653980144875, 'td_error': 1.9908992407969859, 'init_value': -24.666135787963867, 'ave_value': -24.66664312271328} step=15916
2022-04-21 23:54.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:54.42 [info     ] CQL_20220421233814: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003606136134594162, 'time_algorithm_update': 0.056591985542650165, 'temp_loss': 2.2369743337520975, 'temp': 0.4428610067836122, 'alpha_loss': -100.9610363513748, 'alpha': 5.806299171006748, 'critic_loss': 2289.5858923410406, 'actor_loss': 24.04414203401246, 'time_step': 0.05704078371125149, 'td_error': 2.017118801885365, 'init_value': -25.121936798095703, 'ave_value': -25.12025000010129} step=16262
2022-04-21 23:54.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:55.03 [info     ] CQL_20220421233814: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003596309981594196, 'time_algorithm_update': 0.05655118426835606, 'temp_loss': 2.198254763735512, 'temp': 0.4352264527468323, 'alpha_loss': -104.88602273312607, 'alpha': 6.0323289391622374, 'critic_loss': 2209.475044735594, 'actor_loss': 24.494430354564866, 'time_step': 0.0569938317888734, 'td_error': 2.0419168262087184, 'init_value': -25.562013626098633, 'ave_value': -25.56124842721967} step=16608
2022-04-21 23:55.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:55.23 [info     ] CQL_20220421233814: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00035606367739638844, 'time_algorithm_update': 0.05515514081613177, 'temp_loss': 2.1606793327827676, 'temp': 0.4277233763064952, 'alpha_loss': -108.98289156786969, 'alpha': 6.267164736124822, 'critic_loss': 2147.307648234285, 'actor_loss': 24.969347628554857, 'time_step': 0.055596884964518464, 'td_error': 2.0691101187680454, 'init_value': -26.06000328063965, 'ave_value': -26.057864717761166} step=16954
2022-04-21 23:55.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-21 23:55.42 [info     ] CQL_20220421233814: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003567120932430201, 'time_algorithm_update': 0.05369408971312418, 'temp_loss': 2.1231315749229034, 'temp': 0.42035092284224623, 'alpha_loss': -113.22434534502857, 'alpha': 6.51115079697846, 'critic_loss': 2153.615157463647, 'actor_loss': 25.47597849300142, 'time_step': 0.054136083994297625, 'td_error': 2.1015021843674697, 'init_value': -26.651025772094727, 'ave_value': -26.647512405706617} step=17300
2022-04-21 23:55.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421233814/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 23:55.44 [info     ] FQE_20220421235543: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00015777787246273062, 'time_algorithm_update': 0.008183319016365008, 'loss': 0.006222791171311743, 'time_step': 0.008412734263360836, 'init_value': -0.3939772844314575, 'ave_value': -0.3497372379666334, 'soft_opc': nan} step=177




2022-04-21 23:55.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.46 [info     ] FQE_20220421235543: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00015944006752833135, 'time_algorithm_update': 0.007895775433987548, 'loss': 0.004209106519692025, 'time_step': 0.008123362805210265, 'init_value': -0.4631059169769287, 'ave_value': -0.38378496949558144, 'soft_opc': nan} step=354




2022-04-21 23:55.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.47 [info     ] FQE_20220421235543: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00015862109297412936, 'time_algorithm_update': 0.008136813923463984, 'loss': 0.003787430722166463, 'time_step': 0.008363384311482057, 'init_value': -0.5138669610023499, 'ave_value': -0.41328420889628187, 'soft_opc': nan} step=531




2022-04-21 23:55.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.49 [info     ] FQE_20220421235543: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00016003678747489628, 'time_algorithm_update': 0.008113016516475354, 'loss': 0.0035036161619548998, 'time_step': 0.008340677972567283, 'init_value': -0.5284333825111389, 'ave_value': -0.417837448149651, 'soft_opc': nan} step=708




2022-04-21 23:55.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.50 [info     ] FQE_20220421235543: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.0001590224982654981, 'time_algorithm_update': 0.008070177951101529, 'loss': 0.0031730471702429365, 'time_step': 0.008296600169381179, 'init_value': -0.5733290314674377, 'ave_value': -0.44430180596517727, 'soft_opc': nan} step=885




2022-04-21 23:55.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.52 [info     ] FQE_20220421235543: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00015961652421681894, 'time_algorithm_update': 0.007767424071575962, 'loss': 0.002934227996810976, 'time_step': 0.007997150475022483, 'init_value': -0.5808828473091125, 'ave_value': -0.4336367063470431, 'soft_opc': nan} step=1062




2022-04-21 23:55.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.54 [info     ] FQE_20220421235543: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00016042472279004458, 'time_algorithm_update': 0.00821061996416857, 'loss': 0.002544496178174507, 'time_step': 0.008448785307717189, 'init_value': -0.6052173972129822, 'ave_value': -0.43995787602018666, 'soft_opc': nan} step=1239




2022-04-21 23:55.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.55 [info     ] FQE_20220421235543: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00016269306678556454, 'time_algorithm_update': 0.008137915767518813, 'loss': 0.002271295817158485, 'time_step': 0.008370326737226066, 'init_value': -0.6755363941192627, 'ave_value': -0.5005833725805755, 'soft_opc': nan} step=1416




2022-04-21 23:55.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.57 [info     ] FQE_20220421235543: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00015984955480543233, 'time_algorithm_update': 0.007867822539334917, 'loss': 0.002102778787135362, 'time_step': 0.00809423398163359, 'init_value': -0.6666248440742493, 'ave_value': -0.48846882529802865, 'soft_opc': nan} step=1593




2022-04-21 23:55.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:55.58 [info     ] FQE_20220421235543: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00016108205762960142, 'time_algorithm_update': 0.00822524566434871, 'loss': 0.0021390787297517777, 'time_step': 0.008458300498919298, 'init_value': -0.6880719065666199, 'ave_value': -0.4997563863928254, 'soft_opc': nan} step=1770




2022-04-21 23:55.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.00 [info     ] FQE_20220421235543: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.0001577711374746204, 'time_algorithm_update': 0.008155651685208251, 'loss': 0.002169645340431818, 'time_step': 0.008382713727358371, 'init_value': -0.7646793723106384, 'ave_value': -0.5650250238453125, 'soft_opc': nan} step=1947




2022-04-21 23:56.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.01 [info     ] FQE_20220421235543: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00016013511830130538, 'time_algorithm_update': 0.007981983281798282, 'loss': 0.0022345859018722707, 'time_step': 0.008213378615298514, 'init_value': -0.7584365010261536, 'ave_value': -0.5485901833313319, 'soft_opc': nan} step=2124




2022-04-21 23:56.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.03 [info     ] FQE_20220421235543: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00015595942567297295, 'time_algorithm_update': 0.008029625240692312, 'loss': 0.0024389932459906944, 'time_step': 0.008257137180048193, 'init_value': -0.7826350331306458, 'ave_value': -0.5609940762723888, 'soft_opc': nan} step=2301




2022-04-21 23:56.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.05 [info     ] FQE_20220421235543: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.0001636790690448998, 'time_algorithm_update': 0.008168821280958962, 'loss': 0.002672829430110984, 'time_step': 0.008406800738835738, 'init_value': -0.8445940613746643, 'ave_value': -0.6043253999723641, 'soft_opc': nan} step=2478




2022-04-21 23:56.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.06 [info     ] FQE_20220421235543: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00015865207391943634, 'time_algorithm_update': 0.008115614874888275, 'loss': 0.003142302080733691, 'time_step': 0.008344204412341792, 'init_value': -0.866938054561615, 'ave_value': -0.6255389966657973, 'soft_opc': nan} step=2655




2022-04-21 23:56.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.08 [info     ] FQE_20220421235543: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016241019728493555, 'time_algorithm_update': 0.007800205952703616, 'loss': 0.0031045496376299717, 'time_step': 0.008033512675829526, 'init_value': -0.922914981842041, 'ave_value': -0.6771424437551438, 'soft_opc': nan} step=2832




2022-04-21 23:56.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.09 [info     ] FQE_20220421235543: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.0001616397146451271, 'time_algorithm_update': 0.008149113358750855, 'loss': 0.0034020194312883805, 'time_step': 0.00838029990761967, 'init_value': -0.997750997543335, 'ave_value': -0.7350887280846904, 'soft_opc': nan} step=3009




2022-04-21 23:56.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.11 [info     ] FQE_20220421235543: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00016130027124437236, 'time_algorithm_update': 0.008254537474637651, 'loss': 0.0037245361190644424, 'time_step': 0.008488264461021638, 'init_value': -0.9928895831108093, 'ave_value': -0.7196994300047914, 'soft_opc': nan} step=3186




2022-04-21 23:56.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.12 [info     ] FQE_20220421235543: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001580795999300682, 'time_algorithm_update': 0.007849493942691781, 'loss': 0.003960331447756623, 'time_step': 0.008079803596108647, 'init_value': -0.9561813473701477, 'ave_value': -0.6844976885574269, 'soft_opc': nan} step=3363




2022-04-21 23:56.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.14 [info     ] FQE_20220421235543: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00016131912921108095, 'time_algorithm_update': 0.008088548304670947, 'loss': 0.0044544375839309066, 'time_step': 0.008322310312993108, 'init_value': -0.9694053530693054, 'ave_value': -0.6981897543629145, 'soft_opc': nan} step=3540




2022-04-21 23:56.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.16 [info     ] FQE_20220421235543: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00015841500233795683, 'time_algorithm_update': 0.008134572519420903, 'loss': 0.004601043691111487, 'time_step': 0.008363512276256152, 'init_value': -1.0229204893112183, 'ave_value': -0.7355995007418655, 'soft_opc': nan} step=3717




2022-04-21 23:56.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.17 [info     ] FQE_20220421235543: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.0001602240201443602, 'time_algorithm_update': 0.008058397109899144, 'loss': 0.00523201928860087, 'time_step': 0.008291882983708785, 'init_value': -1.0324894189834595, 'ave_value': -0.7408437089675719, 'soft_opc': nan} step=3894




2022-04-21 23:56.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.19 [info     ] FQE_20220421235543: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00015500036336607852, 'time_algorithm_update': 0.007786679402583063, 'loss': 0.005333593494711328, 'time_step': 0.00801182062612415, 'init_value': -1.0725884437561035, 'ave_value': -0.7562082686257344, 'soft_opc': nan} step=4071




2022-04-21 23:56.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.20 [info     ] FQE_20220421235543: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00015503538530425162, 'time_algorithm_update': 0.008124886259520795, 'loss': 0.005827252204874583, 'time_step': 0.00834829255012469, 'init_value': -1.1311532258987427, 'ave_value': -0.7999991111468535, 'soft_opc': nan} step=4248




2022-04-21 23:56.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.22 [info     ] FQE_20220421235543: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00015355368792000464, 'time_algorithm_update': 0.008042031088791325, 'loss': 0.006384718571894797, 'time_step': 0.008265797027760306, 'init_value': -1.1961578130722046, 'ave_value': -0.8616202921491933, 'soft_opc': nan} step=4425




2022-04-21 23:56.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.23 [info     ] FQE_20220421235543: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00015033436360332251, 'time_algorithm_update': 0.007646904153338933, 'loss': 0.006538084391136206, 'time_step': 0.007863576802830239, 'init_value': -1.2663463354110718, 'ave_value': -0.9117940778153257, 'soft_opc': nan} step=4602




2022-04-21 23:56.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.25 [info     ] FQE_20220421235543: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.0001568915480274265, 'time_algorithm_update': 0.007959683736165365, 'loss': 0.007021982864395183, 'time_step': 0.008184765691811082, 'init_value': -1.2529287338256836, 'ave_value': -0.8755996536083139, 'soft_opc': nan} step=4779




2022-04-21 23:56.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.26 [info     ] FQE_20220421235543: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00015532229579774673, 'time_algorithm_update': 0.008020856286172813, 'loss': 0.007608693404324448, 'time_step': 0.008245710599220406, 'init_value': -1.2751129865646362, 'ave_value': -0.9075720749001156, 'soft_opc': nan} step=4956




2022-04-21 23:56.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.28 [info     ] FQE_20220421235543: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.0001619670350672835, 'time_algorithm_update': 0.007915848392551229, 'loss': 0.008200376029823523, 'time_step': 0.008146633536128675, 'init_value': -1.3740087747573853, 'ave_value': -0.9734979836382084, 'soft_opc': nan} step=5133




2022-04-21 23:56.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.29 [info     ] FQE_20220421235543: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00015939696360442598, 'time_algorithm_update': 0.008047064818904898, 'loss': 0.009103088258202635, 'time_step': 0.008279491952583614, 'init_value': -1.3785767555236816, 'ave_value': -0.9801635491759614, 'soft_opc': nan} step=5310




2022-04-21 23:56.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.31 [info     ] FQE_20220421235543: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00016265669784976938, 'time_algorithm_update': 0.008202866645856092, 'loss': 0.009644967903242326, 'time_step': 0.008440812428792318, 'init_value': -1.3941768407821655, 'ave_value': -0.9892756000345757, 'soft_opc': nan} step=5487




2022-04-21 23:56.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.33 [info     ] FQE_20220421235543: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00016463409035892809, 'time_algorithm_update': 0.008114425475988011, 'loss': 0.009932807425371145, 'time_step': 0.008349007805861995, 'init_value': -1.4003595113754272, 'ave_value': -0.9796637784756788, 'soft_opc': nan} step=5664




2022-04-21 23:56.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.34 [info     ] FQE_20220421235543: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00016283315453825698, 'time_algorithm_update': 0.007897269254350393, 'loss': 0.010201986708269207, 'time_step': 0.008130907338891326, 'init_value': -1.408129096031189, 'ave_value': -0.9609010392990154, 'soft_opc': nan} step=5841




2022-04-21 23:56.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.36 [info     ] FQE_20220421235543: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.0001600502574511167, 'time_algorithm_update': 0.0081710357450496, 'loss': 0.010896745495995412, 'time_step': 0.008401450464280985, 'init_value': -1.46728515625, 'ave_value': -1.0175434986957246, 'soft_opc': nan} step=6018




2022-04-21 23:56.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.37 [info     ] FQE_20220421235543: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00016412492525779595, 'time_algorithm_update': 0.008248789835784394, 'loss': 0.011606761603780607, 'time_step': 0.008484944111883304, 'init_value': -1.4818590879440308, 'ave_value': -1.0286813739167007, 'soft_opc': nan} step=6195




2022-04-21 23:56.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.39 [info     ] FQE_20220421235543: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00015984686081018824, 'time_algorithm_update': 0.007897911772216108, 'loss': 0.012442025070546963, 'time_step': 0.008130644674355028, 'init_value': -1.5030416250228882, 'ave_value': -1.0503031018735456, 'soft_opc': nan} step=6372




2022-04-21 23:56.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.40 [info     ] FQE_20220421235543: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00016345950843250684, 'time_algorithm_update': 0.00785687818365582, 'loss': 0.012958340890413561, 'time_step': 0.008093318023250601, 'init_value': -1.5699764490127563, 'ave_value': -1.105578510696488, 'soft_opc': nan} step=6549




2022-04-21 23:56.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.42 [info     ] FQE_20220421235543: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00016415051821261475, 'time_algorithm_update': 0.008294547345005187, 'loss': 0.013827143390899745, 'time_step': 0.008530627536234882, 'init_value': -1.6224079132080078, 'ave_value': -1.1367156000824663, 'soft_opc': nan} step=6726




2022-04-21 23:56.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.44 [info     ] FQE_20220421235543: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00016156562977591476, 'time_algorithm_update': 0.008028527437630346, 'loss': 0.013899168416615681, 'time_step': 0.008261544556267517, 'init_value': -1.6780411005020142, 'ave_value': -1.189749497318411, 'soft_opc': nan} step=6903




2022-04-21 23:56.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.45 [info     ] FQE_20220421235543: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.0001640414114052293, 'time_algorithm_update': 0.008582920677917825, 'loss': 0.014678227068661676, 'time_step': 0.008818348922298453, 'init_value': -1.6693059206008911, 'ave_value': -1.1709734817754265, 'soft_opc': nan} step=7080




2022-04-21 23:56.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.47 [info     ] FQE_20220421235543: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016445224567995234, 'time_algorithm_update': 0.009161674370199947, 'loss': 0.01420293789255659, 'time_step': 0.009397561940769692, 'init_value': -1.7519383430480957, 'ave_value': -1.2404027446483572, 'soft_opc': nan} step=7257




2022-04-21 23:56.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.49 [info     ] FQE_20220421235543: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016240480929444737, 'time_algorithm_update': 0.008964934591519631, 'loss': 0.016272836142827332, 'time_step': 0.009196021462564414, 'init_value': -1.75397527217865, 'ave_value': -1.246050942590175, 'soft_opc': nan} step=7434




2022-04-21 23:56.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.50 [info     ] FQE_20220421235543: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016327631675590903, 'time_algorithm_update': 0.008758466796012922, 'loss': 0.016806679055631414, 'time_step': 0.008994645318069027, 'init_value': -1.8133732080459595, 'ave_value': -1.2902875781406065, 'soft_opc': nan} step=7611




2022-04-21 23:56.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.52 [info     ] FQE_20220421235543: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00016283315453825698, 'time_algorithm_update': 0.009033876623811022, 'loss': 0.01766760563553511, 'time_step': 0.009266907212424413, 'init_value': -1.8467655181884766, 'ave_value': -1.3084685963122649, 'soft_opc': nan} step=7788




2022-04-21 23:56.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.54 [info     ] FQE_20220421235543: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016808105727373544, 'time_algorithm_update': 0.009032268308650303, 'loss': 0.018301563919850727, 'time_step': 0.009272033885373907, 'init_value': -1.8289217948913574, 'ave_value': -1.286609544603898, 'soft_opc': nan} step=7965




2022-04-21 23:56.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.56 [info     ] FQE_20220421235543: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00016445089868233028, 'time_algorithm_update': 0.008852651563741393, 'loss': 0.01840802405284728, 'time_step': 0.009088675181070963, 'init_value': -1.8016681671142578, 'ave_value': -1.2600742777546605, 'soft_opc': nan} step=8142




2022-04-21 23:56.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.57 [info     ] FQE_20220421235543: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00016340428153000308, 'time_algorithm_update': 0.00896590308280988, 'loss': 0.018730749883215433, 'time_step': 0.00919762708372989, 'init_value': -1.8238335847854614, 'ave_value': -1.2813437989378744, 'soft_opc': nan} step=8319




2022-04-21 23:56.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:56.59 [info     ] FQE_20220421235543: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00016059713848566606, 'time_algorithm_update': 0.00876819211884407, 'loss': 0.01907249158287768, 'time_step': 0.0090016833806442, 'init_value': -1.8350363969802856, 'ave_value': -1.2826955791853658, 'soft_opc': nan} step=8496




2022-04-21 23:56.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:57.01 [info     ] FQE_20220421235543: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.0001631712509413897, 'time_algorithm_update': 0.008921589555039917, 'loss': 0.019990094529930502, 'time_step': 0.009156779380841444, 'init_value': -1.7934527397155762, 'ave_value': -1.2364971989452527, 'soft_opc': nan} step=8673




2022-04-21 23:57.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-21 23:57.02 [info     ] FQE_20220421235543: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016462196738032973, 'time_algorithm_update': 0.008389157763982222, 'loss': 0.019911192894234497, 'time_step': 0.008625755202298784, 'init_value': -1.8017417192459106, 'ave_value': -1.249139814795339, 'soft_opc': nan} step=8850




2022-04-21 23:57.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235543/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-21 23:57.03 [info     ] Directory is created at d3rlpy_logs/FQE_20220421235703
2022-04-21 23:57.03 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 23:57.03 [debug    ] Building models...
2022-04-21 23:57.03 [debug    ] Models have been built.
2022-04-21 23:57.03 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220421235703/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-21 23:57.06 [info     ] FQE_20220421235703: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016885580018509265, 'time_algorithm_update': 0.00862112987873166, 'loss': 0.025575380988938863, 'time_step': 0.008864901786626772, 'init_value': -1.0999659299850464, 'ave_value': -1.1172906247546543, 'soft_opc': nan} step=344




2022-04-21 23:57.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.09 [info     ] FQE_20220421235703: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016961957133093545, 'time_algorithm_update': 0.008664772954098014, 'loss': 0.021626692674079434, 'time_step': 0.008908142877179523, 'init_value': -1.639880895614624, 'ave_value': -1.7017974094496118, 'soft_opc': nan} step=688




2022-04-21 23:57.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.13 [info     ] FQE_20220421235703: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001692820427029632, 'time_algorithm_update': 0.008376365484193314, 'loss': 0.025821601287028643, 'time_step': 0.008621494437372961, 'init_value': -2.241178512573242, 'ave_value': -2.363815914121297, 'soft_opc': nan} step=1032




2022-04-21 23:57.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.16 [info     ] FQE_20220421235703: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016943244046943131, 'time_algorithm_update': 0.008640339901280958, 'loss': 0.029034195142949738, 'time_step': 0.00888477647027304, 'init_value': -2.588813304901123, 'ave_value': -2.76812746531523, 'soft_opc': nan} step=1376




2022-04-21 23:57.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.19 [info     ] FQE_20220421235703: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016683340072631836, 'time_algorithm_update': 0.008631953666376512, 'loss': 0.039427636467396866, 'time_step': 0.008869713128999222, 'init_value': -3.1117701530456543, 'ave_value': -3.338439361056363, 'soft_opc': nan} step=1720




2022-04-21 23:57.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.22 [info     ] FQE_20220421235703: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017036324323609818, 'time_algorithm_update': 0.008678414793901666, 'loss': 0.050967063976798295, 'time_step': 0.008921836004700772, 'init_value': -3.4761760234832764, 'ave_value': -3.717523458166266, 'soft_opc': nan} step=2064




2022-04-21 23:57.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.26 [info     ] FQE_20220421235703: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016824727834657182, 'time_algorithm_update': 0.008469843587210013, 'loss': 0.0642239169899894, 'time_step': 0.008711243784704874, 'init_value': -4.088173866271973, 'ave_value': -4.323793651982471, 'soft_opc': nan} step=2408




2022-04-21 23:57.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.29 [info     ] FQE_20220421235703: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001733753570290499, 'time_algorithm_update': 0.008713607178177945, 'loss': 0.07898152163143941, 'time_step': 0.008962961130363996, 'init_value': -4.368865966796875, 'ave_value': -4.611309918309855, 'soft_opc': nan} step=2752




2022-04-21 23:57.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.32 [info     ] FQE_20220421235703: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017098354738812114, 'time_algorithm_update': 0.008749216794967651, 'loss': 0.0916781818634982, 'time_step': 0.008996700824693192, 'init_value': -4.6649169921875, 'ave_value': -4.892552156690172, 'soft_opc': nan} step=3096




2022-04-21 23:57.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.36 [info     ] FQE_20220421235703: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.000170939190443172, 'time_algorithm_update': 0.008686762909556544, 'loss': 0.10927478940940874, 'time_step': 0.008928361327149147, 'init_value': -5.111565113067627, 'ave_value': -5.464339634384708, 'soft_opc': nan} step=3440




2022-04-21 23:57.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.39 [info     ] FQE_20220421235703: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016827985297801883, 'time_algorithm_update': 0.00843745470046997, 'loss': 0.12413871782658578, 'time_step': 0.008679613817569822, 'init_value': -5.524377822875977, 'ave_value': -6.030064541363233, 'soft_opc': nan} step=3784




2022-04-21 23:57.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.42 [info     ] FQE_20220421235703: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017087126887121865, 'time_algorithm_update': 0.008693534967511199, 'loss': 0.14314767670666062, 'time_step': 0.00893875887227613, 'init_value': -5.619631290435791, 'ave_value': -6.322460743888105, 'soft_opc': nan} step=4128




2022-04-21 23:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.45 [info     ] FQE_20220421235703: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016609388728474462, 'time_algorithm_update': 0.008602461842603461, 'loss': 0.1552417037174712, 'time_step': 0.008839623872623888, 'init_value': -6.086974620819092, 'ave_value': -6.955187672442077, 'soft_opc': nan} step=4472




2022-04-21 23:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.49 [info     ] FQE_20220421235703: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017001531844915344, 'time_algorithm_update': 0.008656291767608288, 'loss': 0.1724619854100828, 'time_step': 0.008900075457816901, 'init_value': -6.348899841308594, 'ave_value': -7.354014997227067, 'soft_opc': nan} step=4816




2022-04-21 23:57.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.52 [info     ] FQE_20220421235703: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00016962442287178926, 'time_algorithm_update': 0.008609639350758043, 'loss': 0.19081033686674093, 'time_step': 0.008853687103404555, 'init_value': -6.4846415519714355, 'ave_value': -7.617182932505468, 'soft_opc': nan} step=5160




2022-04-21 23:57.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.55 [info     ] FQE_20220421235703: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017038472863130792, 'time_algorithm_update': 0.008565061314161434, 'loss': 0.20440779541900686, 'time_step': 0.008812022763629293, 'init_value': -7.213913917541504, 'ave_value': -8.574597345661742, 'soft_opc': nan} step=5504




2022-04-21 23:57.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:57.59 [info     ] FQE_20220421235703: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001683325268501459, 'time_algorithm_update': 0.008688921152159225, 'loss': 0.21736808671334454, 'time_step': 0.0089328947455384, 'init_value': -7.275218963623047, 'ave_value': -8.879446340213857, 'soft_opc': nan} step=5848




2022-04-21 23:57.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.02 [info     ] FQE_20220421235703: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017080611960832462, 'time_algorithm_update': 0.008581514275351237, 'loss': 0.22580359337396574, 'time_step': 0.008826026389765184, 'init_value': -7.4426751136779785, 'ave_value': -9.177295120674613, 'soft_opc': nan} step=6192




2022-04-21 23:58.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.05 [info     ] FQE_20220421235703: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00016890431559363076, 'time_algorithm_update': 0.008723142535187477, 'loss': 0.23281551709636872, 'time_step': 0.008968893178673677, 'init_value': -7.751454830169678, 'ave_value': -9.75114398564019, 'soft_opc': nan} step=6536




2022-04-21 23:58.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.08 [info     ] FQE_20220421235703: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016936659812927246, 'time_algorithm_update': 0.00845012761825739, 'loss': 0.2374544650435361, 'time_step': 0.00869273584942485, 'init_value': -7.984935760498047, 'ave_value': -10.118453630903183, 'soft_opc': nan} step=6880




2022-04-21 23:58.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.12 [info     ] FQE_20220421235703: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001713529575702756, 'time_algorithm_update': 0.008697236693182658, 'loss': 0.24574916578049577, 'time_step': 0.008946267671363299, 'init_value': -8.349567413330078, 'ave_value': -10.735795943480118, 'soft_opc': nan} step=7224




2022-04-21 23:58.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.15 [info     ] FQE_20220421235703: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017015947852023813, 'time_algorithm_update': 0.008683068807734999, 'loss': 0.2490890748463138, 'time_step': 0.00892782072688258, 'init_value': -8.347160339355469, 'ave_value': -10.849084690387722, 'soft_opc': nan} step=7568




2022-04-21 23:58.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.18 [info     ] FQE_20220421235703: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016972491907518963, 'time_algorithm_update': 0.008622110583061395, 'loss': 0.254110173652555, 'time_step': 0.008867492509442706, 'init_value': -8.655506134033203, 'ave_value': -11.202310660132417, 'soft_opc': nan} step=7912




2022-04-21 23:58.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.21 [info     ] FQE_20220421235703: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001680580682532732, 'time_algorithm_update': 0.008412762436755868, 'loss': 0.2677518819068927, 'time_step': 0.00865367193554723, 'init_value': -9.048501014709473, 'ave_value': -11.786938263088263, 'soft_opc': nan} step=8256




2022-04-21 23:58.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.25 [info     ] FQE_20220421235703: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016899580179258834, 'time_algorithm_update': 0.00849737053693727, 'loss': 0.27917341699520515, 'time_step': 0.00874104194862898, 'init_value': -9.169682502746582, 'ave_value': -11.912332917090824, 'soft_opc': nan} step=8600




2022-04-21 23:58.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.28 [info     ] FQE_20220421235703: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017073265341825263, 'time_algorithm_update': 0.008662581443786621, 'loss': 0.2945609990680634, 'time_step': 0.00890882971674897, 'init_value': -9.356059074401855, 'ave_value': -12.176766135874162, 'soft_opc': nan} step=8944




2022-04-21 23:58.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.31 [info     ] FQE_20220421235703: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016711617625036904, 'time_algorithm_update': 0.008685876463734827, 'loss': 0.30705910234517136, 'time_step': 0.00892628971920457, 'init_value': -9.40337085723877, 'ave_value': -12.085325529147779, 'soft_opc': nan} step=9288




2022-04-21 23:58.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.34 [info     ] FQE_20220421235703: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001676484595897586, 'time_algorithm_update': 0.008423576521319013, 'loss': 0.32083006144696197, 'time_step': 0.008665429298267809, 'init_value': -9.693399429321289, 'ave_value': -12.427070895583277, 'soft_opc': nan} step=9632




2022-04-21 23:58.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.38 [info     ] FQE_20220421235703: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017064879106920818, 'time_algorithm_update': 0.008635632520498232, 'loss': 0.3437831650195687, 'time_step': 0.00887844867484514, 'init_value': -10.027132034301758, 'ave_value': -12.631142648246541, 'soft_opc': nan} step=9976




2022-04-21 23:58.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.41 [info     ] FQE_20220421235703: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016810727673907612, 'time_algorithm_update': 0.00863996702571248, 'loss': 0.3562758918093561, 'time_step': 0.008881755346475645, 'init_value': -10.525789260864258, 'ave_value': -13.197259760211717, 'soft_opc': nan} step=10320




2022-04-21 23:58.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.44 [info     ] FQE_20220421235703: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017256930817005246, 'time_algorithm_update': 0.008645784716273463, 'loss': 0.38937997104881633, 'time_step': 0.00889240586480429, 'init_value': -10.46949577331543, 'ave_value': -13.032232383733431, 'soft_opc': nan} step=10664




2022-04-21 23:58.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.48 [info     ] FQE_20220421235703: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016682854918546454, 'time_algorithm_update': 0.008438020251518073, 'loss': 0.40746190735723736, 'time_step': 0.008676268333612486, 'init_value': -11.241538047790527, 'ave_value': -13.361209334441469, 'soft_opc': nan} step=11008




2022-04-21 23:58.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.51 [info     ] FQE_20220421235703: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016947749049164528, 'time_algorithm_update': 0.008676745863847955, 'loss': 0.43740694870804114, 'time_step': 0.008917340705561083, 'init_value': -11.574824333190918, 'ave_value': -13.468419781117909, 'soft_opc': nan} step=11352




2022-04-21 23:58.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.54 [info     ] FQE_20220421235703: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001687255016593046, 'time_algorithm_update': 0.008625389531601307, 'loss': 0.46798921308783425, 'time_step': 0.008868753216987433, 'init_value': -11.864961624145508, 'ave_value': -13.288902937612487, 'soft_opc': nan} step=11696




2022-04-21 23:58.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:58.57 [info     ] FQE_20220421235703: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016750083413234976, 'time_algorithm_update': 0.008640010689580164, 'loss': 0.5193531465727489, 'time_step': 0.008879935325578202, 'init_value': -12.672752380371094, 'ave_value': -13.801444125193516, 'soft_opc': nan} step=12040




2022-04-21 23:58.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.01 [info     ] FQE_20220421235703: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017130166985267816, 'time_algorithm_update': 0.008410245180130005, 'loss': 0.5502048479338978, 'time_step': 0.008655698493469593, 'init_value': -12.831503868103027, 'ave_value': -13.49791271364199, 'soft_opc': nan} step=12384




2022-04-21 23:59.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.04 [info     ] FQE_20220421235703: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017494794934294944, 'time_algorithm_update': 0.008685600618983423, 'loss': 0.5870054553826015, 'time_step': 0.008937174497648727, 'init_value': -13.637845039367676, 'ave_value': -13.790640363929509, 'soft_opc': nan} step=12728




2022-04-21 23:59.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.07 [info     ] FQE_20220421235703: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017099117123803428, 'time_algorithm_update': 0.008631467126136603, 'loss': 0.6389558614220817, 'time_step': 0.008880749691364377, 'init_value': -14.26724910736084, 'ave_value': -13.77359546755214, 'soft_opc': nan} step=13072




2022-04-21 23:59.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.10 [info     ] FQE_20220421235703: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001698552176009777, 'time_algorithm_update': 0.00865546839181767, 'loss': 0.6964293143679496, 'time_step': 0.008898778710254403, 'init_value': -14.843279838562012, 'ave_value': -13.939327154845536, 'soft_opc': nan} step=13416




2022-04-21 23:59.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.14 [info     ] FQE_20220421235703: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00016836094301800396, 'time_algorithm_update': 0.008384438448174054, 'loss': 0.7495349700573485, 'time_step': 0.008625752704088078, 'init_value': -15.416699409484863, 'ave_value': -13.66012965980288, 'soft_opc': nan} step=13760




2022-04-21 23:59.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.17 [info     ] FQE_20220421235703: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017050047253453456, 'time_algorithm_update': 0.008637278579002204, 'loss': 0.7939127354644412, 'time_step': 0.008881565443305083, 'init_value': -16.43146324157715, 'ave_value': -14.162119159279056, 'soft_opc': nan} step=14104




2022-04-21 23:59.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.20 [info     ] FQE_20220421235703: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017229623572770938, 'time_algorithm_update': 0.008741762748984404, 'loss': 0.8432422592304647, 'time_step': 0.008988127458927244, 'init_value': -16.763355255126953, 'ave_value': -14.236607556215798, 'soft_opc': nan} step=14448




2022-04-21 23:59.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.24 [info     ] FQE_20220421235703: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001665631005930346, 'time_algorithm_update': 0.008625085963759311, 'loss': 0.8756743287559339, 'time_step': 0.008863250183504682, 'init_value': -17.14047622680664, 'ave_value': -14.182490642014304, 'soft_opc': nan} step=14792




2022-04-21 23:59.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.27 [info     ] FQE_20220421235703: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001671355824137843, 'time_algorithm_update': 0.00838654124459555, 'loss': 0.9207163680518089, 'time_step': 0.00862859847933747, 'init_value': -17.624698638916016, 'ave_value': -14.213015263629993, 'soft_opc': nan} step=15136




2022-04-21 23:59.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.30 [info     ] FQE_20220421235703: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016729291095290074, 'time_algorithm_update': 0.008619884418886762, 'loss': 0.9396951420795779, 'time_step': 0.00886297018028969, 'init_value': -18.053571701049805, 'ave_value': -14.501883197914776, 'soft_opc': nan} step=15480




2022-04-21 23:59.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.33 [info     ] FQE_20220421235703: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017051641331162563, 'time_algorithm_update': 0.008690592854521995, 'loss': 0.9777394245151257, 'time_step': 0.008934639221013979, 'init_value': -18.59817886352539, 'ave_value': -14.178439497973702, 'soft_opc': nan} step=15824




2022-04-21 23:59.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.37 [info     ] FQE_20220421235703: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016787994739621184, 'time_algorithm_update': 0.008661478064781012, 'loss': 1.011331558195051, 'time_step': 0.008904333031454752, 'init_value': -19.290668487548828, 'ave_value': -14.366961064766924, 'soft_opc': nan} step=16168




2022-04-21 23:59.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.40 [info     ] FQE_20220421235703: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001698690791462743, 'time_algorithm_update': 0.00920310755108678, 'loss': 1.071196720274815, 'time_step': 0.009444669235584348, 'init_value': -20.704435348510742, 'ave_value': -15.361993362762973, 'soft_opc': nan} step=16512




2022-04-21 23:59.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.44 [info     ] FQE_20220421235703: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017305862071902254, 'time_algorithm_update': 0.009344453035398971, 'loss': 1.1109737045413186, 'time_step': 0.009587416122126024, 'init_value': -20.32672119140625, 'ave_value': -14.672323581199624, 'soft_opc': nan} step=16856




2022-04-21 23:59.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-21 23:59.47 [info     ] FQE_20220421235703: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001702856185824372, 'time_algorithm_update': 0.009284782548283422, 'loss': 1.149605385428511, 'time_step': 0.009531425875286723, 'init_value': -20.845184326171875, 'ave_value': -14.870228845384055, 'soft_opc': nan} step=17200




2022-04-21 23:59.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220421235703/model_17200.pt
most optimal hyper params for cql at this point:  [0.0029606638451320244, 0.004498437280523096, 5.296004229703394e-05, 3]
search iteration:  6
using hyper params:  [0.009504490821613022, 0.00373598696561434, 8.313477171673746e-05, 1]
2022-04-21 23:59.47 [debug    ] RoundIterator is selected.
2022-04-21 23:59.47 [info     ] Directory is created at d3rlpy_logs/CQL_20220421235947
2022-04-21 23:59.47 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-21 23:59.47 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-21 23:59.47 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220421235947/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:00.08 [info     ] CQL_20220421235947: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003143032162175702, 'time_algorithm_update': 0.05788341147362152, 'temp_loss': 4.926037649887835, 'temp': 0.9853843519453368, 'alpha_loss': -17.677838992521252, 'alpha': 1.0176939147745254, 'critic_loss': 26.25370985924164, 'actor_loss': -1.8858433954116236, 'time_step': 0.05828752683077244, 'td_error': 1.2220373348317586, 'init_value': 0.26317331194877625, 'ave_value': 0.34068480600987555} step=346
2022-04-22 00:00.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:00.29 [info     ] CQL_20220421235947: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.000320059715667901, 'time_algorithm_update': 0.05742383072141967, 'temp_loss': 4.831741860836227, 'temp': 0.9575241237361997, 'alpha_loss': -18.33974646419459, 'alpha': 1.0541508046188794, 'critic_loss': 30.261823940828357, 'actor_loss': -1.7846931263890569, 'time_step': 0.057834831965451985, 'td_error': 1.2093719166955517, 'init_value': 0.03229476138949394, 'ave_value': 0.2552370047052786} step=692
2022-04-22 00:00.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:00.51 [info     ] CQL_20220421235947: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003242775194906775, 'time_algorithm_update': 0.05868070180705517, 'temp_loss': 4.69987248960947, 'temp': 0.9309307472898781, 'alpha_loss': -19.009684860361794, 'alpha': 1.0924335628575672, 'critic_loss': 39.8128008539277, 'actor_loss': -1.3773009344677016, 'time_step': 0.05910142653250281, 'td_error': 1.2005847276411281, 'init_value': -0.20219334959983826, 'ave_value': 0.11089054612419005} step=1038
2022-04-22 00:00.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:01.12 [info     ] CQL_20220421235947: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003248494484521061, 'time_algorithm_update': 0.05886734634465565, 'temp_loss': 4.573499031838654, 'temp': 0.9053659638917515, 'alpha_loss': -19.709928931528435, 'alpha': 1.1326110821238833, 'critic_loss': 51.576344771192254, 'actor_loss': -0.8602065700456242, 'time_step': 0.059288102078299994, 'td_error': 1.2049315511778864, 'init_value': -0.68089759349823, 'ave_value': -0.2933962439412526} step=1384
2022-04-22 00:01.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:01.33 [info     ] CQL_20220421235947: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00033347041620684497, 'time_algorithm_update': 0.05885262296378957, 'temp_loss': 4.447205361603312, 'temp': 0.8807319352737052, 'alpha_loss': -20.446849773384933, 'alpha': 1.1747236220822859, 'critic_loss': 64.88847854922962, 'actor_loss': -0.3546126179526307, 'time_step': 0.05928036174333164, 'td_error': 1.2011782784589653, 'init_value': -1.0288081169128418, 'ave_value': -0.6023429535707346} step=1730
2022-04-22 00:01.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:01.55 [info     ] CQL_20220421235947: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00032869033041717, 'time_algorithm_update': 0.06036993189354163, 'temp_loss': 4.328486059442421, 'temp': 0.8569563658251239, 'alpha_loss': -21.21738312010131, 'alpha': 1.2188082699141751, 'critic_loss': 79.5751234506596, 'actor_loss': 0.009242908259304617, 'time_step': 0.06079316345942503, 'td_error': 1.204654544834252, 'init_value': -1.2339452505111694, 'ave_value': -0.7568973347855552} step=2076
2022-04-22 00:01.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:02.17 [info     ] CQL_20220421235947: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00033214326538791544, 'time_algorithm_update': 0.060135861352689, 'temp_loss': 4.211239085721143, 'temp': 0.8339692152648992, 'alpha_loss': -22.015960665796534, 'alpha': 1.2649023322700765, 'critic_loss': 96.14624063127992, 'actor_loss': 0.4118531488549675, 'time_step': 0.060563755862285636, 'td_error': 1.2080303582394754, 'init_value': -1.5719480514526367, 'ave_value': -1.0737752676250358} step=2422
2022-04-22 00:02.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:02.39 [info     ] CQL_20220421235947: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003304240331484403, 'time_algorithm_update': 0.06061155603111135, 'temp_loss': 4.0984619449328825, 'temp': 0.8117206473915564, 'alpha_loss': -22.851999023746203, 'alpha': 1.3130266959267545, 'critic_loss': 116.51991772513858, 'actor_loss': 0.6428263940649226, 'time_step': 0.06103384563688598, 'td_error': 1.2050608040789095, 'init_value': -1.5999577045440674, 'ave_value': -1.175927868315172} step=2768
2022-04-22 00:02.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:03.01 [info     ] CQL_20220421235947: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00032810324189290837, 'time_algorithm_update': 0.06082265501077465, 'temp_loss': 3.9899585756952365, 'temp': 0.7901530265808105, 'alpha_loss': -23.723313436342803, 'alpha': 1.3632387997787123, 'critic_loss': 144.06420631629194, 'actor_loss': 0.6156735608429578, 'time_step': 0.061243352862451805, 'td_error': 1.2024830399691624, 'init_value': -1.4261865615844727, 'ave_value': -1.0457379182787403} step=3114
2022-04-22 00:03.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:03.24 [info     ] CQL_20220421235947: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00032832236648294965, 'time_algorithm_update': 0.06089596665663526, 'temp_loss': 3.8844385305581066, 'temp': 0.7692322176315881, 'alpha_loss': -24.631999859231055, 'alpha': 1.4155770040660924, 'critic_loss': 182.8878060930726, 'actor_loss': 0.27399642340235975, 'time_step': 0.061319353952573215, 'td_error': 1.2110123990531365, 'init_value': -1.1507881879806519, 'ave_value': -0.866442216529333} step=3460
2022-04-22 00:03.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:03.46 [info     ] CQL_20220421235947: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00032328181184096145, 'time_algorithm_update': 0.060464018342122866, 'temp_loss': 3.7828770779460843, 'temp': 0.7489168823109886, 'alpha_loss': -25.570671632799797, 'alpha': 1.4700919124432381, 'critic_loss': 234.92364162378917, 'actor_loss': -0.3788175210895049, 'time_step': 0.06088116334352879, 'td_error': 1.2169270700064856, 'init_value': -0.4690878987312317, 'ave_value': -0.3022912750627374} step=3806
2022-04-22 00:03.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:04.08 [info     ] CQL_20220421235947: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00032646325282278775, 'time_algorithm_update': 0.060459256172180176, 'temp_loss': 3.682864353146856, 'temp': 0.7291821043270861, 'alpha_loss': -26.551834481299956, 'alpha': 1.5268385947784247, 'critic_loss': 297.2474010665982, 'actor_loss': -1.179359983329828, 'time_step': 0.06088093250473111, 'td_error': 1.2271951007558388, 'init_value': 0.19164545834064484, 'ave_value': 0.28829050867576744} step=4152
2022-04-22 00:04.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:04.30 [info     ] CQL_20220421235947: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00033154515172704796, 'time_algorithm_update': 0.06066629445621733, 'temp_loss': 3.5857696932864327, 'temp': 0.7100037357021618, 'alpha_loss': -27.58221353960864, 'alpha': 1.5858975400814432, 'critic_loss': 359.5475825491668, 'actor_loss': -1.7791277177071985, 'time_step': 0.06109126799368445, 'td_error': 1.2320477756656874, 'init_value': 0.7876343727111816, 'ave_value': 0.8276797245525533} step=4498
2022-04-22 00:04.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:04.52 [info     ] CQL_20220421235947: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003216149490003641, 'time_algorithm_update': 0.06025177550453671, 'temp_loss': 3.4916283360795477, 'temp': 0.6913539489914227, 'alpha_loss': -28.644097912518276, 'alpha': 1.6473345026115462, 'critic_loss': 415.97954334413384, 'actor_loss': -2.2737065564001226, 'time_step': 0.06067010984255399, 'td_error': 1.2337975844269928, 'init_value': 1.2406877279281616, 'ave_value': 1.2720730082126182} step=4844
2022-04-22 00:04.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:05.14 [info     ] CQL_20220421235947: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00033452745117893107, 'time_algorithm_update': 0.06046418785359818, 'temp_loss': 3.3998711267647717, 'temp': 0.6732155363683756, 'alpha_loss': -29.755913210742047, 'alpha': 1.7112264216290733, 'critic_loss': 469.48629293276394, 'actor_loss': -2.7870692386792575, 'time_step': 0.06089350598395905, 'td_error': 1.2345122294838728, 'init_value': 1.7590627670288086, 'ave_value': 1.7784323560507618} step=5190
2022-04-22 00:05.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:05.36 [info     ] CQL_20220421235947: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003278827391607913, 'time_algorithm_update': 0.06026564099196065, 'temp_loss': 3.310054023141806, 'temp': 0.6555688897998346, 'alpha_loss': -30.909372489576395, 'alpha': 1.7776558495670385, 'critic_loss': 525.9138845101946, 'actor_loss': -3.2785731988146125, 'time_step': 0.06068941554582188, 'td_error': 1.2346469661195323, 'init_value': 2.3017616271972656, 'ave_value': 2.3110780173198147} step=5536
2022-04-22 00:05.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:05.57 [info     ] CQL_20220421235947: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00032976872659142996, 'time_algorithm_update': 0.06011004737346848, 'temp_loss': 3.2243450411482355, 'temp': 0.6383945659405923, 'alpha_loss': -32.114794797290955, 'alpha': 1.8467156711341328, 'critic_loss': 596.1699433960666, 'actor_loss': -3.7526458343329456, 'time_step': 0.06053658579126259, 'td_error': 1.2348604208656353, 'init_value': 2.745957612991333, 'ave_value': 2.7540673811212995} step=5882
2022-04-22 00:05.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:06.19 [info     ] CQL_20220421235947: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0003265404287790287, 'time_algorithm_update': 0.05976021703268062, 'temp_loss': 3.139104284992108, 'temp': 0.6216779551065037, 'alpha_loss': -33.36267195685061, 'alpha': 1.9185020441264775, 'critic_loss': 671.5238183523189, 'actor_loss': -4.205333099888929, 'time_step': 0.060181721097472086, 'td_error': 1.2355320632291127, 'init_value': 3.2479283809661865, 'ave_value': 3.2551852667258214} step=6228
2022-04-22 00:06.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:06.41 [info     ] CQL_20220421235947: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003293911156626795, 'time_algorithm_update': 0.06015850629420639, 'temp_loss': 3.057910898517322, 'temp': 0.6054048147160194, 'alpha_loss': -34.6608571069089, 'alpha': 1.993104226327356, 'critic_loss': 764.212022373442, 'actor_loss': -4.582960532579808, 'time_step': 0.06058436184260198, 'td_error': 1.2366284872629123, 'init_value': 3.5729920864105225, 'ave_value': 3.578723566506134} step=6574
2022-04-22 00:06.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:07.02 [info     ] CQL_20220421235947: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00033010568232894635, 'time_algorithm_update': 0.05689207804685383, 'temp_loss': 2.977434079082026, 'temp': 0.5895626291718786, 'alpha_loss': -36.00529030154895, 'alpha': 2.0706279705025556, 'critic_loss': 862.3771480494152, 'actor_loss': -4.889803820262754, 'time_step': 0.05731581883623421, 'td_error': 1.2377634348408761, 'init_value': 3.926450252532959, 'ave_value': 3.9323654476643286} step=6920
2022-04-22 00:07.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:07.21 [info     ] CQL_20220421235947: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003034551708684491, 'time_algorithm_update': 0.054265015387121654, 'temp_loss': 2.8992817429448827, 'temp': 0.574138593983788, 'alpha_loss': -37.40000036823956, 'alpha': 2.1511692849197828, 'critic_loss': 972.4647959450077, 'actor_loss': -5.190252053255291, 'time_step': 0.05465606940274983, 'td_error': 1.239109807585635, 'init_value': 4.244386196136475, 'ave_value': 4.248246739429566} step=7266
2022-04-22 00:07.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:07.41 [info     ] CQL_20220421235947: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0002979736107622268, 'time_algorithm_update': 0.05236047954228572, 'temp_loss': 2.823946437394688, 'temp': 0.5591218931482017, 'alpha_loss': -38.85483682224516, 'alpha': 2.2348563760691293, 'critic_loss': 1089.3714818348085, 'actor_loss': -5.416443966716701, 'time_step': 0.052739480327319545, 'td_error': 1.2390508328969618, 'init_value': 4.408870697021484, 'ave_value': 4.413695191794405} step=7612
2022-04-22 00:07.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:07.59 [info     ] CQL_20220421235947: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00029172855994604916, 'time_algorithm_update': 0.05181712911308156, 'temp_loss': 2.7498644300967974, 'temp': 0.544498406049144, 'alpha_loss': -40.37326885510042, 'alpha': 2.32182001447402, 'critic_loss': 1219.343714013954, 'actor_loss': -5.616741413325933, 'time_step': 0.052188231765879375, 'td_error': 1.2412511649674804, 'init_value': 4.753681659698486, 'ave_value': 4.755896176812546} step=7958
2022-04-22 00:07.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:08.18 [info     ] CQL_20220421235947: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0002981307189588602, 'time_algorithm_update': 0.05201837644411649, 'temp_loss': 2.6772174435543876, 'temp': 0.5302610915864823, 'alpha_loss': -41.94571087126098, 'alpha': 2.4121817012742763, 'critic_loss': 1341.8110781983833, 'actor_loss': -5.9029889644225895, 'time_step': 0.05239531966303125, 'td_error': 1.240979806502088, 'init_value': 4.948549747467041, 'ave_value': 4.953040994722847} step=8304
2022-04-22 00:08.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:08.37 [info     ] CQL_20220421235947: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0002990802588490392, 'time_algorithm_update': 0.052479687453694426, 'temp_loss': 2.6077420201604764, 'temp': 0.5163969962583111, 'alpha_loss': -43.57726336352398, 'alpha': 2.5060624731758425, 'critic_loss': 1432.4202256395638, 'actor_loss': -6.2111256232840475, 'time_step': 0.05285847531577755, 'td_error': 1.2432459228585253, 'init_value': 5.319385051727295, 'ave_value': 5.322152870308454} step=8650
2022-04-22 00:08.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:08.57 [info     ] CQL_20220421235947: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003024766899946797, 'time_algorithm_update': 0.0527612639300396, 'temp_loss': 2.5398013226558707, 'temp': 0.5028937706196239, 'alpha_loss': -45.27216949903896, 'alpha': 2.6036008754906628, 'critic_loss': 1537.589470482975, 'actor_loss': -6.450740159591499, 'time_step': 0.05314639469102628, 'td_error': 1.2452923299919356, 'init_value': 5.613123893737793, 'ave_value': 5.6165973735907} step=8996
2022-04-22 00:08.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:09.16 [info     ] CQL_20220421235947: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00030166151895688446, 'time_algorithm_update': 0.05253240552251739, 'temp_loss': 2.473084161736373, 'temp': 0.4897431757925563, 'alpha_loss': -47.03686842063948, 'alpha': 2.7049442681273974, 'critic_loss': 1686.5159075963015, 'actor_loss': -6.724964813000894, 'time_step': 0.05291478482284987, 'td_error': 1.2480400267101028, 'init_value': 5.923031806945801, 'ave_value': 5.923918053820101} step=9342
2022-04-22 00:09.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:09.34 [info     ] CQL_20220421235947: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00029745060584448666, 'time_algorithm_update': 0.05171599002242777, 'temp_loss': 2.4089413447187127, 'temp': 0.47693955459002124, 'alpha_loss': -48.86388343193627, 'alpha': 2.8102376950269488, 'critic_loss': 1844.0580514896812, 'actor_loss': -6.950339710092269, 'time_step': 0.052093669169210974, 'td_error': 1.2478905487161531, 'init_value': 6.130000114440918, 'ave_value': 6.133610470388782} step=9688
2022-04-22 00:09.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:09.52 [info     ] CQL_20220421235947: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003005679632197915, 'time_algorithm_update': 0.04884026160818993, 'temp_loss': 2.3456589851765273, 'temp': 0.4644684528856608, 'alpha_loss': -50.77201730805325, 'alpha': 2.9196353744220183, 'critic_loss': 2001.057063284637, 'actor_loss': -7.2235000491831345, 'time_step': 0.04921977850743112, 'td_error': 1.2491218117576037, 'init_value': 6.35823392868042, 'ave_value': 6.360760645755607} step=10034
2022-04-22 00:09.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:10.10 [info     ] CQL_20220421235947: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0002965052003805348, 'time_algorithm_update': 0.048986997218490334, 'temp_loss': 2.2842491396589777, 'temp': 0.45232543375106216, 'alpha_loss': -52.74678556629688, 'alpha': 3.0332999670436616, 'critic_loss': 2172.7020722317557, 'actor_loss': -7.41388962585802, 'time_step': 0.049363357483307065, 'td_error': 1.2520993196460533, 'init_value': 6.603306293487549, 'ave_value': 6.6066377202952395} step=10380
2022-04-22 00:10.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:10.28 [info     ] CQL_20220421235947: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0002944062899991956, 'time_algorithm_update': 0.04912435250475228, 'temp_loss': 2.2241495985516235, 'temp': 0.44050014802830756, 'alpha_loss': -54.79926099391342, 'alpha': 3.151384716089061, 'critic_loss': 2366.622451341221, 'actor_loss': -7.55752197993284, 'time_step': 0.0495047817340476, 'td_error': 1.2547947458545559, 'init_value': 6.847751617431641, 'ave_value': 6.848287779983043} step=10726
2022-04-22 00:10.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:10.46 [info     ] CQL_20220421235947: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0002991526113080151, 'time_algorithm_update': 0.049074363157239266, 'temp_loss': 2.1666466342231443, 'temp': 0.42898385769369973, 'alpha_loss': -56.93208751788718, 'alpha': 3.2740652767909055, 'critic_loss': 2535.986739494897, 'actor_loss': -7.720815449091741, 'time_step': 0.04945280028216412, 'td_error': 1.2560388582642488, 'init_value': 6.989081859588623, 'ave_value': 6.9894341256732675} step=11072
2022-04-22 00:10.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:11.03 [info     ] CQL_20220421235947: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0002921082380879132, 'time_algorithm_update': 0.0487563113945757, 'temp_loss': 2.1095501770173883, 'temp': 0.41776913464758436, 'alpha_loss': -59.15614534940334, 'alpha': 3.401528013234883, 'critic_loss': 2669.1300069996387, 'actor_loss': -7.914452348830383, 'time_step': 0.04912910847305563, 'td_error': 1.2585117729720192, 'init_value': 7.227546215057373, 'ave_value': 7.227529645859206} step=11418
2022-04-22 00:11.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:11.21 [info     ] CQL_20220421235947: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0002899045889088184, 'time_algorithm_update': 0.04899331255455237, 'temp_loss': 2.054896154155621, 'temp': 0.40684651337951594, 'alpha_loss': -61.45213471120493, 'alpha': 3.533952473215974, 'critic_loss': 2874.5325497313042, 'actor_loss': -8.041392410421647, 'time_step': 0.049365309621557336, 'td_error': 1.2585236976921232, 'init_value': 7.3194169998168945, 'ave_value': 7.32158997482799} step=11764
2022-04-22 00:11.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:11.40 [info     ] CQL_20220421235947: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003006382484656538, 'time_algorithm_update': 0.05068134503557503, 'temp_loss': 2.000741359126361, 'temp': 0.39620976494571375, 'alpha_loss': -63.84130093128006, 'alpha': 3.6715160987280697, 'critic_loss': 3088.070841706557, 'actor_loss': -8.164455240172458, 'time_step': 0.05106553865995021, 'td_error': 1.2592980363535269, 'init_value': 7.433117389678955, 'ave_value': 7.435375723198954} step=12110
2022-04-22 00:11.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:11.58 [info     ] CQL_20220421235947: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00030023583097954016, 'time_algorithm_update': 0.05212212573586172, 'temp_loss': 1.9482961565772923, 'temp': 0.3858516090522612, 'alpha_loss': -66.33947171779037, 'alpha': 3.814450272245903, 'critic_loss': 3313.184137774341, 'actor_loss': -8.262526258567854, 'time_step': 0.052504701421439995, 'td_error': 1.2612916848611082, 'init_value': 7.580935001373291, 'ave_value': 7.582698418032564} step=12456
2022-04-22 00:11.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:12.17 [info     ] CQL_20220421235947: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0002866666441018871, 'time_algorithm_update': 0.05203597807470774, 'temp_loss': 1.8978212103678311, 'temp': 0.3757649434439709, 'alpha_loss': -68.91928389444517, 'alpha': 3.962948166566088, 'critic_loss': 3417.823867356846, 'actor_loss': -8.415831163439448, 'time_step': 0.05240081569362927, 'td_error': 1.2649911452209157, 'init_value': 7.8489484786987305, 'ave_value': 7.848749786782117} step=12802
2022-04-22 00:12.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:12.37 [info     ] CQL_20220421235947: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00029444143262212676, 'time_algorithm_update': 0.053307339635198514, 'temp_loss': 1.8473283920673966, 'temp': 0.365941523213607, 'alpha_loss': -71.58590094064701, 'alpha': 4.117208325104906, 'critic_loss': 3254.27601932239, 'actor_loss': -8.64408857422757, 'time_step': 0.053681634754114756, 'td_error': 1.2663962003668874, 'init_value': 8.008204460144043, 'ave_value': 8.008120621409446} step=13148
2022-04-22 00:12.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:12.56 [info     ] CQL_20220421235947: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00029466262442528167, 'time_algorithm_update': 0.05316120144948794, 'temp_loss': 1.7996311380684031, 'temp': 0.3563770034237404, 'alpha_loss': -74.3835270611537, 'alpha': 4.277471547870967, 'critic_loss': 3034.077614844879, 'actor_loss': -8.823057469605022, 'time_step': 0.05353601819517984, 'td_error': 1.2700618242006818, 'init_value': 8.24926471710205, 'ave_value': 8.246603516144118} step=13494
2022-04-22 00:12.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:13.15 [info     ] CQL_20220421235947: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0002961778916375485, 'time_algorithm_update': 0.05323246104179779, 'temp_loss': 1.7521529094332216, 'temp': 0.34706042758646727, 'alpha_loss': -77.27544971973221, 'alpha': 4.443999685993084, 'critic_loss': 2725.6127344032243, 'actor_loss': -9.028750822034185, 'time_step': 0.05361017395306185, 'td_error': 1.2703915312574379, 'init_value': 8.387932777404785, 'ave_value': 8.387852262828858} step=13840
2022-04-22 00:13.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:13.35 [info     ] CQL_20220421235947: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0002966319894515021, 'time_algorithm_update': 0.053098493228758, 'temp_loss': 1.7072675307362066, 'temp': 0.3379861804619001, 'alpha_loss': -80.29090398446672, 'alpha': 4.616998089531253, 'critic_loss': 2482.1148469958002, 'actor_loss': -9.19817751404867, 'time_step': 0.05347623025750838, 'td_error': 1.2735270265444474, 'init_value': 8.617511749267578, 'ave_value': 8.616261833456807} step=14186
2022-04-22 00:13.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:13.54 [info     ] CQL_20220421235947: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00030486018671465747, 'time_algorithm_update': 0.053628491528461435, 'temp_loss': 1.6620861977511059, 'temp': 0.3291502156526367, 'alpha_loss': -83.40752104389874, 'alpha': 4.796734137342155, 'critic_loss': 2394.9881097870757, 'actor_loss': -9.31738339683224, 'time_step': 0.05401228066813739, 'td_error': 1.2730589851913972, 'init_value': 8.685622215270996, 'ave_value': 8.6864533155775} step=14532
2022-04-22 00:13.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:14.15 [info     ] CQL_20220421235947: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0002967167451891596, 'time_algorithm_update': 0.0571712014303042, 'temp_loss': 1.618493671017575, 'temp': 0.32054605184262885, 'alpha_loss': -86.650709097096, 'alpha': 4.983457300704338, 'critic_loss': 2305.3444231507406, 'actor_loss': -9.467675473648688, 'time_step': 0.05754889642572127, 'td_error': 1.2767114373634982, 'init_value': 8.936262130737305, 'ave_value': 8.936323906992103} step=14878
2022-04-22 00:14.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:14.36 [info     ] CQL_20220421235947: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0002996466752421649, 'time_algorithm_update': 0.056800152525047345, 'temp_loss': 1.5761139013863712, 'temp': 0.31216764269192093, 'alpha_loss': -90.03676031917506, 'alpha': 5.177455943443872, 'critic_loss': 2056.598427480356, 'actor_loss': -9.725874603139182, 'time_step': 0.057185889668547346, 'td_error': 1.2797484263422105, 'init_value': 9.192934036254883, 'ave_value': 9.193076855472729} step=15224
2022-04-22 00:14.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:14.56 [info     ] CQL_20220421235947: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00030502005119544234, 'time_algorithm_update': 0.05654488271371478, 'temp_loss': 1.5349901104938088, 'temp': 0.3040067867047525, 'alpha_loss': -93.5297487732992, 'alpha': 5.379011732994477, 'critic_loss': 1832.042022528676, 'actor_loss': -9.988380801470983, 'time_step': 0.05693203589819759, 'td_error': 1.2844498242349165, 'init_value': 9.514669418334961, 'ave_value': 9.513394113669976} step=15570
2022-04-22 00:14.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:15.17 [info     ] CQL_20220421235947: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00030636580693239423, 'time_algorithm_update': 0.05675480613818747, 'temp_loss': 1.4954117460746985, 'temp': 0.2960588050026425, 'alpha_loss': -97.17366797386566, 'alpha': 5.588417047710088, 'critic_loss': 1643.967736393041, 'actor_loss': -10.23620881648422, 'time_step': 0.05714378811720479, 'td_error': 1.286625562743762, 'init_value': 9.72248649597168, 'ave_value': 9.723651859080865} step=15916
2022-04-22 00:15.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:15.38 [info     ] CQL_20220421235947: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00029988647196334224, 'time_algorithm_update': 0.057357679212713515, 'temp_loss': 1.456075689696163, 'temp': 0.28831741937323113, 'alpha_loss': -100.96506070263813, 'alpha': 5.805986759979601, 'critic_loss': 1486.5805791072073, 'actor_loss': -10.48128239681266, 'time_step': 0.05773768259610744, 'td_error': 1.292084130725632, 'init_value': 10.029683113098145, 'ave_value': 10.02893908556291} step=16262
2022-04-22 00:15.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:15.59 [info     ] CQL_20220421235947: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0002977034949153834, 'time_algorithm_update': 0.05691562153700459, 'temp_loss': 1.4184323876579372, 'temp': 0.2807786282087337, 'alpha_loss': -104.88543782757885, 'alpha': 6.032011762519792, 'critic_loss': 1493.5319686625046, 'actor_loss': -10.616651711436365, 'time_step': 0.05728912353515625, 'td_error': 1.2925204908680834, 'init_value': 10.125846862792969, 'ave_value': 10.127077767565218} step=16608
2022-04-22 00:15.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:16.20 [info     ] CQL_20220421235947: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0002993365932751253, 'time_algorithm_update': 0.0572857615575625, 'temp_loss': 1.3808638660204893, 'temp': 0.2734372873354509, 'alpha_loss': -108.98584985457404, 'alpha': 6.266843292754509, 'critic_loss': 1564.404581940932, 'actor_loss': -10.770571838224555, 'time_step': 0.05766387413002852, 'td_error': 1.2952955427265134, 'init_value': 10.319000244140625, 'ave_value': 10.319501054518705} step=16954
2022-04-22 00:16.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:16.40 [info     ] CQL_20220421235947: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00029883563863059687, 'time_algorithm_update': 0.05738934891761383, 'temp_loss': 1.3446537349265435, 'temp': 0.2662902166905431, 'alpha_loss': -113.22173873813166, 'alpha': 6.510817319671543, 'critic_loss': 1625.0898899673728, 'actor_loss': -10.933476737468919, 'time_step': 0.05776789491576267, 'td_error': 1.2973927078033525, 'init_value': 10.507728576660156, 'ave_value': 10.509345949560956} step=17300
2022-04-22 00:16.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220421235947/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 00:16.42 [info     ] FQE_20220422001641: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015106545873435148, 'time_algorithm_update': 0.009752208928027785, 'loss': 0.008065518080144671, 'time_step': 0.00996849479445492, 'init_value': -0.3621745705604553, 'ave_value': -0.31407157749206094, 'soft_opc': nan} step=166




2022-04-22 00:16.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.44 [info     ] FQE_20220422001641: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001586747456745929, 'time_algorithm_update': 0.009748543601438224, 'loss': 0.004817259108019342, 'time_step': 0.009968002158475209, 'init_value': -0.41870683431625366, 'ave_value': -0.33727299264288163, 'soft_opc': nan} step=332




2022-04-22 00:16.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.46 [info     ] FQE_20220422001641: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014964069228574453, 'time_algorithm_update': 0.009734680853694319, 'loss': 0.004049643042430012, 'time_step': 0.009953526129205543, 'init_value': -0.44229716062545776, 'ave_value': -0.3473692723793221, 'soft_opc': nan} step=498




2022-04-22 00:16.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.48 [info     ] FQE_20220422001641: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001451279743608222, 'time_algorithm_update': 0.008698158953563276, 'loss': 0.0037498300129278415, 'time_step': 0.008906690471143607, 'init_value': -0.45873045921325684, 'ave_value': -0.34597062140300466, 'soft_opc': nan} step=664




2022-04-22 00:16.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.49 [info     ] FQE_20220422001641: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015232936445488986, 'time_algorithm_update': 0.009567160204232457, 'loss': 0.0034236691889341876, 'time_step': 0.009787925754685, 'init_value': -0.4703064262866974, 'ave_value': -0.34013394047044687, 'soft_opc': nan} step=830




2022-04-22 00:16.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.51 [info     ] FQE_20220422001641: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015534263059317348, 'time_algorithm_update': 0.009954166699604815, 'loss': 0.003117962474570082, 'time_step': 0.010173027773937547, 'init_value': -0.5063708424568176, 'ave_value': -0.36198991236385997, 'soft_opc': nan} step=996




2022-04-22 00:16.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.53 [info     ] FQE_20220422001641: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015045792223459267, 'time_algorithm_update': 0.009812567607465997, 'loss': 0.002961415808692754, 'time_step': 0.010037541389465332, 'init_value': -0.5499331951141357, 'ave_value': -0.3861962848250662, 'soft_opc': nan} step=1162




2022-04-22 00:16.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.55 [info     ] FQE_20220422001641: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015038180064006024, 'time_algorithm_update': 0.009278071932045811, 'loss': 0.0027690457260090276, 'time_step': 0.009491674871329802, 'init_value': -0.5543517470359802, 'ave_value': -0.3775052642886032, 'soft_opc': nan} step=1328




2022-04-22 00:16.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.56 [info     ] FQE_20220422001641: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001523336732243917, 'time_algorithm_update': 0.009656715105815106, 'loss': 0.002454108820179006, 'time_step': 0.00987142827137407, 'init_value': -0.5944649577140808, 'ave_value': -0.4021576649358345, 'soft_opc': nan} step=1494




2022-04-22 00:16.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:16.58 [info     ] FQE_20220422001641: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014774483370493693, 'time_algorithm_update': 0.009872751063611135, 'loss': 0.002458753374248115, 'time_step': 0.010089317000055888, 'init_value': -0.6397560834884644, 'ave_value': -0.42986294726279, 'soft_opc': nan} step=1660




2022-04-22 00:16.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.00 [info     ] FQE_20220422001641: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00015059867537165262, 'time_algorithm_update': 0.009739245276853263, 'loss': 0.002386164483305695, 'time_step': 0.009955982127821589, 'init_value': -0.6849749088287354, 'ave_value': -0.46265380550679325, 'soft_opc': nan} step=1826




2022-04-22 00:17.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.02 [info     ] FQE_20220422001641: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001511674329458949, 'time_algorithm_update': 0.008572275380054152, 'loss': 0.002290504328844822, 'time_step': 0.008789467524333173, 'init_value': -0.6749438643455505, 'ave_value': -0.4463637019851589, 'soft_opc': nan} step=1992




2022-04-22 00:17.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.03 [info     ] FQE_20220422001641: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00015367657305246377, 'time_algorithm_update': 0.009918869259845779, 'loss': 0.002400122643574646, 'time_step': 0.010136724954628083, 'init_value': -0.745320737361908, 'ave_value': -0.5099921707030353, 'soft_opc': nan} step=2158




2022-04-22 00:17.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.05 [info     ] FQE_20220422001641: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00015535842941468022, 'time_algorithm_update': 0.009922728481062924, 'loss': 0.0025230941390029593, 'time_step': 0.010144505156091896, 'init_value': -0.78757643699646, 'ave_value': -0.5506474947506511, 'soft_opc': nan} step=2324




2022-04-22 00:17.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.07 [info     ] FQE_20220422001641: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00015161410871758518, 'time_algorithm_update': 0.009686294808445206, 'loss': 0.0024337183252111615, 'time_step': 0.009904882994042822, 'init_value': -0.8179673552513123, 'ave_value': -0.5794875612272671, 'soft_opc': nan} step=2490




2022-04-22 00:17.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.09 [info     ] FQE_20220422001641: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00015509703073156886, 'time_algorithm_update': 0.00935382584491408, 'loss': 0.0024816849316661076, 'time_step': 0.009574963385800281, 'init_value': -0.8742807507514954, 'ave_value': -0.6270910020619799, 'soft_opc': nan} step=2656




2022-04-22 00:17.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.11 [info     ] FQE_20220422001641: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016239752252417873, 'time_algorithm_update': 0.009635026196399367, 'loss': 0.0030761598262245803, 'time_step': 0.009866162955042827, 'init_value': -0.9526021480560303, 'ave_value': -0.6799735266413237, 'soft_opc': nan} step=2822




2022-04-22 00:17.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.12 [info     ] FQE_20220422001641: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015120621187141142, 'time_algorithm_update': 0.009966162313897926, 'loss': 0.0033576797637162752, 'time_step': 0.010184595383793474, 'init_value': -1.0022691488265991, 'ave_value': -0.7194926070703848, 'soft_opc': nan} step=2988




2022-04-22 00:17.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.14 [info     ] FQE_20220422001641: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015759611704263342, 'time_algorithm_update': 0.009459550122180617, 'loss': 0.003917115438218981, 'time_step': 0.009684884404561606, 'init_value': -1.0027700662612915, 'ave_value': -0.7153770536339531, 'soft_opc': nan} step=3154




2022-04-22 00:17.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.16 [info     ] FQE_20220422001641: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015625321721456138, 'time_algorithm_update': 0.009481361113398909, 'loss': 0.004109635254850678, 'time_step': 0.009706686778240893, 'init_value': -1.0166640281677246, 'ave_value': -0.723222617387587, 'soft_opc': nan} step=3320




2022-04-22 00:17.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.18 [info     ] FQE_20220422001641: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00015427261950021767, 'time_algorithm_update': 0.009245325283831861, 'loss': 0.00473333848202012, 'time_step': 0.009468498000179428, 'init_value': -1.0463550090789795, 'ave_value': -0.7389207822438497, 'soft_opc': nan} step=3486




2022-04-22 00:17.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.19 [info     ] FQE_20220422001641: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015194732022572713, 'time_algorithm_update': 0.00987727527158806, 'loss': 0.005114219570462312, 'time_step': 0.010094073881585914, 'init_value': -1.0579047203063965, 'ave_value': -0.7471900564786215, 'soft_opc': nan} step=3652




2022-04-22 00:17.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.21 [info     ] FQE_20220422001641: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015606937638248307, 'time_algorithm_update': 0.009689914174826748, 'loss': 0.005648524053002068, 'time_step': 0.009915337505110776, 'init_value': -1.120950698852539, 'ave_value': -0.8146049127104348, 'soft_opc': nan} step=3818




2022-04-22 00:17.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.23 [info     ] FQE_20220422001641: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015449093048831066, 'time_algorithm_update': 0.009886951331632683, 'loss': 0.005910090099969676, 'time_step': 0.010105124439101621, 'init_value': -1.0687370300292969, 'ave_value': -0.7650334961603287, 'soft_opc': nan} step=3984




2022-04-22 00:17.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.25 [info     ] FQE_20220422001641: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015348124216838055, 'time_algorithm_update': 0.009247173745948148, 'loss': 0.006768116174673897, 'time_step': 0.009472716285521725, 'init_value': -1.1768302917480469, 'ave_value': -0.8614015751006501, 'soft_opc': nan} step=4150




2022-04-22 00:17.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.26 [info     ] FQE_20220422001641: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001511056739163686, 'time_algorithm_update': 0.009534924863332725, 'loss': 0.007321366257628942, 'time_step': 0.009750940713537744, 'init_value': -1.2396800518035889, 'ave_value': -0.9080176632721313, 'soft_opc': nan} step=4316




2022-04-22 00:17.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.28 [info     ] FQE_20220422001641: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014651683439691383, 'time_algorithm_update': 0.010073559829987675, 'loss': 0.007709461214169523, 'time_step': 0.010287948401577502, 'init_value': -1.2273688316345215, 'ave_value': -0.8923744227451919, 'soft_opc': nan} step=4482




2022-04-22 00:17.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.30 [info     ] FQE_20220422001641: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001447760915181723, 'time_algorithm_update': 0.00982888922633895, 'loss': 0.008733763630538393, 'time_step': 0.01004241891654141, 'init_value': -1.2983593940734863, 'ave_value': -0.9679886042228523, 'soft_opc': nan} step=4648




2022-04-22 00:17.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.32 [info     ] FQE_20220422001641: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014592653297516237, 'time_algorithm_update': 0.009356698357915303, 'loss': 0.009235922557144728, 'time_step': 0.009568560554320553, 'init_value': -1.3268226385116577, 'ave_value': -0.9582820328967729, 'soft_opc': nan} step=4814




2022-04-22 00:17.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.33 [info     ] FQE_20220422001641: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015675877950277673, 'time_algorithm_update': 0.00890571094421019, 'loss': 0.010368815398107406, 'time_step': 0.009132125291479639, 'init_value': -1.3085756301879883, 'ave_value': -0.9330983564131767, 'soft_opc': nan} step=4980




2022-04-22 00:17.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.35 [info     ] FQE_20220422001641: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015199471669024732, 'time_algorithm_update': 0.009591160050357681, 'loss': 0.010562288124992299, 'time_step': 0.009809202458485064, 'init_value': -1.3719813823699951, 'ave_value': -1.0045309838301897, 'soft_opc': nan} step=5146




2022-04-22 00:17.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.37 [info     ] FQE_20220422001641: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014916816389704324, 'time_algorithm_update': 0.009723794029419681, 'loss': 0.012267247564720371, 'time_step': 0.009937760341598326, 'init_value': -1.4515386819839478, 'ave_value': -1.044723215443292, 'soft_opc': nan} step=5312




2022-04-22 00:17.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.39 [info     ] FQE_20220422001641: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014680264944053558, 'time_algorithm_update': 0.009250296167580479, 'loss': 0.012124054983864168, 'time_step': 0.009464993534317935, 'init_value': -1.4528391361236572, 'ave_value': -1.0404701145986717, 'soft_opc': nan} step=5478




2022-04-22 00:17.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.40 [info     ] FQE_20220422001641: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015451103807931924, 'time_algorithm_update': 0.009077824741961008, 'loss': 0.013065394801676768, 'time_step': 0.00930100463959108, 'init_value': -1.5668984651565552, 'ave_value': -1.1481380648491484, 'soft_opc': nan} step=5644




2022-04-22 00:17.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.42 [info     ] FQE_20220422001641: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015162272625658885, 'time_algorithm_update': 0.009347056768026697, 'loss': 0.013836107507463074, 'time_step': 0.009565344776015684, 'init_value': -1.5656633377075195, 'ave_value': -1.1469660344894406, 'soft_opc': nan} step=5810




2022-04-22 00:17.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.44 [info     ] FQE_20220422001641: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015180800334516778, 'time_algorithm_update': 0.009552250425499606, 'loss': 0.014205751104661858, 'time_step': 0.009769821741494787, 'init_value': -1.5964093208312988, 'ave_value': -1.185564647389257, 'soft_opc': nan} step=5976




2022-04-22 00:17.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.45 [info     ] FQE_20220422001641: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014785398919898342, 'time_algorithm_update': 0.0093510538698679, 'loss': 0.015081211253060925, 'time_step': 0.009565752672861857, 'init_value': -1.5053907632827759, 'ave_value': -1.0835771955105031, 'soft_opc': nan} step=6142




2022-04-22 00:17.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.47 [info     ] FQE_20220422001641: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016001477298966372, 'time_algorithm_update': 0.009079302649900138, 'loss': 0.0158039240919738, 'time_step': 0.009309475680431688, 'init_value': -1.5511056184768677, 'ave_value': -1.1311844232072634, 'soft_opc': nan} step=6308




2022-04-22 00:17.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.49 [info     ] FQE_20220422001641: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015406867107713078, 'time_algorithm_update': 0.009645635823169386, 'loss': 0.016986619932885586, 'time_step': 0.009865209280726421, 'init_value': -1.5762114524841309, 'ave_value': -1.173339278484183, 'soft_opc': nan} step=6474




2022-04-22 00:17.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.51 [info     ] FQE_20220422001641: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001513239849044616, 'time_algorithm_update': 0.009647807442998311, 'loss': 0.01744895726154805, 'time_step': 0.009870039411337978, 'init_value': -1.599680781364441, 'ave_value': -1.1501881794081077, 'soft_opc': nan} step=6640




2022-04-22 00:17.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.52 [info     ] FQE_20220422001641: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001575329217566065, 'time_algorithm_update': 0.009680348706532674, 'loss': 0.018204587010290832, 'time_step': 0.00990631494177393, 'init_value': -1.6544049978256226, 'ave_value': -1.2184247715482572, 'soft_opc': nan} step=6806




2022-04-22 00:17.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.54 [info     ] FQE_20220422001641: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015201195176825467, 'time_algorithm_update': 0.008940090616065335, 'loss': 0.019449526997751567, 'time_step': 0.009155938424259782, 'init_value': -1.6799540519714355, 'ave_value': -1.2505272864238233, 'soft_opc': nan} step=6972




2022-04-22 00:17.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.56 [info     ] FQE_20220422001641: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015234085450689476, 'time_algorithm_update': 0.009514428046812495, 'loss': 0.019653700913109043, 'time_step': 0.009733549083571836, 'init_value': -1.6863913536071777, 'ave_value': -1.2268741825444474, 'soft_opc': nan} step=7138




2022-04-22 00:17.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.58 [info     ] FQE_20220422001641: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001592707921223468, 'time_algorithm_update': 0.00977259515279747, 'loss': 0.020251261743966558, 'time_step': 0.009998265519199601, 'init_value': -1.796715497970581, 'ave_value': -1.3316273872782518, 'soft_opc': nan} step=7304




2022-04-22 00:17.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:17.59 [info     ] FQE_20220422001641: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001508844904152744, 'time_algorithm_update': 0.009105346289025732, 'loss': 0.02137347770411611, 'time_step': 0.009321942386856998, 'init_value': -1.6672918796539307, 'ave_value': -1.2134155662495407, 'soft_opc': nan} step=7470




2022-04-22 00:17.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:18.01 [info     ] FQE_20220422001641: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015269560986254588, 'time_algorithm_update': 0.00912250237292554, 'loss': 0.022614481528247538, 'time_step': 0.009337953774325818, 'init_value': -1.6101093292236328, 'ave_value': -1.151888086290208, 'soft_opc': nan} step=7636




2022-04-22 00:18.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:18.03 [info     ] FQE_20220422001641: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00014535777540092008, 'time_algorithm_update': 0.008422667721667922, 'loss': 0.0230579223604424, 'time_step': 0.00863609831017184, 'init_value': -1.7056331634521484, 'ave_value': -1.2410596248605665, 'soft_opc': nan} step=7802




2022-04-22 00:18.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:18.04 [info     ] FQE_20220422001641: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015595878463193593, 'time_algorithm_update': 0.008721835641975862, 'loss': 0.023020933192161596, 'time_step': 0.008942624172532415, 'init_value': -1.7303102016448975, 'ave_value': -1.2449513134236136, 'soft_opc': nan} step=7968




2022-04-22 00:18.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:18.06 [info     ] FQE_20220422001641: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001515078257365399, 'time_algorithm_update': 0.008539050458425498, 'loss': 0.024267668619117957, 'time_step': 0.008757377245340002, 'init_value': -1.8419556617736816, 'ave_value': -1.3488884110540689, 'soft_opc': nan} step=8134




2022-04-22 00:18.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:18.07 [info     ] FQE_20220422001641: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016282409070486045, 'time_algorithm_update': 0.008732649217168969, 'loss': 0.025947040328419352, 'time_step': 0.00896479997290186, 'init_value': -1.79719877243042, 'ave_value': -1.3198138999987628, 'soft_opc': nan} step=8300




2022-04-22 00:18.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001641/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 00:18.08 [debug    ] RoundIterator is selected.
2022-04-22 00:18.08 [info     ] Directory is created at d3rlpy_logs/FQE_20220422001808
2022-04-22 00:18.08 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 00:18.08 [debug    ] Building models...
2022-04-22 00:18.08 [debug    ] Models have been built.
2022-04-22 00:18.08 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422001808/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 00:18.11 [info     ] FQE_20220422001808: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015301274698834087, 'time_algorithm_update': 0.008585683135099189, 'loss': 0.02478781760518634, 'time_step': 0.008808497772660366, 'init_value': -0.9300236105918884, 'ave_value': -0.9076779364532715, 'soft_opc': nan} step=344




2022-04-22 00:18.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.14 [info     ] FQE_20220422001808: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015771735546200774, 'time_algorithm_update': 0.008489809757055239, 'loss': 0.021814466372868695, 'time_step': 0.00871805604114089, 'init_value': -1.736572265625, 'ave_value': -1.7037275866345243, 'soft_opc': nan} step=688




2022-04-22 00:18.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.17 [info     ] FQE_20220422001808: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001557808975840724, 'time_algorithm_update': 0.008591136960096137, 'loss': 0.02500329115211444, 'time_step': 0.008817995703497599, 'init_value': -2.7145824432373047, 'ave_value': -2.716445220281949, 'soft_opc': nan} step=1032




2022-04-22 00:18.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.21 [info     ] FQE_20220422001808: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001591201438460239, 'time_algorithm_update': 0.008694239134012266, 'loss': 0.02776406155249407, 'time_step': 0.008921932342440583, 'init_value': -3.3876051902770996, 'ave_value': -3.485118226680133, 'soft_opc': nan} step=1376




2022-04-22 00:18.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.24 [info     ] FQE_20220422001808: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015490138253500295, 'time_algorithm_update': 0.008568426204282184, 'loss': 0.034533212884589166, 'time_step': 0.008792598580205164, 'init_value': -4.025655269622803, 'ave_value': -4.3513598351730955, 'soft_opc': nan} step=1720




2022-04-22 00:18.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.27 [info     ] FQE_20220422001808: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015352007954619651, 'time_algorithm_update': 0.008298892614453338, 'loss': 0.041932613591576906, 'time_step': 0.008519716734109922, 'init_value': -4.370759963989258, 'ave_value': -5.033567857071086, 'soft_opc': nan} step=2064




2022-04-22 00:18.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.30 [info     ] FQE_20220422001808: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015318255091822423, 'time_algorithm_update': 0.008588796438172806, 'loss': 0.05343934112001037, 'time_step': 0.00880721280741137, 'init_value': -4.740156173706055, 'ave_value': -5.799811922107731, 'soft_opc': nan} step=2408




2022-04-22 00:18.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.34 [info     ] FQE_20220422001808: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015297740004783454, 'time_algorithm_update': 0.008613000775492468, 'loss': 0.06549504144539581, 'time_step': 0.008834725202516068, 'init_value': -4.788619041442871, 'ave_value': -6.3994741942297235, 'soft_opc': nan} step=2752




2022-04-22 00:18.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.37 [info     ] FQE_20220422001808: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015540247739747514, 'time_algorithm_update': 0.008562885051549868, 'loss': 0.07407042823818534, 'time_step': 0.008789474880972575, 'init_value': -5.092874050140381, 'ave_value': -7.205359782748395, 'soft_opc': nan} step=3096




2022-04-22 00:18.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.40 [info     ] FQE_20220422001808: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015073182970978493, 'time_algorithm_update': 0.008243925349656926, 'loss': 0.08922031987458467, 'time_step': 0.008461893297905145, 'init_value': -5.529815673828125, 'ave_value': -8.212254728793024, 'soft_opc': nan} step=3440




2022-04-22 00:18.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.43 [info     ] FQE_20220422001808: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.000153383543325025, 'time_algorithm_update': 0.00846362737722175, 'loss': 0.09534795234672898, 'time_step': 0.008685915276061657, 'init_value': -5.56558895111084, 'ave_value': -8.7480355771141, 'soft_opc': nan} step=3784




2022-04-22 00:18.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.46 [info     ] FQE_20220422001808: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001537335473437642, 'time_algorithm_update': 0.008484019096507582, 'loss': 0.10676119234521202, 'time_step': 0.008705089951670446, 'init_value': -5.852187156677246, 'ave_value': -9.525153662895297, 'soft_opc': nan} step=4128




2022-04-22 00:18.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.50 [info     ] FQE_20220422001808: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015323037324949752, 'time_algorithm_update': 0.008526894242264504, 'loss': 0.11520447004803044, 'time_step': 0.0087497518506161, 'init_value': -5.887312412261963, 'ave_value': -10.126670151109899, 'soft_opc': nan} step=4472




2022-04-22 00:18.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.53 [info     ] FQE_20220422001808: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001528658146081969, 'time_algorithm_update': 0.008302091859107795, 'loss': 0.12965012576734258, 'time_step': 0.008520359909811686, 'init_value': -5.978265285491943, 'ave_value': -10.830330637477019, 'soft_opc': nan} step=4816




2022-04-22 00:18.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.56 [info     ] FQE_20220422001808: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015449454618054768, 'time_algorithm_update': 0.008591495281042055, 'loss': 0.1427376211707502, 'time_step': 0.008816432814265406, 'init_value': -6.26508092880249, 'ave_value': -11.550038095380007, 'soft_opc': nan} step=5160




2022-04-22 00:18.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:18.59 [info     ] FQE_20220422001808: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001508163851360942, 'time_algorithm_update': 0.008269710596217665, 'loss': 0.1528466132924307, 'time_step': 0.00848663615625958, 'init_value': -6.54844856262207, 'ave_value': -12.291827594314446, 'soft_opc': nan} step=5504




2022-04-22 00:18.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.02 [info     ] FQE_20220422001808: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015094391135282294, 'time_algorithm_update': 0.00852917100107947, 'loss': 0.16800081028148187, 'time_step': 0.008746686369873757, 'init_value': -6.554108619689941, 'ave_value': -12.743357664731285, 'soft_opc': nan} step=5848




2022-04-22 00:19.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.06 [info     ] FQE_20220422001808: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015176520791164663, 'time_algorithm_update': 0.008247220932051193, 'loss': 0.1777998968352412, 'time_step': 0.00846348806869152, 'init_value': -6.724570274353027, 'ave_value': -13.191130409191723, 'soft_opc': nan} step=6192




2022-04-22 00:19.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.09 [info     ] FQE_20220422001808: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015055093654366426, 'time_algorithm_update': 0.008474554433379062, 'loss': 0.1872267879296614, 'time_step': 0.008690406416737757, 'init_value': -6.896589279174805, 'ave_value': -13.618976859653658, 'soft_opc': nan} step=6536




2022-04-22 00:19.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.12 [info     ] FQE_20220422001808: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015524445578109388, 'time_algorithm_update': 0.008496497952660848, 'loss': 0.2000967226922512, 'time_step': 0.008716852859009144, 'init_value': -7.214921474456787, 'ave_value': -14.190561181471288, 'soft_opc': nan} step=6880




2022-04-22 00:19.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.15 [info     ] FQE_20220422001808: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001508829205535179, 'time_algorithm_update': 0.008465694826702739, 'loss': 0.220988042295239, 'time_step': 0.008682908013809559, 'init_value': -7.5670294761657715, 'ave_value': -14.812247093438867, 'soft_opc': nan} step=7224




2022-04-22 00:19.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.18 [info     ] FQE_20220422001808: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015774507855260096, 'time_algorithm_update': 0.008322432983753294, 'loss': 0.23672344741346532, 'time_step': 0.008545883173166319, 'init_value': -7.746616840362549, 'ave_value': -15.205744593327095, 'soft_opc': nan} step=7568




2022-04-22 00:19.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.22 [info     ] FQE_20220422001808: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015216095502986463, 'time_algorithm_update': 0.008490390555803166, 'loss': 0.24550417048834958, 'time_step': 0.008710935365322024, 'init_value': -8.143956184387207, 'ave_value': -15.94442634813389, 'soft_opc': nan} step=7912




2022-04-22 00:19.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.25 [info     ] FQE_20220422001808: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016179611516553303, 'time_algorithm_update': 0.00855098283568094, 'loss': 0.26334478537183864, 'time_step': 0.008778364159340081, 'init_value': -8.190136909484863, 'ave_value': -16.140845992547032, 'soft_opc': nan} step=8256




2022-04-22 00:19.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.28 [info     ] FQE_20220422001808: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015224897584249807, 'time_algorithm_update': 0.008314116749652597, 'loss': 0.2693927559330193, 'time_step': 0.008533126393029855, 'init_value': -8.562164306640625, 'ave_value': -16.631075199317085, 'soft_opc': nan} step=8600




2022-04-22 00:19.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.31 [info     ] FQE_20220422001808: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015984856805136038, 'time_algorithm_update': 0.008337413848832596, 'loss': 0.277474433654746, 'time_step': 0.008564121501390324, 'init_value': -9.056868553161621, 'ave_value': -17.22074494766793, 'soft_opc': nan} step=8944




2022-04-22 00:19.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.34 [info     ] FQE_20220422001808: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001568808112033578, 'time_algorithm_update': 0.008562055438063865, 'loss': 0.28735660358743614, 'time_step': 0.008788607148237006, 'init_value': -8.950765609741211, 'ave_value': -17.201819049395716, 'soft_opc': nan} step=9288




2022-04-22 00:19.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.38 [info     ] FQE_20220422001808: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001549644525661025, 'time_algorithm_update': 0.00853651623393214, 'loss': 0.294643824356933, 'time_step': 0.008760049592616946, 'init_value': -9.600292205810547, 'ave_value': -17.90731525337911, 'soft_opc': nan} step=9632




2022-04-22 00:19.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.41 [info     ] FQE_20220422001808: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015212214270303415, 'time_algorithm_update': 0.008551839479180269, 'loss': 0.2942250582313728, 'time_step': 0.008775665316470834, 'init_value': -9.785530090332031, 'ave_value': -18.23351617160144, 'soft_opc': nan} step=9976




2022-04-22 00:19.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.44 [info     ] FQE_20220422001808: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015442246614500533, 'time_algorithm_update': 0.008291399063065995, 'loss': 0.30295701232348937, 'time_step': 0.00851591659146686, 'init_value': -10.019100189208984, 'ave_value': -18.713088492338276, 'soft_opc': nan} step=10320




2022-04-22 00:19.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.47 [info     ] FQE_20220422001808: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001530661139377328, 'time_algorithm_update': 0.008559595013773718, 'loss': 0.3052817357075942, 'time_step': 0.008782648762991263, 'init_value': -10.203632354736328, 'ave_value': -19.029357122296105, 'soft_opc': nan} step=10664




2022-04-22 00:19.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.50 [info     ] FQE_20220422001808: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015251303828039834, 'time_algorithm_update': 0.008427867362665575, 'loss': 0.3069228733575708, 'time_step': 0.008645188669825709, 'init_value': -10.339454650878906, 'ave_value': -19.245621256918966, 'soft_opc': nan} step=11008




2022-04-22 00:19.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.54 [info     ] FQE_20220422001808: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015128213305806005, 'time_algorithm_update': 0.00847207598908003, 'loss': 0.30507415494621665, 'time_step': 0.008692008811374044, 'init_value': -10.531729698181152, 'ave_value': -19.526133613607705, 'soft_opc': nan} step=11352




2022-04-22 00:19.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:19.57 [info     ] FQE_20220422001808: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015333294868469238, 'time_algorithm_update': 0.008230738168539004, 'loss': 0.31234913510487006, 'time_step': 0.00845236764397732, 'init_value': -10.809990882873535, 'ave_value': -19.609148252385967, 'soft_opc': nan} step=11696




2022-04-22 00:19.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.00 [info     ] FQE_20220422001808: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015316245167754417, 'time_algorithm_update': 0.008469715367916018, 'loss': 0.3157283785633829, 'time_step': 0.008690365525179131, 'init_value': -11.025894165039062, 'ave_value': -19.86129740616556, 'soft_opc': nan} step=12040




2022-04-22 00:20.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.03 [info     ] FQE_20220422001808: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015276254609573718, 'time_algorithm_update': 0.008461075466732646, 'loss': 0.33166824439865383, 'time_step': 0.008679606886797173, 'init_value': -11.14999008178711, 'ave_value': -20.065272659366542, 'soft_opc': nan} step=12384




2022-04-22 00:20.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.06 [info     ] FQE_20220422001808: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001550656418467677, 'time_algorithm_update': 0.008457714735075485, 'loss': 0.3336508995501411, 'time_step': 0.008681378392286079, 'init_value': -11.5394926071167, 'ave_value': -20.488489021452015, 'soft_opc': nan} step=12728




2022-04-22 00:20.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.10 [info     ] FQE_20220422001808: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001504088557043741, 'time_algorithm_update': 0.008230739554693533, 'loss': 0.33894685862672536, 'time_step': 0.008447319269180298, 'init_value': -11.933629035949707, 'ave_value': -20.874160171420783, 'soft_opc': nan} step=13072




2022-04-22 00:20.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.13 [info     ] FQE_20220422001808: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015190174413281818, 'time_algorithm_update': 0.008458181176074716, 'loss': 0.337151319784827, 'time_step': 0.008683235146278558, 'init_value': -11.43067455291748, 'ave_value': -20.459680839219637, 'soft_opc': nan} step=13416




2022-04-22 00:20.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.16 [info     ] FQE_20220422001808: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001508461874584819, 'time_algorithm_update': 0.00846505650254183, 'loss': 0.3472648369754816, 'time_step': 0.008686585481776747, 'init_value': -12.037086486816406, 'ave_value': -20.930861156650394, 'soft_opc': nan} step=13760




2022-04-22 00:20.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.19 [info     ] FQE_20220422001808: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015059390733408373, 'time_algorithm_update': 0.00844478399254555, 'loss': 0.3520655989251641, 'time_step': 0.00865997547327086, 'init_value': -12.589753150939941, 'ave_value': -21.40286302499595, 'soft_opc': nan} step=14104




2022-04-22 00:20.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.22 [info     ] FQE_20220422001808: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015480851018151573, 'time_algorithm_update': 0.008277672667836035, 'loss': 0.3693170525420587, 'time_step': 0.008500182351400687, 'init_value': -12.69406509399414, 'ave_value': -21.42232629859417, 'soft_opc': nan} step=14448




2022-04-22 00:20.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.25 [info     ] FQE_20220422001808: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015366285346275153, 'time_algorithm_update': 0.008456921161607255, 'loss': 0.3762597651275005, 'time_step': 0.008676440909851429, 'init_value': -12.94306755065918, 'ave_value': -21.7028461904432, 'soft_opc': nan} step=14792




2022-04-22 00:20.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.29 [info     ] FQE_20220422001808: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001515662947366404, 'time_algorithm_update': 0.008490162533383036, 'loss': 0.39546347467152965, 'time_step': 0.008709599112355432, 'init_value': -13.510316848754883, 'ave_value': -21.913475551065698, 'soft_opc': nan} step=15136




2022-04-22 00:20.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.32 [info     ] FQE_20220422001808: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015231343202812728, 'time_algorithm_update': 0.00849116610926251, 'loss': 0.40250987255523457, 'time_step': 0.008715187394341756, 'init_value': -13.6812744140625, 'ave_value': -22.098679534522063, 'soft_opc': nan} step=15480




2022-04-22 00:20.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.35 [info     ] FQE_20220422001808: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015307858932849972, 'time_algorithm_update': 0.008244344661402147, 'loss': 0.4195190429021496, 'time_step': 0.008467634749966998, 'init_value': -14.053890228271484, 'ave_value': -22.473476672008438, 'soft_opc': nan} step=15824




2022-04-22 00:20.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.38 [info     ] FQE_20220422001808: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00014878289644108263, 'time_algorithm_update': 0.008398158605708632, 'loss': 0.42158760085192964, 'time_step': 0.008614063262939453, 'init_value': -14.177695274353027, 'ave_value': -22.514490612810647, 'soft_opc': nan} step=16168




2022-04-22 00:20.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.41 [info     ] FQE_20220422001808: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015584673992423125, 'time_algorithm_update': 0.008438589961029763, 'loss': 0.43390105017136005, 'time_step': 0.008664250373840332, 'init_value': -14.884897232055664, 'ave_value': -23.01518089289175, 'soft_opc': nan} step=16512




2022-04-22 00:20.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.45 [info     ] FQE_20220422001808: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015420483988384868, 'time_algorithm_update': 0.00847927636878435, 'loss': 0.4540296540877154, 'time_step': 0.008699842663698418, 'init_value': -14.729531288146973, 'ave_value': -22.850830193856094, 'soft_opc': nan} step=16856




2022-04-22 00:20.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:20.48 [info     ] FQE_20220422001808: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001489228980485783, 'time_algorithm_update': 0.008135923119478448, 'loss': 0.4679750151103796, 'time_step': 0.008350324492121851, 'init_value': -15.117456436157227, 'ave_value': -23.247993574855297, 'soft_opc': nan} step=17200




2022-04-22 00:20.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422001808/model_17200.pt
search iteration:  7
using hyper params:  [0.008779887825762032, 0.00533065143117945, 3.190651276054116e-05, 1]
2022-04-22 00:20.48 [debug    ] RoundIterator is selected.
2022-04-22 00:20.48 [info     ] Directory is created at d3rlpy_logs/CQL_20220422002048
2022-04-22 00:20.48 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 00:20.48 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 00:20.48 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422002048/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.008779887825762032, 'actor_optim_factory': {'optim_c

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:21.07 [info     ] CQL_20220422002048: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00029491551349617844, 'time_algorithm_update': 0.05207331332168138, 'temp_loss': 4.980164070349897, 'temp': 0.9943759088226826, 'alpha_loss': -17.694192434321938, 'alpha': 1.0176902833701558, 'critic_loss': 26.176532276793022, 'actor_loss': -2.0507277894571336, 'time_step': 0.052448737138957646, 'td_error': 1.2192599736871255, 'init_value': 0.19844110310077667, 'ave_value': 0.32283794947777555} step=346
2022-04-22 00:21.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:21.26 [info     ] CQL_20220422002048: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003000601178648844, 'time_algorithm_update': 0.052591966066746355, 'temp_loss': 4.963304219218347, 'temp': 0.9834267417819513, 'alpha_loss': -18.337901528860105, 'alpha': 1.0541168516770953, 'critic_loss': 31.304473579274436, 'actor_loss': -1.8945598795234813, 'time_step': 0.05297547062008367, 'td_error': 1.207925708060858, 'init_value': 0.10801005363464355, 'ave_value': 0.3567330407889294} step=692
2022-04-22 00:21.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:21.45 [info     ] CQL_20220422002048: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00029975072496888266, 'time_algorithm_update': 0.05233070960623681, 'temp_loss': 4.909382825641963, 'temp': 0.9727013617581715, 'alpha_loss': -19.005597395703973, 'alpha': 1.0923785205521335, 'critic_loss': 40.804499576546554, 'actor_loss': -1.5828906549194646, 'time_step': 0.05271484052514754, 'td_error': 1.2110556537882804, 'init_value': -0.2641635239124298, 'ave_value': 0.03850037187210696} step=1038
2022-04-22 00:21.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:22.04 [info     ] CQL_20220422002048: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003046968768786833, 'time_algorithm_update': 0.053093642857722466, 'temp_loss': 4.858999917961959, 'temp': 0.9621443686457728, 'alpha_loss': -19.70569715885758, 'alpha': 1.1325448062378547, 'critic_loss': 52.95894453015631, 'actor_loss': -1.151990123222329, 'time_step': 0.05348538112089124, 'td_error': 1.2074202301608352, 'init_value': -0.4992435872554779, 'ave_value': -0.06342525413359665} step=1384
2022-04-22 00:22.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:22.24 [info     ] CQL_20220422002048: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0002977496626749204, 'time_algorithm_update': 0.05353813295419506, 'temp_loss': 4.8050929742052375, 'temp': 0.9517437359156636, 'alpha_loss': -20.43840120017873, 'alpha': 1.1746443637533683, 'critic_loss': 67.24742849713805, 'actor_loss': -0.7185572700607294, 'time_step': 0.053921507962177255, 'td_error': 1.215173183818365, 'init_value': -0.9770001769065857, 'ave_value': -0.4012330783358848} step=1730
2022-04-22 00:22.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:22.44 [info     ] CQL_20220422002048: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00030430824081332695, 'time_algorithm_update': 0.054653007171057554, 'temp_loss': 4.754206824164859, 'temp': 0.9414897321621117, 'alpha_loss': -21.2106021759827, 'alpha': 1.2187199130912736, 'critic_loss': 83.34405876997579, 'actor_loss': -0.26601539859063705, 'time_step': 0.055042239971932645, 'td_error': 1.2158712661765676, 'init_value': -1.3981804847717285, 'ave_value': -0.9287060074653033} step=2076
2022-04-22 00:22.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:23.04 [info     ] CQL_20220422002048: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00029692828999778437, 'time_algorithm_update': 0.05671118518520642, 'temp_loss': 4.703071887782543, 'temp': 0.9313709730702329, 'alpha_loss': -22.005690938475503, 'alpha': 1.2647988903040142, 'critic_loss': 102.41164131385054, 'actor_loss': 0.02505643988792607, 'time_step': 0.05709310969865391, 'td_error': 1.2178003657740444, 'init_value': -1.5787909030914307, 'ave_value': -1.0049405726410303} step=2422
2022-04-22 00:23.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:23.25 [info     ] CQL_20220422002048: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0002966313003804642, 'time_algorithm_update': 0.05670870521854114, 'temp_loss': 4.653528624187315, 'temp': 0.921381640296451, 'alpha_loss': -22.841522183721466, 'alpha': 1.312916826995122, 'critic_loss': 127.9968702062706, 'actor_loss': 0.04786044305911815, 'time_step': 0.057090059870240316, 'td_error': 1.2218724095586042, 'init_value': -1.431746482849121, 'ave_value': -0.9768052470294035} step=2768
2022-04-22 00:23.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:23.45 [info     ] CQL_20220422002048: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00030449428999355075, 'time_algorithm_update': 0.05638495001489716, 'temp_loss': 4.60386928106319, 'temp': 0.9115162340202773, 'alpha_loss': -23.71186559599948, 'alpha': 1.363118153431512, 'critic_loss': 167.75864555932193, 'actor_loss': -0.4195938330961962, 'time_step': 0.056773686684625, 'td_error': 1.2094605806699619, 'init_value': -0.8221631050109863, 'ave_value': -0.45716418425524724} step=3114
2022-04-22 00:23.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:24.06 [info     ] CQL_20220422002048: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0002987329670459549, 'time_algorithm_update': 0.05628418715703005, 'temp_loss': 4.554124749464796, 'temp': 0.9017706473783261, 'alpha_loss': -24.614856135638462, 'alpha': 1.415445110701412, 'critic_loss': 222.21584915425737, 'actor_loss': -1.1194358531794797, 'time_step': 0.05666784537320881, 'td_error': 1.218040418113572, 'init_value': -0.19158999621868134, 'ave_value': 0.04876168822722166} step=3460
2022-04-22 00:24.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:24.26 [info     ] CQL_20220422002048: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0002971798009266054, 'time_algorithm_update': 0.05644484338043742, 'temp_loss': 4.50586320623497, 'temp': 0.8921391721061199, 'alpha_loss': -25.563271213818147, 'alpha': 1.469951578647415, 'critic_loss': 280.65577124446804, 'actor_loss': -1.7238572922056121, 'time_step': 0.056827108984048656, 'td_error': 1.2231489784938503, 'init_value': 0.3281159996986389, 'ave_value': 0.487535483507027} step=3806
2022-04-22 00:24.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:24.47 [info     ] CQL_20220422002048: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0002905977943729114, 'time_algorithm_update': 0.055788575569329237, 'temp_loss': 4.4577384422280195, 'temp': 0.8826187611315292, 'alpha_loss': -26.549728691233376, 'alpha': 1.5267074152913396, 'critic_loss': 334.76498589488125, 'actor_loss': -2.3020028757911195, 'time_step': 0.05616182192212584, 'td_error': 1.2301662237166613, 'init_value': 0.87384033203125, 'ave_value': 0.9596320060770226} step=4152
2022-04-22 00:24.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:25.07 [info     ] CQL_20220422002048: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00030337179327286735, 'time_algorithm_update': 0.056750159732179145, 'temp_loss': 4.409576814298685, 'temp': 0.8732081496646639, 'alpha_loss': -27.574199885991266, 'alpha': 1.5857725139987262, 'critic_loss': 388.18108980917515, 'actor_loss': -2.829474508417824, 'time_step': 0.05714058531502079, 'td_error': 1.2320550953552494, 'init_value': 1.3825786113739014, 'ave_value': 1.4351244928826647} step=4498
2022-04-22 00:25.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:25.28 [info     ] CQL_20220422002048: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003018765091206986, 'time_algorithm_update': 0.05623039551553009, 'temp_loss': 4.362910416773978, 'temp': 0.8639018499437784, 'alpha_loss': -28.64404517377732, 'alpha': 1.6472112567438555, 'critic_loss': 444.0937234514711, 'actor_loss': -3.354580339668803, 'time_step': 0.05661530921913985, 'td_error': 1.2332701733731937, 'init_value': 1.8412489891052246, 'ave_value': 1.8751027614316311} step=4844
2022-04-22 00:25.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:25.49 [info     ] CQL_20220422002048: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003032704998303011, 'time_algorithm_update': 0.05655779177053815, 'temp_loss': 4.315695639979633, 'temp': 0.8546996474955123, 'alpha_loss': -29.756365572096986, 'alpha': 1.7111111913802306, 'critic_loss': 510.5642913641957, 'actor_loss': -3.8261158714404684, 'time_step': 0.056946954975238426, 'td_error': 1.234257528370664, 'init_value': 2.3529133796691895, 'ave_value': 2.3732301172909436} step=5190
2022-04-22 00:25.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:26.09 [info     ] CQL_20220422002048: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00030809261895328586, 'time_algorithm_update': 0.056924536049021465, 'temp_loss': 4.269072265294246, 'temp': 0.8455987810399491, 'alpha_loss': -30.908955436221436, 'alpha': 1.7775474507684652, 'critic_loss': 586.2825299742594, 'actor_loss': -4.265392609414338, 'time_step': 0.05731781714224402, 'td_error': 1.2368122712655512, 'init_value': 2.8567490577697754, 'ave_value': 2.865395838494208} step=5536
2022-04-22 00:26.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:26.29 [info     ] CQL_20220422002048: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0002979749889043025, 'time_algorithm_update': 0.053608440250330576, 'temp_loss': 4.224705913852405, 'temp': 0.8365970252221719, 'alpha_loss': -32.11155155490589, 'alpha': 1.8466048364694407, 'critic_loss': 677.0227153094518, 'actor_loss': -4.627358907909063, 'time_step': 0.05399354551568886, 'td_error': 1.2366214044096402, 'init_value': 3.1762454509735107, 'ave_value': 3.189045490027039} step=5882
2022-04-22 00:26.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:26.48 [info     ] CQL_20220422002048: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00029694482770269316, 'time_algorithm_update': 0.053111045346783764, 'temp_loss': 4.1804103382750055, 'temp': 0.8276893739410908, 'alpha_loss': -33.36035307294372, 'alpha': 1.9183858550352857, 'critic_loss': 777.8483047044346, 'actor_loss': -4.964130400233186, 'time_step': 0.05349482415039415, 'td_error': 1.2362537518001868, 'init_value': 3.4711921215057373, 'ave_value': 3.4871006063286756} step=6228
2022-04-22 00:26.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:27.07 [info     ] CQL_20220422002048: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0002979543167731665, 'time_algorithm_update': 0.05300157469821114, 'temp_loss': 4.135371391483814, 'temp': 0.8188796573980696, 'alpha_loss': -34.653949274493094, 'alpha': 1.992980504311578, 'critic_loss': 890.374724459786, 'actor_loss': -5.31650287154093, 'time_step': 0.05338468785919895, 'td_error': 1.2372062104321366, 'init_value': 3.8297951221466064, 'ave_value': 3.843407476213997} step=6574
2022-04-22 00:27.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:27.27 [info     ] CQL_20220422002048: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003013066473723836, 'time_algorithm_update': 0.053057714004737105, 'temp_loss': 4.091003291179679, 'temp': 0.8101646660724816, 'alpha_loss': -36.00222819113318, 'alpha': 2.070496011331591, 'critic_loss': 1023.0092256579096, 'actor_loss': -5.577307641850731, 'time_step': 0.053446647748781766, 'td_error': 1.2385449944437297, 'init_value': 4.092054843902588, 'ave_value': 4.103580778591324} step=6920
2022-04-22 00:27.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:27.45 [info     ] CQL_20220422002048: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.000295292435353891, 'time_algorithm_update': 0.05191322213652506, 'temp_loss': 4.0473899193581815, 'temp': 0.8015441339829065, 'alpha_loss': -37.40284113801284, 'alpha': 2.1510424393449905, 'critic_loss': 1159.1042113552203, 'actor_loss': -5.849681229949686, 'time_step': 0.052290768292597954, 'td_error': 1.239494973909223, 'init_value': 4.4008870124816895, 'ave_value': 4.411990692130533} step=7266
2022-04-22 00:27.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:28.04 [info     ] CQL_20220422002048: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00029372066431651914, 'time_algorithm_update': 0.05146696250562723, 'temp_loss': 4.004871703985798, 'temp': 0.793014449363499, 'alpha_loss': -38.86076808113583, 'alpha': 2.2347403847413254, 'critic_loss': 1314.1010244733336, 'actor_loss': -6.084462072118859, 'time_step': 0.0518380899649824, 'td_error': 1.2393417136778062, 'init_value': 4.561151027679443, 'ave_value': 4.57461149442699} step=7612
2022-04-22 00:28.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:28.23 [info     ] CQL_20220422002048: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003024484380821272, 'time_algorithm_update': 0.051388478692556395, 'temp_loss': 3.9621308649206437, 'temp': 0.7845760149762809, 'alpha_loss': -40.372279547542504, 'alpha': 2.3217057981932094, 'critic_loss': 1490.81762413069, 'actor_loss': -6.273951602119931, 'time_step': 0.051771199772123654, 'td_error': 1.2420016982977524, 'init_value': 4.843994140625, 'ave_value': 4.849421597499314} step=7958
2022-04-22 00:28.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:28.41 [info     ] CQL_20220422002048: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.000295631458304521, 'time_algorithm_update': 0.05125910973962332, 'temp_loss': 3.919577738453198, 'temp': 0.7762282725014439, 'alpha_loss': -41.940975542013355, 'alpha': 2.4120558183317238, 'critic_loss': 1684.8541785444138, 'actor_loss': -6.411270130576426, 'time_step': 0.0516342372563533, 'td_error': 1.2411827614552897, 'init_value': 4.965157985687256, 'ave_value': 4.978566665047998} step=8304
2022-04-22 00:28.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:29.00 [info     ] CQL_20220422002048: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0002993469293406933, 'time_algorithm_update': 0.0514737333176453, 'temp_loss': 3.877211572806959, 'temp': 0.7679703383776494, 'alpha_loss': -43.57791674619465, 'alpha': 2.5059329009469535, 'critic_loss': 1876.1780586022173, 'actor_loss': -6.599911871673055, 'time_step': 0.05185284573218726, 'td_error': 1.2423302303452548, 'init_value': 5.101680755615234, 'ave_value': 5.113051315143019} step=8650
2022-04-22 00:29.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:29.18 [info     ] CQL_20220422002048: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00029690141622730763, 'time_algorithm_update': 0.050015772009171504, 'temp_loss': 3.8370401135758856, 'temp': 0.7597995669166476, 'alpha_loss': -45.270392676998426, 'alpha': 2.6034661951781697, 'critic_loss': 2100.2312128144195, 'actor_loss': -6.73859935275392, 'time_step': 0.05039574438436872, 'td_error': 1.241979142594632, 'init_value': 5.182157516479492, 'ave_value': 5.197374274030914} step=8996
2022-04-22 00:29.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:29.36 [info     ] CQL_20220422002048: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0002905454249740336, 'time_algorithm_update': 0.04851430620072205, 'temp_loss': 3.796331050768064, 'temp': 0.751715257505461, 'alpha_loss': -47.03299169595531, 'alpha': 2.7047914431963354, 'critic_loss': 2340.774769689306, 'actor_loss': -6.728866311167017, 'time_step': 0.048884238121826525, 'td_error': 1.2428157192924005, 'init_value': 5.153711795806885, 'ave_value': 5.164221712603508} step=9342
2022-04-22 00:29.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:29.53 [info     ] CQL_20220422002048: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00028743840366429675, 'time_algorithm_update': 0.04850680221711969, 'temp_loss': 3.755754816049785, 'temp': 0.7437169260716852, 'alpha_loss': -48.863793885776765, 'alpha': 2.8100845338292206, 'critic_loss': 2561.3913778845285, 'actor_loss': -6.672487439447745, 'time_step': 0.048874688286312744, 'td_error': 1.242615993921416, 'init_value': 5.12034273147583, 'ave_value': 5.128875536396357} step=9688
2022-04-22 00:29.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:30.11 [info     ] CQL_20220422002048: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0002916121069406498, 'time_algorithm_update': 0.04846588862424641, 'temp_loss': 3.7159252194310888, 'temp': 0.7358041698532987, 'alpha_loss': -50.76815261179312, 'alpha': 2.9194795127548923, 'critic_loss': 2845.469078813674, 'actor_loss': -6.633934209801558, 'time_step': 0.04883758525627886, 'td_error': 1.2430397821700927, 'init_value': 5.14531946182251, 'ave_value': 5.152565629044773} step=10034
2022-04-22 00:30.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:30.29 [info     ] CQL_20220422002048: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00028911698071253783, 'time_algorithm_update': 0.04876912742680897, 'temp_loss': 3.6767757834726678, 'temp': 0.7279743924306307, 'alpha_loss': -52.74005259254764, 'alpha': 3.033134935219164, 'critic_loss': 3161.055250575777, 'actor_loss': -6.533721068001896, 'time_step': 0.0491377756085699, 'td_error': 1.242650456402418, 'init_value': 5.003244876861572, 'ave_value': 5.010339475480682} step=10380
2022-04-22 00:30.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:30.48 [info     ] CQL_20220422002048: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00029600424573600635, 'time_algorithm_update': 0.05206722675720391, 'temp_loss': 3.636799529797769, 'temp': 0.7202300050355106, 'alpha_loss': -54.79439596495877, 'alpha': 3.1512109902552785, 'critic_loss': 3467.3862671604047, 'actor_loss': -6.39277902366109, 'time_step': 0.05244353327447968, 'td_error': 1.2429639167099351, 'init_value': 4.948020935058594, 'ave_value': 4.950584689719578} step=10726
2022-04-22 00:30.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:31.07 [info     ] CQL_20220422002048: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.000296190983987268, 'time_algorithm_update': 0.052343092212787254, 'temp_loss': 3.5973202713652155, 'temp': 0.7125680377717653, 'alpha_loss': -56.931370773756434, 'alpha': 3.2738915061675056, 'critic_loss': 3743.53870899002, 'actor_loss': -6.347804363063305, 'time_step': 0.05272152864864107, 'td_error': 1.2427059716269238, 'init_value': 4.953876495361328, 'ave_value': 4.957849118154498} step=11072
2022-04-22 00:31.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:31.26 [info     ] CQL_20220422002048: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003039609490102426, 'time_algorithm_update': 0.05215027704404269, 'temp_loss': 3.5592204353023815, 'temp': 0.7049887760870719, 'alpha_loss': -59.142205475382724, 'alpha': 3.4013380404841693, 'critic_loss': 3875.854506299675, 'actor_loss': -6.418217311704779, 'time_step': 0.05253791877989135, 'td_error': 1.242597984800878, 'init_value': 4.938841342926025, 'ave_value': 4.94535592464883} step=11418
2022-04-22 00:31.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:31.45 [info     ] CQL_20220422002048: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0002966960730580236, 'time_algorithm_update': 0.052117308440235995, 'temp_loss': 3.5235276497857417, 'temp': 0.6974870696922258, 'alpha_loss': -61.44747065946546, 'alpha': 3.533743812858714, 'critic_loss': 4189.894012627574, 'actor_loss': -6.372394428087797, 'time_step': 0.05249858109248167, 'td_error': 1.2435609342713128, 'init_value': 5.0671162605285645, 'ave_value': 5.0684510189868535} step=11764
2022-04-22 00:31.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:32.04 [info     ] CQL_20220422002048: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0002940135195076121, 'time_algorithm_update': 0.05223500935328489, 'temp_loss': 3.4842330679039044, 'temp': 0.6900655262043022, 'alpha_loss': -63.84686008078514, 'alpha': 3.671316160632007, 'critic_loss': 4384.891885922823, 'actor_loss': -6.458913528850313, 'time_step': 0.0526103139612716, 'td_error': 1.2438460706928924, 'init_value': 5.117213249206543, 'ave_value': 5.119517177493157} step=12110
2022-04-22 00:32.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:32.23 [info     ] CQL_20220422002048: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00030730363261492955, 'time_algorithm_update': 0.05326980800297908, 'temp_loss': 3.4477010972238, 'temp': 0.6827235051317712, 'alpha_loss': -66.32743198196323, 'alpha': 3.8142325023695225, 'critic_loss': 4720.264325268696, 'actor_loss': -6.451463403040274, 'time_step': 0.05366168476942647, 'td_error': 1.2428177359643071, 'init_value': 5.067587375640869, 'ave_value': 5.073324267340814} step=12456
2022-04-22 00:32.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:32.42 [info     ] CQL_20220422002048: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003082070047455716, 'time_algorithm_update': 0.05339679759361841, 'temp_loss': 3.4112153969748173, 'temp': 0.6754606903632941, 'alpha_loss': -68.91209173478143, 'alpha': 3.9627171176017364, 'critic_loss': 5128.764742989071, 'actor_loss': -6.445778391953838, 'time_step': 0.05379154847536473, 'td_error': 1.2443565607538978, 'init_value': 5.121329307556152, 'ave_value': 5.121201807675965} step=12802
2022-04-22 00:32.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:33.02 [info     ] CQL_20220422002048: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0002970130457354419, 'time_algorithm_update': 0.05394621598238201, 'temp_loss': 3.374370411641336, 'temp': 0.6682734231039279, 'alpha_loss': -71.58912888014248, 'alpha': 4.116977402240555, 'critic_loss': 5477.7976935061415, 'actor_loss': -6.366109832862898, 'time_step': 0.054325907216595776, 'td_error': 1.2446231098156553, 'init_value': 5.1079254150390625, 'ave_value': 5.106033383437727} step=13148
2022-04-22 00:33.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:33.22 [info     ] CQL_20220422002048: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00029632052934238675, 'time_algorithm_update': 0.05387321993105673, 'temp_loss': 3.338469893946124, 'temp': 0.6611642937439715, 'alpha_loss': -74.3650531217542, 'alpha': 4.277230243462359, 'critic_loss': 5482.8643594201585, 'actor_loss': -6.452642661298631, 'time_step': 0.0542544016259254, 'td_error': 1.2491076475405338, 'init_value': 5.508670806884766, 'ave_value': 5.49150175544084} step=13494
2022-04-22 00:33.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:33.41 [info     ] CQL_20220422002048: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00030177728289124595, 'time_algorithm_update': 0.05342593082802833, 'temp_loss': 3.303425027455898, 'temp': 0.6541306591791913, 'alpha_loss': -77.26796069724023, 'alpha': 4.443723518724386, 'critic_loss': 4521.139192614252, 'actor_loss': -6.873366925068673, 'time_step': 0.053810917573168096, 'td_error': 1.2477187304179662, 'init_value': 5.620386123657227, 'ave_value': 5.615713842472458} step=13840
2022-04-22 00:33.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:34.00 [info     ] CQL_20220422002048: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00029890799108957277, 'time_algorithm_update': 0.053334015642287416, 'temp_loss': 3.2683699165465514, 'temp': 0.6471697910328131, 'alpha_loss': -80.28042097587806, 'alpha': 4.616723179128129, 'critic_loss': 4013.3241623013005, 'actor_loss': -7.021978028247811, 'time_step': 0.05371815000655334, 'td_error': 1.2495516043869854, 'init_value': 5.855564117431641, 'ave_value': 5.8495872746264554} step=14186
2022-04-22 00:34.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:34.21 [info     ] CQL_20220422002048: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003001242014714059, 'time_algorithm_update': 0.056167600471849384, 'temp_loss': 3.2326317357190084, 'temp': 0.6402853257049715, 'alpha_loss': -83.40364464996867, 'alpha': 4.796452243893133, 'critic_loss': 3538.886529646857, 'actor_loss': -7.200716725663643, 'time_step': 0.05655398120769876, 'td_error': 1.2509801102971903, 'init_value': 6.059098720550537, 'ave_value': 6.050237520614921} step=14532
2022-04-22 00:34.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:34.42 [info     ] CQL_20220422002048: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003057270380802926, 'time_algorithm_update': 0.05695102118343287, 'temp_loss': 3.1987398470068253, 'temp': 0.633473837134466, 'alpha_loss': -86.65598208918048, 'alpha': 4.983177565425807, 'critic_loss': 3092.9681340035677, 'actor_loss': -7.415289685905324, 'time_step': 0.05733815989742389, 'td_error': 1.2518380189584, 'init_value': 6.254888534545898, 'ave_value': 6.248531614360565} step=14878
2022-04-22 00:34.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:35.02 [info     ] CQL_20220422002048: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00029972522934048164, 'time_algorithm_update': 0.05670375424313408, 'temp_loss': 3.165351752600918, 'temp': 0.6267340951572263, 'alpha_loss': -90.0170251482484, 'alpha': 5.177158460451689, 'critic_loss': 2805.1893381107748, 'actor_loss': -7.567408330178674, 'time_step': 0.05708533835548886, 'td_error': 1.2531987628796448, 'init_value': 6.448484420776367, 'ave_value': 6.443828039318535} step=15224
2022-04-22 00:35.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:35.23 [info     ] CQL_20220422002048: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003070369621232755, 'time_algorithm_update': 0.057037984704695684, 'temp_loss': 3.132150637621135, 'temp': 0.6200643746839093, 'alpha_loss': -93.52032620644982, 'alpha': 5.378683048865699, 'critic_loss': 2459.79519706241, 'actor_loss': -7.798454906210045, 'time_step': 0.05742792311431356, 'td_error': 1.2529172125248194, 'init_value': 6.591568470001221, 'ave_value': 6.592125792930679} step=15570
2022-04-22 00:35.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:35.43 [info     ] CQL_20220422002048: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00029874192496944716, 'time_algorithm_update': 0.056979695496531584, 'temp_loss': 3.0982846820974626, 'temp': 0.6134663313799511, 'alpha_loss': -97.16975991574326, 'alpha': 5.588059866359468, 'critic_loss': 2247.688736932126, 'actor_loss': -7.954475802493233, 'time_step': 0.05736010680997992, 'td_error': 1.2553622395692503, 'init_value': 6.781366348266602, 'ave_value': 6.780025680260728} step=15916
2022-04-22 00:35.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:36.04 [info     ] CQL_20220422002048: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00029442765120136946, 'time_algorithm_update': 0.05685619811791216, 'temp_loss': 3.064713261720073, 'temp': 0.6069402482812805, 'alpha_loss': -100.95125747416061, 'alpha': 5.805590418721899, 'critic_loss': 2204.4713205326498, 'actor_loss': -8.046724072770576, 'time_step': 0.05722870854284033, 'td_error': 1.2564128683801812, 'init_value': 6.928264617919922, 'ave_value': 6.925293801575829} step=16262
2022-04-22 00:36.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:36.25 [info     ] CQL_20220422002048: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003051592435450912, 'time_algorithm_update': 0.05724375647616524, 'temp_loss': 3.0316070152844996, 'temp': 0.6004845217473245, 'alpha_loss': -104.87790016218418, 'alpha': 6.031606945688325, 'critic_loss': 2250.309690971595, 'actor_loss': -8.078443915857745, 'time_step': 0.057628508937152136, 'td_error': 1.2555504390939227, 'init_value': 6.899390697479248, 'ave_value': 6.901254968815022} step=16608
2022-04-22 00:36.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:36.46 [info     ] CQL_20220422002048: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00030439092933787085, 'time_algorithm_update': 0.05710715847897392, 'temp_loss': 2.999803486586995, 'temp': 0.5940961813651069, 'alpha_loss': -108.96847913444387, 'alpha': 6.266398478105578, 'critic_loss': 2264.169614935197, 'actor_loss': -8.171803366931188, 'time_step': 0.05749033779078136, 'td_error': 1.2571440050687817, 'init_value': 7.0332489013671875, 'ave_value': 7.032196870037819} step=16954
2022-04-22 00:36.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:37.06 [info     ] CQL_20220422002048: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0002896324058488614, 'time_algorithm_update': 0.05593751758509288, 'temp_loss': 2.968018797780737, 'temp': 0.5877752216220591, 'alpha_loss': -113.2097402583657, 'alpha': 6.510358509989832, 'critic_loss': 2223.093930635838, 'actor_loss': -8.272515324498876, 'time_step': 0.05630560003953173, 'td_error': 1.257885062362838, 'init_value': 7.1822028160095215, 'ave_value': 7.183936562861465} step=17300
2022-04-22 00:37.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422002048/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910049

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 00:37.08 [info     ] FQE_20220422003706: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00014887960617151637, 'time_algorithm_update': 0.009637983505335231, 'loss': 0.006661802660516005, 'time_step': 0.009851136450040139, 'init_value': -0.05866334214806557, 'ave_value': 0.0018942326508663797, 'soft_opc': nan} step=177




2022-04-22 00:37.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.10 [info     ] FQE_20220422003706: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00014356570055255782, 'time_algorithm_update': 0.00933587214367538, 'loss': 0.004352905512603242, 'time_step': 0.009540335606720489, 'init_value': -0.15599791705608368, 'ave_value': -0.05972969274039383, 'soft_opc': nan} step=354




2022-04-22 00:37.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.12 [info     ] FQE_20220422003706: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00014745178869215108, 'time_algorithm_update': 0.009819118316564183, 'loss': 0.00358022735546222, 'time_step': 0.010031737850210762, 'init_value': -0.2399299591779709, 'ave_value': -0.11500955834403052, 'soft_opc': nan} step=531




2022-04-22 00:37.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.14 [info     ] FQE_20220422003706: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.0001497066627114506, 'time_algorithm_update': 0.008826302943256615, 'loss': 0.0030426731929025157, 'time_step': 0.009042482591618252, 'init_value': -0.25820183753967285, 'ave_value': -0.11905313292937773, 'soft_opc': nan} step=708




2022-04-22 00:37.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.16 [info     ] FQE_20220422003706: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00014831656116550253, 'time_algorithm_update': 0.00992410869921668, 'loss': 0.0026315762372003436, 'time_step': 0.010140241202661547, 'init_value': -0.30107513070106506, 'ave_value': -0.1415168634171153, 'soft_opc': nan} step=885




2022-04-22 00:37.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.17 [info     ] FQE_20220422003706: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00014879339832370565, 'time_algorithm_update': 0.00978675939268985, 'loss': 0.002399743531007151, 'time_step': 0.01000002009720452, 'init_value': -0.297516793012619, 'ave_value': -0.12894858052624059, 'soft_opc': nan} step=1062




2022-04-22 00:37.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.19 [info     ] FQE_20220422003706: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00014902373491707495, 'time_algorithm_update': 0.009757164507935949, 'loss': 0.0021784700483536232, 'time_step': 0.009967302871962725, 'init_value': -0.32756614685058594, 'ave_value': -0.133579672649279, 'soft_opc': nan} step=1239




2022-04-22 00:37.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.21 [info     ] FQE_20220422003706: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00015381231146343683, 'time_algorithm_update': 0.009134064959940937, 'loss': 0.001922654586523182, 'time_step': 0.009355415732173597, 'init_value': -0.3666355609893799, 'ave_value': -0.1693559830521678, 'soft_opc': nan} step=1416




2022-04-22 00:37.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.23 [info     ] FQE_20220422003706: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.0001474369717183086, 'time_algorithm_update': 0.009887478445882851, 'loss': 0.001941978528906228, 'time_step': 0.010100491302835066, 'init_value': -0.3501622676849365, 'ave_value': -0.15095628074652798, 'soft_opc': nan} step=1593




2022-04-22 00:37.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.25 [info     ] FQE_20220422003706: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00015228751015528447, 'time_algorithm_update': 0.009448086474574891, 'loss': 0.0020078054743050727, 'time_step': 0.009669096456409175, 'init_value': -0.3707483112812042, 'ave_value': -0.16510203839713208, 'soft_opc': nan} step=1770




2022-04-22 00:37.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.27 [info     ] FQE_20220422003706: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00014970127472096245, 'time_algorithm_update': 0.009631338766065695, 'loss': 0.0021585608585237785, 'time_step': 0.009845338972274867, 'init_value': -0.3554683029651642, 'ave_value': -0.1506634884365686, 'soft_opc': nan} step=1947




2022-04-22 00:37.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.28 [info     ] FQE_20220422003706: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00015127456794350835, 'time_algorithm_update': 0.009248476243962002, 'loss': 0.0022238276956082024, 'time_step': 0.009466173958643681, 'init_value': -0.35322675108909607, 'ave_value': -0.14878468877068152, 'soft_opc': nan} step=2124




2022-04-22 00:37.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.30 [info     ] FQE_20220422003706: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00015073307489944717, 'time_algorithm_update': 0.009863563850101105, 'loss': 0.0022669267613114525, 'time_step': 0.010084313861394332, 'init_value': -0.4026714265346527, 'ave_value': -0.19317007927341504, 'soft_opc': nan} step=2301




2022-04-22 00:37.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.32 [info     ] FQE_20220422003706: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.0001464684804280599, 'time_algorithm_update': 0.009556659870902024, 'loss': 0.0026031554024469232, 'time_step': 0.009771491174643996, 'init_value': -0.43962135910987854, 'ave_value': -0.22041088491678237, 'soft_opc': nan} step=2478




2022-04-22 00:37.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.34 [info     ] FQE_20220422003706: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00015117084912661104, 'time_algorithm_update': 0.009894617533279677, 'loss': 0.0027915570427174286, 'time_step': 0.01011170371104095, 'init_value': -0.4726642668247223, 'ave_value': -0.25433253094151215, 'soft_opc': nan} step=2655




2022-04-22 00:37.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.36 [info     ] FQE_20220422003706: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00015685517909163135, 'time_algorithm_update': 0.009256217439295882, 'loss': 0.00295508970502543, 'time_step': 0.009483839832456773, 'init_value': -0.5101165175437927, 'ave_value': -0.29596489679191385, 'soft_opc': nan} step=2832




2022-04-22 00:37.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.38 [info     ] FQE_20220422003706: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00015107925328831215, 'time_algorithm_update': 0.009661097984529484, 'loss': 0.003193657601082508, 'time_step': 0.009877707325132554, 'init_value': -0.5588310956954956, 'ave_value': -0.3399273349626644, 'soft_opc': nan} step=3009




2022-04-22 00:37.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.40 [info     ] FQE_20220422003706: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00015050139130845581, 'time_algorithm_update': 0.009466596915897003, 'loss': 0.0035700501435394216, 'time_step': 0.009684314835543013, 'init_value': -0.54924076795578, 'ave_value': -0.3405012626167353, 'soft_opc': nan} step=3186




2022-04-22 00:37.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.41 [info     ] FQE_20220422003706: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001504663693702827, 'time_algorithm_update': 0.010000960301544707, 'loss': 0.003887950271068109, 'time_step': 0.010224953883111813, 'init_value': -0.588320255279541, 'ave_value': -0.37504358363223145, 'soft_opc': nan} step=3363




2022-04-22 00:37.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.43 [info     ] FQE_20220422003706: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00014882303227139058, 'time_algorithm_update': 0.009274531218965175, 'loss': 0.004169108729880556, 'time_step': 0.009485471046577066, 'init_value': -0.6418406963348389, 'ave_value': -0.4378787271186217, 'soft_opc': nan} step=3540




2022-04-22 00:37.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.45 [info     ] FQE_20220422003706: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00015012288497666181, 'time_algorithm_update': 0.00925675758534232, 'loss': 0.004482708082272603, 'time_step': 0.009470166459595416, 'init_value': -0.6980724334716797, 'ave_value': -0.48545856335991855, 'soft_opc': nan} step=3717




2022-04-22 00:37.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.47 [info     ] FQE_20220422003706: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.0001500056961835441, 'time_algorithm_update': 0.009409089546419133, 'loss': 0.0043914136943791465, 'time_step': 0.009630926584793349, 'init_value': -0.6893417239189148, 'ave_value': -0.47398130254702525, 'soft_opc': nan} step=3894




2022-04-22 00:37.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.49 [info     ] FQE_20220422003706: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.0001477521691618666, 'time_algorithm_update': 0.009356993066389008, 'loss': 0.005103028430024656, 'time_step': 0.009573772128692454, 'init_value': -0.7548523545265198, 'ave_value': -0.5413740645266242, 'soft_opc': nan} step=4071




2022-04-22 00:37.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.50 [info     ] FQE_20220422003706: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00014834215412032133, 'time_algorithm_update': 0.008818462070098704, 'loss': 0.005304698984429377, 'time_step': 0.009030849920154292, 'init_value': -0.8104159832000732, 'ave_value': -0.5908118262640886, 'soft_opc': nan} step=4248




2022-04-22 00:37.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.52 [info     ] FQE_20220422003706: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00014562121892379502, 'time_algorithm_update': 0.009078719521646446, 'loss': 0.004905421361800725, 'time_step': 0.009288861926666087, 'init_value': -0.7944990992546082, 'ave_value': -0.5797748670686741, 'soft_opc': nan} step=4425




2022-04-22 00:37.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.54 [info     ] FQE_20220422003706: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00014523463060626875, 'time_algorithm_update': 0.009386104378996596, 'loss': 0.0060368866495601305, 'time_step': 0.00959413873273774, 'init_value': -0.7719705700874329, 'ave_value': -0.5611382829937133, 'soft_opc': nan} step=4602




2022-04-22 00:37.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.56 [info     ] FQE_20220422003706: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.0001489563850359728, 'time_algorithm_update': 0.009315804573102186, 'loss': 0.006224418232792273, 'time_step': 0.009534305098366603, 'init_value': -0.8397688865661621, 'ave_value': -0.6229540777993989, 'soft_opc': nan} step=4779




2022-04-22 00:37.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.57 [info     ] FQE_20220422003706: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00014771310623082738, 'time_algorithm_update': 0.008795382612842625, 'loss': 0.006466675473286344, 'time_step': 0.009007299013730496, 'init_value': -0.8387658596038818, 'ave_value': -0.6012669668888097, 'soft_opc': nan} step=4956




2022-04-22 00:37.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:37.59 [info     ] FQE_20220422003706: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00014817512641518803, 'time_algorithm_update': 0.009456304507066974, 'loss': 0.006486043071250647, 'time_step': 0.009669259443121442, 'init_value': -0.8821759223937988, 'ave_value': -0.6300405412263519, 'soft_opc': nan} step=5133




2022-04-22 00:37.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.01 [info     ] FQE_20220422003706: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00015068188898980953, 'time_algorithm_update': 0.009436091460750601, 'loss': 0.006989132141362939, 'time_step': 0.009652875911044535, 'init_value': -0.9467461705207825, 'ave_value': -0.7005266819753357, 'soft_opc': nan} step=5310




2022-04-22 00:38.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.03 [info     ] FQE_20220422003706: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00015216897436454472, 'time_algorithm_update': 0.009425063591218938, 'loss': 0.007457556304953319, 'time_step': 0.00964400997269625, 'init_value': -0.9476019740104675, 'ave_value': -0.7083948050667573, 'soft_opc': nan} step=5487




2022-04-22 00:38.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.05 [info     ] FQE_20220422003706: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00014430116125419316, 'time_algorithm_update': 0.008901594722338315, 'loss': 0.007573438356143796, 'time_step': 0.009108555518974692, 'init_value': -0.984096348285675, 'ave_value': -0.7402274459152967, 'soft_opc': nan} step=5664




2022-04-22 00:38.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.06 [info     ] FQE_20220422003706: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.0001473925207967812, 'time_algorithm_update': 0.00887248475672835, 'loss': 0.007868133481699804, 'time_step': 0.009087118051819882, 'init_value': -0.932017982006073, 'ave_value': -0.6795771818686355, 'soft_opc': nan} step=5841




2022-04-22 00:38.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.08 [info     ] FQE_20220422003706: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.0001497659306068205, 'time_algorithm_update': 0.009344416149591996, 'loss': 0.008222893864050571, 'time_step': 0.0095639363520563, 'init_value': -0.9966058731079102, 'ave_value': -0.7377508808296543, 'soft_opc': nan} step=6018




2022-04-22 00:38.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.10 [info     ] FQE_20220422003706: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.0001507344218970692, 'time_algorithm_update': 0.00941607777008229, 'loss': 0.008697818725311992, 'time_step': 0.009632322074329786, 'init_value': -0.9945755004882812, 'ave_value': -0.7336958337161276, 'soft_opc': nan} step=6195




2022-04-22 00:38.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.12 [info     ] FQE_20220422003706: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00014643884648037495, 'time_algorithm_update': 0.00885169384843212, 'loss': 0.009212455670822853, 'time_step': 0.009062551509189066, 'init_value': -1.0041375160217285, 'ave_value': -0.7446424281319698, 'soft_opc': nan} step=6372




2022-04-22 00:38.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.13 [info     ] FQE_20220422003706: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00014741003176586777, 'time_algorithm_update': 0.009330784533656922, 'loss': 0.009385710678821019, 'time_step': 0.009543148137755314, 'init_value': -1.062496542930603, 'ave_value': -0.8035402555331901, 'soft_opc': nan} step=6549




2022-04-22 00:38.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.15 [info     ] FQE_20220422003706: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.0001500595760884258, 'time_algorithm_update': 0.009373681019928496, 'loss': 0.009619376234747049, 'time_step': 0.009587883275780975, 'init_value': -1.1435011625289917, 'ave_value': -0.8764063098330845, 'soft_opc': nan} step=6726




2022-04-22 00:38.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.17 [info     ] FQE_20220422003706: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00015698179686810337, 'time_algorithm_update': 0.009429667629091079, 'loss': 0.010279969031078015, 'time_step': 0.00965426331859524, 'init_value': -1.1524564027786255, 'ave_value': -0.8956114131185385, 'soft_opc': nan} step=6903




2022-04-22 00:38.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.19 [info     ] FQE_20220422003706: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.0001475339555470957, 'time_algorithm_update': 0.00889956749091714, 'loss': 0.010354547594814682, 'time_step': 0.009111306088118904, 'init_value': -1.1255831718444824, 'ave_value': -0.8609771561638282, 'soft_opc': nan} step=7080




2022-04-22 00:38.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.20 [info     ] FQE_20220422003706: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00014527908152779618, 'time_algorithm_update': 0.008839820064393813, 'loss': 0.010509571950753761, 'time_step': 0.009051194972237624, 'init_value': -1.1238267421722412, 'ave_value': -0.8577411618967493, 'soft_opc': nan} step=7257




2022-04-22 00:38.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.22 [info     ] FQE_20220422003706: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00014963931283034848, 'time_algorithm_update': 0.009422283388127041, 'loss': 0.011058002176278897, 'time_step': 0.009637986199330475, 'init_value': -1.2373367547988892, 'ave_value': -0.9497247774791611, 'soft_opc': nan} step=7434




2022-04-22 00:38.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.24 [info     ] FQE_20220422003706: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.0001506738070040773, 'time_algorithm_update': 0.009405507879742122, 'loss': 0.011275526628951874, 'time_step': 0.0096182068862484, 'init_value': -1.2807878255844116, 'ave_value': -0.9835541982796636, 'soft_opc': nan} step=7611




2022-04-22 00:38.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.26 [info     ] FQE_20220422003706: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00014346332873328257, 'time_algorithm_update': 0.0090327020418846, 'loss': 0.010979899432671934, 'time_step': 0.009238732063164145, 'init_value': -1.3045238256454468, 'ave_value': -1.0092374773600974, 'soft_opc': nan} step=7788




2022-04-22 00:38.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.28 [info     ] FQE_20220422003706: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00014781009005961445, 'time_algorithm_update': 0.009223031458881615, 'loss': 0.011897519837583463, 'time_step': 0.00944107535195216, 'init_value': -1.3668937683105469, 'ave_value': -1.0563301121541957, 'soft_opc': nan} step=7965




2022-04-22 00:38.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.29 [info     ] FQE_20220422003706: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.0001486061656542417, 'time_algorithm_update': 0.009367285475219037, 'loss': 0.013068151635928131, 'time_step': 0.0095817679065769, 'init_value': -1.4223856925964355, 'ave_value': -1.102284475017686, 'soft_opc': nan} step=8142




2022-04-22 00:38.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.31 [info     ] FQE_20220422003706: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00015091491957842294, 'time_algorithm_update': 0.009303381214034085, 'loss': 0.012781826939235184, 'time_step': 0.009516047892597435, 'init_value': -1.4619475603103638, 'ave_value': -1.1373357675522118, 'soft_opc': nan} step=8319




2022-04-22 00:38.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.33 [info     ] FQE_20220422003706: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00014597143830552613, 'time_algorithm_update': 0.009110007582411254, 'loss': 0.013285541323231965, 'time_step': 0.009318165859933628, 'init_value': -1.4619160890579224, 'ave_value': -1.1338864062909966, 'soft_opc': nan} step=8496




2022-04-22 00:38.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.35 [info     ] FQE_20220422003706: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00015007978105275644, 'time_algorithm_update': 0.008634664244570975, 'loss': 0.01342216373742216, 'time_step': 0.008854457887552552, 'init_value': -1.4819040298461914, 'ave_value': -1.1597369555193742, 'soft_opc': nan} step=8673




2022-04-22 00:38.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 00:38.36 [info     ] FQE_20220422003706: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00015180259101134908, 'time_algorithm_update': 0.009382657412081788, 'loss': 0.013742829739750169, 'time_step': 0.009599404146442305, 'init_value': -1.489038109779358, 'ave_value': -1.167046540411743, 'soft_opc': nan} step=8850




2022-04-22 00:38.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003706/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

Read chunk # 38 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 00:38.37 [debug    ] RoundIterator is selected.
2022-04-22 00:38.37 [info     ] Directory is created at d3rlpy_logs/FQE_20220422003837
2022-04-22 00:38.37 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 00:38.37 [debug    ] Building models...
2022-04-22 00:38.37 [debug    ] Models have been built.
2022-04-22 00:38.37 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422003837/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'max

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 00:38.40 [info     ] FQE_20220422003837: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015174926713455554, 'time_algorithm_update': 0.009297477644543315, 'loss': 0.020016541755974813, 'time_step': 0.00951436716456746, 'init_value': -1.2102138996124268, 'ave_value': -1.200059196305973, 'soft_opc': nan} step=344




2022-04-22 00:38.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:38.44 [info     ] FQE_20220422003837: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001520403595857842, 'time_algorithm_update': 0.00926892355430958, 'loss': 0.01871533380810518, 'time_step': 0.009482423926508703, 'init_value': -2.07171630859375, 'ave_value': -2.0626278894038887, 'soft_opc': nan} step=688




2022-04-22 00:38.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:38.47 [info     ] FQE_20220422003837: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001558481260787609, 'time_algorithm_update': 0.009191788213197575, 'loss': 0.021071573364721655, 'time_step': 0.009417580310688463, 'init_value': -3.095615863800049, 'ave_value': -3.080900291574968, 'soft_opc': nan} step=1032




2022-04-22 00:38.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:38.51 [info     ] FQE_20220422003837: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001542394937470902, 'time_algorithm_update': 0.009249457093172295, 'loss': 0.023150160123572447, 'time_step': 0.00946807584097219, 'init_value': -3.9772791862487793, 'ave_value': -3.9303976700112626, 'soft_opc': nan} step=1376




2022-04-22 00:38.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:38.54 [info     ] FQE_20220422003837: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015204590420390284, 'time_algorithm_update': 0.009294856426327727, 'loss': 0.02959928460971474, 'time_step': 0.009514069141343583, 'init_value': -4.949404716491699, 'ave_value': -4.939437521255768, 'soft_opc': nan} step=1720




2022-04-22 00:38.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:38.58 [info     ] FQE_20220422003837: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015442523845406465, 'time_algorithm_update': 0.009250471065210741, 'loss': 0.03392683642883893, 'time_step': 0.009472865004872167, 'init_value': -5.568790435791016, 'ave_value': -5.61556871960292, 'soft_opc': nan} step=2064




2022-04-22 00:38.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.01 [info     ] FQE_20220422003837: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015331077021221782, 'time_algorithm_update': 0.009292055008023284, 'loss': 0.03968947746000404, 'time_step': 0.009511075047559516, 'init_value': -6.426346778869629, 'ave_value': -6.589818871960984, 'soft_opc': nan} step=2408




2022-04-22 00:39.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.05 [info     ] FQE_20220422003837: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015271541684172874, 'time_algorithm_update': 0.009190760379613833, 'loss': 0.04683278089248424, 'time_step': 0.009410736172698265, 'init_value': -6.99370002746582, 'ave_value': -7.344752184872155, 'soft_opc': nan} step=2752




2022-04-22 00:39.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.08 [info     ] FQE_20220422003837: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001521782819614854, 'time_algorithm_update': 0.009384343097376268, 'loss': 0.05446054626440326, 'time_step': 0.00960357660471007, 'init_value': -7.571894645690918, 'ave_value': -8.166878995916864, 'soft_opc': nan} step=3096




2022-04-22 00:39.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.12 [info     ] FQE_20220422003837: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015535049660261288, 'time_algorithm_update': 0.009126892616582472, 'loss': 0.06738578175671052, 'time_step': 0.009350823108540025, 'init_value': -8.430093765258789, 'ave_value': -9.333757042240451, 'soft_opc': nan} step=3440




2022-04-22 00:39.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.15 [info     ] FQE_20220422003837: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015491039253944574, 'time_algorithm_update': 0.009248031433238539, 'loss': 0.07658644980036242, 'time_step': 0.009469434965488523, 'init_value': -9.157047271728516, 'ave_value': -10.389296041576712, 'soft_opc': nan} step=3784




2022-04-22 00:39.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.18 [info     ] FQE_20220422003837: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015767369159432344, 'time_algorithm_update': 0.008746214384256407, 'loss': 0.09268943114249512, 'time_step': 0.008971167165179586, 'init_value': -9.95162582397461, 'ave_value': -11.467292236691122, 'soft_opc': nan} step=4128




2022-04-22 00:39.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.22 [info     ] FQE_20220422003837: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015418127525684445, 'time_algorithm_update': 0.009428889252418695, 'loss': 0.10648911408159535, 'time_step': 0.009649556736613428, 'init_value': -10.648582458496094, 'ave_value': -12.505913752424824, 'soft_opc': nan} step=4472




2022-04-22 00:39.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.25 [info     ] FQE_20220422003837: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015447583309439726, 'time_algorithm_update': 0.009086329576581023, 'loss': 0.12170537310479165, 'time_step': 0.009306093288022418, 'init_value': -11.484125137329102, 'ave_value': -13.580416282716099, 'soft_opc': nan} step=4816




2022-04-22 00:39.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.29 [info     ] FQE_20220422003837: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015435246534125749, 'time_algorithm_update': 0.00916223401247069, 'loss': 0.13706608295852188, 'time_step': 0.009390718715135441, 'init_value': -11.879293441772461, 'ave_value': -14.21935567753809, 'soft_opc': nan} step=5160




2022-04-22 00:39.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.32 [info     ] FQE_20220422003837: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015559168749077376, 'time_algorithm_update': 0.009173052948574687, 'loss': 0.1520153158398475, 'time_step': 0.009395010942636534, 'init_value': -12.921663284301758, 'ave_value': -15.37547903039434, 'soft_opc': nan} step=5504




2022-04-22 00:39.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.36 [info     ] FQE_20220422003837: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001546331616335137, 'time_algorithm_update': 0.009371705526529356, 'loss': 0.17386121503185742, 'time_step': 0.009593039751052856, 'init_value': -13.326374053955078, 'ave_value': -15.890813089920593, 'soft_opc': nan} step=5848




2022-04-22 00:39.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.39 [info     ] FQE_20220422003837: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015576634296151094, 'time_algorithm_update': 0.00913342556288076, 'loss': 0.18593694769160085, 'time_step': 0.009355773759442706, 'init_value': -13.949155807495117, 'ave_value': -16.62275997368065, 'soft_opc': nan} step=6192




2022-04-22 00:39.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.43 [info     ] FQE_20220422003837: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015516752420469772, 'time_algorithm_update': 0.009188080942908, 'loss': 0.20477917575450658, 'time_step': 0.009411304496055426, 'init_value': -14.529962539672852, 'ave_value': -17.37950842799367, 'soft_opc': nan} step=6536




2022-04-22 00:39.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.46 [info     ] FQE_20220422003837: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015512039495068928, 'time_algorithm_update': 0.009123210990151693, 'loss': 0.21835243981331587, 'time_step': 0.00934435808381369, 'init_value': -15.2113037109375, 'ave_value': -18.126510477925205, 'soft_opc': nan} step=6880




2022-04-22 00:39.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.50 [info     ] FQE_20220422003837: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015650239101676054, 'time_algorithm_update': 0.009499205406322035, 'loss': 0.23801743335545408, 'time_step': 0.009725140277729478, 'init_value': -16.02220916748047, 'ave_value': -19.12275807723269, 'soft_opc': nan} step=7224




2022-04-22 00:39.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.53 [info     ] FQE_20220422003837: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015280690304068632, 'time_algorithm_update': 0.009180911751680596, 'loss': 0.25326591967821643, 'time_step': 0.009402842715729115, 'init_value': -16.270042419433594, 'ave_value': -19.714794124595755, 'soft_opc': nan} step=7568




2022-04-22 00:39.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:39.57 [info     ] FQE_20220422003837: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015591812688250874, 'time_algorithm_update': 0.009254201900127322, 'loss': 0.27395833995149926, 'time_step': 0.009476588215938834, 'init_value': -17.026992797851562, 'ave_value': -20.63242366445494, 'soft_opc': nan} step=7912




2022-04-22 00:39.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.00 [info     ] FQE_20220422003837: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015314997628677722, 'time_algorithm_update': 0.009145429661107618, 'loss': 0.2930635462926571, 'time_step': 0.009366670320200365, 'init_value': -17.311279296875, 'ave_value': -21.155550674839063, 'soft_opc': nan} step=8256




2022-04-22 00:40.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.04 [info     ] FQE_20220422003837: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015596941460010617, 'time_algorithm_update': 0.009410457555637804, 'loss': 0.3089159652915632, 'time_step': 0.00963199069333631, 'init_value': -17.752700805664062, 'ave_value': -21.704397570603604, 'soft_opc': nan} step=8600




2022-04-22 00:40.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.07 [info     ] FQE_20220422003837: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015081014744071074, 'time_algorithm_update': 0.009131371974945068, 'loss': 0.3260909135258475, 'time_step': 0.009346763754999914, 'init_value': -18.158828735351562, 'ave_value': -22.424521688754496, 'soft_opc': nan} step=8944




2022-04-22 00:40.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.11 [info     ] FQE_20220422003837: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001537820627523023, 'time_algorithm_update': 0.009082390818484994, 'loss': 0.34400963101286963, 'time_step': 0.009300222923589308, 'init_value': -18.567636489868164, 'ave_value': -23.02352462099479, 'soft_opc': nan} step=9288




2022-04-22 00:40.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.14 [info     ] FQE_20220422003837: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001523647197457247, 'time_algorithm_update': 0.009099233982174895, 'loss': 0.36354368998734066, 'time_step': 0.009317685005276702, 'init_value': -19.084985733032227, 'ave_value': -23.771247733579024, 'soft_opc': nan} step=9632




2022-04-22 00:40.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.18 [info     ] FQE_20220422003837: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015300858852475188, 'time_algorithm_update': 0.009356836939966955, 'loss': 0.3779023556027908, 'time_step': 0.00957662213680356, 'init_value': -19.33169174194336, 'ave_value': -24.254605925136858, 'soft_opc': nan} step=9976




2022-04-22 00:40.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.21 [info     ] FQE_20220422003837: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001527576945548834, 'time_algorithm_update': 0.009100742118303166, 'loss': 0.3979176110750454, 'time_step': 0.009320691574451536, 'init_value': -19.52430534362793, 'ave_value': -24.61864417783312, 'soft_opc': nan} step=10320




2022-04-22 00:40.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.25 [info     ] FQE_20220422003837: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015350621800089991, 'time_algorithm_update': 0.009181110664855602, 'loss': 0.4090483174009552, 'time_step': 0.009404780559761579, 'init_value': -19.778114318847656, 'ave_value': -25.15635019250818, 'soft_opc': nan} step=10664




2022-04-22 00:40.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.28 [info     ] FQE_20220422003837: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015155659165493277, 'time_algorithm_update': 0.009117821621340375, 'loss': 0.42544489873161667, 'time_step': 0.009336908196294031, 'init_value': -20.4009952545166, 'ave_value': -25.70692768746668, 'soft_opc': nan} step=11008




2022-04-22 00:40.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.31 [info     ] FQE_20220422003837: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015221293582472691, 'time_algorithm_update': 0.008362372254216394, 'loss': 0.45274065558387094, 'time_step': 0.008583235186199809, 'init_value': -20.767641067504883, 'ave_value': -26.24118984929613, 'soft_opc': nan} step=11352




2022-04-22 00:40.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.34 [info     ] FQE_20220422003837: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.000152371650518373, 'time_algorithm_update': 0.008474449778712072, 'loss': 0.4781037056114698, 'time_step': 0.008697637291841729, 'init_value': -21.10495376586914, 'ave_value': -26.650937425687506, 'soft_opc': nan} step=11696




2022-04-22 00:40.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.38 [info     ] FQE_20220422003837: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015056826347528503, 'time_algorithm_update': 0.008210766454075659, 'loss': 0.49770823615971344, 'time_step': 0.008427696172581163, 'init_value': -21.6481990814209, 'ave_value': -27.434579891234904, 'soft_opc': nan} step=12040




2022-04-22 00:40.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.41 [info     ] FQE_20220422003837: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015270363452822664, 'time_algorithm_update': 0.008457423642624256, 'loss': 0.5173589358820991, 'time_step': 0.008675835853399233, 'init_value': -21.871265411376953, 'ave_value': -27.938908154201936, 'soft_opc': nan} step=12384




2022-04-22 00:40.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.44 [info     ] FQE_20220422003837: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015401147132696106, 'time_algorithm_update': 0.008457180372504302, 'loss': 0.5416002422462889, 'time_step': 0.008680733137352521, 'init_value': -22.202980041503906, 'ave_value': -28.656025543916332, 'soft_opc': nan} step=12728




2022-04-22 00:40.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.47 [info     ] FQE_20220422003837: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001521713511888371, 'time_algorithm_update': 0.008426495762758477, 'loss': 0.559402605935628, 'time_step': 0.008647310179333354, 'init_value': -22.16741371154785, 'ave_value': -28.72454335278756, 'soft_opc': nan} step=13072




2022-04-22 00:40.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.50 [info     ] FQE_20220422003837: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015361711036327274, 'time_algorithm_update': 0.008265417675639307, 'loss': 0.5585317668656624, 'time_step': 0.008490567983582963, 'init_value': -22.03929328918457, 'ave_value': -29.023138471847183, 'soft_opc': nan} step=13416




2022-04-22 00:40.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.54 [info     ] FQE_20220422003837: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001533149286758068, 'time_algorithm_update': 0.008395709270654722, 'loss': 0.5801952801168312, 'time_step': 0.008613919102868368, 'init_value': -22.568132400512695, 'ave_value': -29.621405771508947, 'soft_opc': nan} step=13760




2022-04-22 00:40.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:40.57 [info     ] FQE_20220422003837: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015386107356049294, 'time_algorithm_update': 0.008476743171381395, 'loss': 0.5858486365028766, 'time_step': 0.00869669332060703, 'init_value': -22.722545623779297, 'ave_value': -29.943819095019823, 'soft_opc': nan} step=14104




2022-04-22 00:40.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.00 [info     ] FQE_20220422003837: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015143183774726335, 'time_algorithm_update': 0.00846185933711917, 'loss': 0.6182427811869529, 'time_step': 0.008680948684381884, 'init_value': -22.754764556884766, 'ave_value': -30.31991243531575, 'soft_opc': nan} step=14448




2022-04-22 00:41.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.03 [info     ] FQE_20220422003837: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015275214993676475, 'time_algorithm_update': 0.008164960284565771, 'loss': 0.6256500041534647, 'time_step': 0.008386586987695028, 'init_value': -23.35181427001953, 'ave_value': -30.957061999660354, 'soft_opc': nan} step=14792




2022-04-22 00:41.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.06 [info     ] FQE_20220422003837: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001559195130370384, 'time_algorithm_update': 0.008466153643852057, 'loss': 0.6393292242617801, 'time_step': 0.008691394744917404, 'init_value': -23.302387237548828, 'ave_value': -31.161426031904984, 'soft_opc': nan} step=15136




2022-04-22 00:41.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.09 [info     ] FQE_20220422003837: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015256571215252544, 'time_algorithm_update': 0.008438517187916956, 'loss': 0.6432032644488784, 'time_step': 0.00865926091061082, 'init_value': -23.140762329101562, 'ave_value': -31.529715938342584, 'soft_opc': nan} step=15480




2022-04-22 00:41.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.13 [info     ] FQE_20220422003837: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015371829964393793, 'time_algorithm_update': 0.00849049174508383, 'loss': 0.6581208556728048, 'time_step': 0.008712284093679384, 'init_value': -23.843406677246094, 'ave_value': -32.260584271531386, 'soft_opc': nan} step=15824




2022-04-22 00:41.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.16 [info     ] FQE_20220422003837: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015226214431052985, 'time_algorithm_update': 0.008193175460017004, 'loss': 0.6516250700549071, 'time_step': 0.008413341849349266, 'init_value': -23.614988327026367, 'ave_value': -32.24851545886145, 'soft_opc': nan} step=16168




2022-04-22 00:41.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.19 [info     ] FQE_20220422003837: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001502043979112492, 'time_algorithm_update': 0.00841152875922447, 'loss': 0.6356320180361658, 'time_step': 0.008628698975540871, 'init_value': -23.17530059814453, 'ave_value': -32.14279273905464, 'soft_opc': nan} step=16512




2022-04-22 00:41.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.22 [info     ] FQE_20220422003837: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001473440680392953, 'time_algorithm_update': 0.008322340804477071, 'loss': 0.625882777120095, 'time_step': 0.008535383052604144, 'init_value': -23.224567413330078, 'ave_value': -32.321052493316095, 'soft_opc': nan} step=16856




2022-04-22 00:41.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 00:41.25 [info     ] FQE_20220422003837: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015169243479883947, 'time_algorithm_update': 0.008426984382230182, 'loss': 0.6257450304124071, 'time_step': 0.008645229561384334, 'init_value': -23.063030242919922, 'ave_value': -32.24356379874118, 'soft_opc': nan} step=17200




2022-04-22 00:41.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422003837/model_17200.pt
search iteration:  8
using hyper params:  [0.00018666094298148454, 0.008215775460759493, 4.812800646243045e-05, 7]
2022-04-22 00:41.25 [debug    ] RoundIterator is selected.
2022-04-22 00:41.25 [info     ] Directory is created at d3rlpy_logs/CQL_20220422004125
2022-04-22 00:41.25 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 00:41.25 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 00:41.25 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422004125/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.00018666094298148454, 'actor_optim_factory': {'op

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:41.44 [info     ] CQL_20220422004125: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003742758249271812, 'time_algorithm_update': 0.0522445178445364, 'temp_loss': 3.838243408010185, 'temp': 0.991198639821455, 'alpha_loss': -17.454816007889765, 'alpha': 1.017755287575584, 'critic_loss': 139.76654557685632, 'actor_loss': 8.250829857445693, 'time_step': 0.052703638986355994, 'td_error': 1.4242177719223656, 'init_value': -11.637107849121094, 'ave_value': -10.777017565212187} step=346
2022-04-22 00:41.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:42.04 [info     ] CQL_20220422004125: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003725910462395993, 'time_algorithm_update': 0.052482198428556406, 'temp_loss': 4.411731517383818, 'temp': 0.9734277949167814, 'alpha_loss': -18.28501292873669, 'alpha': 1.0544169603744684, 'critic_loss': 246.1316847167263, 'actor_loss': 13.5615432634519, 'time_step': 0.05294627399113826, 'td_error': 1.597337679362242, 'init_value': -15.021381378173828, 'ave_value': -14.070930212535469} step=692
2022-04-22 00:42.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:42.23 [info     ] CQL_20220422004125: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003803210451423777, 'time_algorithm_update': 0.052327643929189344, 'temp_loss': 4.548737911819723, 'temp': 0.9562004773603009, 'alpha_loss': -18.978842316335335, 'alpha': 1.092901996449928, 'critic_loss': 521.17287819923, 'actor_loss': 15.327375001300966, 'time_step': 0.05279439170925603, 'td_error': 1.5219921907739713, 'init_value': -14.96209716796875, 'ave_value': -14.136370771289606} step=1038
2022-04-22 00:42.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:42.42 [info     ] CQL_20220422004125: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00037510408831469585, 'time_algorithm_update': 0.053656331376533285, 'temp_loss': 4.568340311160666, 'temp': 0.9396835691322482, 'alpha_loss': -19.67374252583939, 'alpha': 1.133205663262075, 'critic_loss': 954.9889974428739, 'actor_loss': 11.191912342358187, 'time_step': 0.05411697536534657, 'td_error': 1.3385810623401244, 'init_value': -9.366704940795898, 'ave_value': -9.102266785317317} step=1384
2022-04-22 00:42.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:43.00 [info     ] CQL_20220422004125: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00037484361946238257, 'time_algorithm_update': 0.05010759072496712, 'temp_loss': 4.548628621018691, 'temp': 0.9237602706244915, 'alpha_loss': -20.38916691465874, 'alpha': 1.1753720492985897, 'critic_loss': 1515.769487502258, 'actor_loss': 6.906412684159472, 'time_step': 0.05057034464929834, 'td_error': 1.3324545679743258, 'init_value': -7.969062328338623, 'ave_value': -7.870495779992506} step=1730
2022-04-22 00:43.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:43.19 [info     ] CQL_20220422004125: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00037576490744000915, 'time_algorithm_update': 0.050305225945621555, 'temp_loss': 4.506385916230307, 'temp': 0.9083361313866742, 'alpha_loss': -21.160350314454536, 'alpha': 1.2194952582348288, 'critic_loss': 2052.1483514157335, 'actor_loss': 6.698344544868249, 'time_step': 0.050767164698914985, 'td_error': 1.3499083032879584, 'init_value': -8.498954772949219, 'ave_value': -8.43315222056458} step=2076
2022-04-22 00:43.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:43.37 [info     ] CQL_20220422004125: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00037591719213937746, 'time_algorithm_update': 0.04998192483979153, 'temp_loss': 4.457323872285082, 'temp': 0.8933518844188293, 'alpha_loss': -21.975558727462857, 'alpha': 1.2656382553150198, 'critic_loss': 2537.4604238168354, 'actor_loss': 7.230471276134425, 'time_step': 0.050442552979970946, 'td_error': 1.3705693053487193, 'init_value': -9.236892700195312, 'ave_value': -9.181868421347462} step=2422
2022-04-22 00:43.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:43.55 [info     ] CQL_20220422004125: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00036800596755364037, 'time_algorithm_update': 0.04981001470819374, 'temp_loss': 4.402310576742095, 'temp': 0.8787542683196206, 'alpha_loss': -22.82312335857766, 'alpha': 1.3138408560973371, 'critic_loss': 3016.409457697345, 'actor_loss': 7.958893624344313, 'time_step': 0.05026387823799442, 'td_error': 1.3914270095891839, 'init_value': -9.956493377685547, 'ave_value': -9.917439349469781} step=2768
2022-04-22 00:43.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:44.13 [info     ] CQL_20220422004125: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00036716667902951983, 'time_algorithm_update': 0.05005654778783721, 'temp_loss': 4.339347809036343, 'temp': 0.8645042429080588, 'alpha_loss': -23.702371762667088, 'alpha': 1.3641322172446058, 'critic_loss': 3486.652307763954, 'actor_loss': 8.78981432887171, 'time_step': 0.050507655722557464, 'td_error': 1.4154949776326318, 'init_value': -10.79574203491211, 'ave_value': -10.765963730285268} step=3114
2022-04-22 00:44.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:44.31 [info     ] CQL_20220422004125: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00038055188394006277, 'time_algorithm_update': 0.0501943261637164, 'temp_loss': 4.280641157503073, 'temp': 0.8505717105948167, 'alpha_loss': -24.617690185590977, 'alpha': 1.4165553478147253, 'critic_loss': 3964.3436639157335, 'actor_loss': 9.716597714176068, 'time_step': 0.050658475456899305, 'td_error': 1.4461100831626386, 'init_value': -11.891633033752441, 'ave_value': -11.861853367211753} step=3460
2022-04-22 00:44.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:44.50 [info     ] CQL_20220422004125: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00037211145279724474, 'time_algorithm_update': 0.05007883372334387, 'temp_loss': 4.213822243530626, 'temp': 0.8369299548898819, 'alpha_loss': -25.57522169013933, 'alpha': 1.4711623612166829, 'critic_loss': 4435.331586716492, 'actor_loss': 10.691295706467821, 'time_step': 0.05053235478483872, 'td_error': 1.4756934547549658, 'init_value': -12.807354927062988, 'ave_value': -12.781242292647454} step=3806
2022-04-22 00:44.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:45.08 [info     ] CQL_20220422004125: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003702916161862412, 'time_algorithm_update': 0.05030112597294625, 'temp_loss': 4.151646202010227, 'temp': 0.8235653504470869, 'alpha_loss': -26.565166104046597, 'alpha': 1.5280145513529033, 'critic_loss': 4910.428629086886, 'actor_loss': 11.75599797750484, 'time_step': 0.050755859799467756, 'td_error': 1.5160242834040139, 'init_value': -14.105459213256836, 'ave_value': -14.074752612903028} step=4152
2022-04-22 00:45.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:45.27 [info     ] CQL_20220422004125: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003794438577111746, 'time_algorithm_update': 0.053400872759736345, 'temp_loss': 4.089625146347664, 'temp': 0.8104504141848901, 'alpha_loss': -27.59533367267234, 'alpha': 1.587172425206686, 'critic_loss': 5346.232594043533, 'actor_loss': 12.834523250601885, 'time_step': 0.0538647664075642, 'td_error': 1.551415150928869, 'init_value': -15.070631980895996, 'ave_value': -15.046188544322666} step=4498
2022-04-22 00:45.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:45.47 [info     ] CQL_20220422004125: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00038929550633954175, 'time_algorithm_update': 0.0529504711228299, 'temp_loss': 4.025531096954566, 'temp': 0.7975734468829425, 'alpha_loss': -28.667292258643002, 'alpha': 1.648698410202313, 'critic_loss': 5732.80426131232, 'actor_loss': 13.9666966482394, 'time_step': 0.05342865886026724, 'td_error': 1.5938203321814717, 'init_value': -16.198402404785156, 'ave_value': -16.17923867427777} step=4844
2022-04-22 00:45.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:46.06 [info     ] CQL_20220422004125: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003713107522512447, 'time_algorithm_update': 0.05309133860417184, 'temp_loss': 3.9629236197885063, 'temp': 0.7849309673543611, 'alpha_loss': -29.781512221849034, 'alpha': 1.7126820332053079, 'critic_loss': 6127.811859307261, 'actor_loss': 15.173533009655904, 'time_step': 0.053545047092989004, 'td_error': 1.6318304445784049, 'init_value': -17.076528549194336, 'ave_value': -17.071020912886684} step=5190
2022-04-22 00:46.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:46.25 [info     ] CQL_20220422004125: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003802535161806669, 'time_algorithm_update': 0.052729273118035644, 'temp_loss': 3.9007678169735596, 'temp': 0.7725070089618594, 'alpha_loss': -30.937734047112436, 'alpha': 1.7791999499232782, 'critic_loss': 6466.106582200596, 'actor_loss': 16.39824833897497, 'time_step': 0.05319415282651868, 'td_error': 1.6830961663653528, 'init_value': -18.32350730895996, 'ave_value': -18.31695441765602} step=5536
2022-04-22 00:46.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:46.44 [info     ] CQL_20220422004125: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00037154365826204335, 'time_algorithm_update': 0.05274993904753227, 'temp_loss': 3.8393644239172082, 'temp': 0.760293946962136, 'alpha_loss': -32.14270434076386, 'alpha': 1.848338335235684, 'critic_loss': 6813.805029014631, 'actor_loss': 17.678682729687996, 'time_step': 0.05320556659918989, 'td_error': 1.746095601528866, 'init_value': -19.82455062866211, 'ave_value': -19.807810558556042} step=5882
2022-04-22 00:46.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:47.04 [info     ] CQL_20220422004125: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00037206046154044265, 'time_algorithm_update': 0.05272594352678067, 'temp_loss': 3.779055552675545, 'temp': 0.7482853425031453, 'alpha_loss': -33.3871557092391, 'alpha': 1.9201954124290819, 'critic_loss': 7078.43624825009, 'actor_loss': 18.949704181252187, 'time_step': 0.05317684129483438, 'td_error': 1.8046198244720257, 'init_value': -21.068679809570312, 'ave_value': -21.054399567319635} step=6228
2022-04-22 00:47.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:47.24 [info     ] CQL_20220422004125: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003779485735590058, 'time_algorithm_update': 0.05680981399006926, 'temp_loss': 3.719654789549767, 'temp': 0.7364753705228684, 'alpha_loss': -34.69001693946089, 'alpha': 1.9948682729908496, 'critic_loss': 7277.779223491691, 'actor_loss': 20.282353798088998, 'time_step': 0.057264213617137405, 'td_error': 1.8687784726385117, 'init_value': -22.371562957763672, 'ave_value': -22.359240676080407} step=6574
2022-04-22 00:47.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:47.44 [info     ] CQL_20220422004125: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00037421449760481113, 'time_algorithm_update': 0.055544543817553214, 'temp_loss': 3.661171428730033, 'temp': 0.7248568338465828, 'alpha_loss': -36.0418114744859, 'alpha': 2.072469732664913, 'critic_loss': 7480.152620348627, 'actor_loss': 21.622945862698415, 'time_step': 0.055998639564293656, 'td_error': 1.9411997612580343, 'init_value': -23.80230140686035, 'ave_value': -23.783121261849907} step=6920
2022-04-22 00:47.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:48.05 [info     ] CQL_20220422004125: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00037563536208489037, 'time_algorithm_update': 0.055482407525784705, 'temp_loss': 3.6037989737670544, 'temp': 0.7134253349951926, 'alpha_loss': -37.44638451813273, 'alpha': 2.1531093802755277, 'critic_loss': 7662.595269881232, 'actor_loss': 22.89140114756678, 'time_step': 0.05593392339055938, 'td_error': 2.0089932362353706, 'init_value': -25.025676727294922, 'ave_value': -25.011676287752806} step=7266
2022-04-22 00:48.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:48.25 [info     ] CQL_20220422004125: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00038307801836487876, 'time_algorithm_update': 0.05524463942974289, 'temp_loss': 3.546542821591989, 'temp': 0.7021768957893283, 'alpha_loss': -38.90180111482653, 'alpha': 2.2368896048882103, 'critic_loss': 7919.003995156701, 'actor_loss': 24.25306891292506, 'time_step': 0.055707096364456796, 'td_error': 2.091854166510133, 'init_value': -26.50237464904785, 'ave_value': -26.48108639769104} step=7612
2022-04-22 00:48.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:48.45 [info     ] CQL_20220422004125: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00038834527737832485, 'time_algorithm_update': 0.05532147360674908, 'temp_loss': 3.4900207616001193, 'temp': 0.6911121301912848, 'alpha_loss': -40.40979089902316, 'alpha': 2.323935851885404, 'critic_loss': 8252.899559417901, 'actor_loss': 25.58266296827724, 'time_step': 0.05579461527697613, 'td_error': 2.1662200805037712, 'init_value': -27.735811233520508, 'ave_value': -27.71702168909166} step=7958
2022-04-22 00:48.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:49.05 [info     ] CQL_20220422004125: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.000376114955527245, 'time_algorithm_update': 0.0550095959205848, 'temp_loss': 3.4357163630469, 'temp': 0.6802215138611766, 'alpha_loss': -41.99217912089618, 'alpha': 2.4143781351905336, 'critic_loss': 8645.185572276914, 'actor_loss': 26.892283241183772, 'time_step': 0.05546777785857978, 'td_error': 2.2176181588916912, 'init_value': -28.461463928222656, 'ave_value': -28.468639242733865} step=8304
2022-04-22 00:49.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:49.25 [info     ] CQL_20220422004125: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.000363669643512351, 'time_algorithm_update': 0.054259222366906314, 'temp_loss': 3.3812142941304026, 'temp': 0.6695044225006435, 'alpha_loss': -43.61775977074066, 'alpha': 2.5083463254002476, 'critic_loss': 8951.586809632407, 'actor_loss': 28.061218300306727, 'time_step': 0.054703890932777714, 'td_error': 2.306397866659023, 'init_value': -29.903030395507812, 'ave_value': -29.898139349664767} step=8650
2022-04-22 00:49.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:49.45 [info     ] CQL_20220422004125: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00037908416262940866, 'time_algorithm_update': 0.05531837347614972, 'temp_loss': 3.3276442199773184, 'temp': 0.6589565709491686, 'alpha_loss': -45.30797080773149, 'alpha': 2.6059617713696697, 'critic_loss': 9280.19203000813, 'actor_loss': 29.189829517651155, 'time_step': 0.055776864117969664, 'td_error': 2.367512063874298, 'init_value': -30.79789924621582, 'ave_value': -30.80022399806479} step=8996
2022-04-22 00:49.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:50.05 [info     ] CQL_20220422004125: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00037853979650949465, 'time_algorithm_update': 0.055599910675445736, 'temp_loss': 3.2766466636878215, 'temp': 0.6485740959299782, 'alpha_loss': -47.076199272464464, 'alpha': 2.707372293306913, 'critic_loss': 9272.664432238982, 'actor_loss': 30.274243735164575, 'time_step': 0.056063568660978634, 'td_error': 2.44363209713841, 'init_value': -31.900522232055664, 'ave_value': -31.904183432975614} step=9342
2022-04-22 00:50.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:50.25 [info     ] CQL_20220422004125: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0003856647910410269, 'time_algorithm_update': 0.05527913983846675, 'temp_loss': 3.2243140157247554, 'temp': 0.6383557531530457, 'alpha_loss': -48.89892987157568, 'alpha': 2.8127441681878413, 'critic_loss': 9470.841031995124, 'actor_loss': 31.403246433059604, 'time_step': 0.055750769686836725, 'td_error': 2.5368552597644984, 'init_value': -33.27095413208008, 'ave_value': -33.261875194189614} step=9688
2022-04-22 00:50.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:50.45 [info     ] CQL_20220422004125: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003853712467788961, 'time_algorithm_update': 0.05518328867895755, 'temp_loss': 3.1734585954963817, 'temp': 0.6282993248432358, 'alpha_loss': -50.816517692080815, 'alpha': 2.9222129741845104, 'critic_loss': 9498.591712201951, 'actor_loss': 32.50031688447633, 'time_step': 0.055654013777054805, 'td_error': 2.6052231998668325, 'init_value': -34.164554595947266, 'ave_value': -34.16546570518787} step=10034
2022-04-22 00:50.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:51.05 [info     ] CQL_20220422004125: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00037264892820678005, 'time_algorithm_update': 0.05509021103037575, 'temp_loss': 3.1236323042412026, 'temp': 0.6184023255213148, 'alpha_loss': -52.804425961709434, 'alpha': 3.035980134341069, 'critic_loss': 9561.660192941654, 'actor_loss': 33.60806532402259, 'time_step': 0.05554805463449115, 'td_error': 2.692802802120403, 'init_value': -35.34640121459961, 'ave_value': -35.34262686901717} step=10380
2022-04-22 00:51.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:51.26 [info     ] CQL_20220422004125: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00037917649814848264, 'time_algorithm_update': 0.05524513349367704, 'temp_loss': 3.074908334395789, 'temp': 0.608659960803269, 'alpha_loss': -54.83756576934991, 'alpha': 3.1541572905689303, 'critic_loss': 9513.528142499097, 'actor_loss': 34.63331846556912, 'time_step': 0.05570816029013926, 'td_error': 2.7390755010095176, 'init_value': -35.854652404785156, 'ave_value': -35.87684644967699} step=10726
2022-04-22 00:51.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:51.45 [info     ] CQL_20220422004125: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003739395582607027, 'time_algorithm_update': 0.05469632286556883, 'temp_loss': 3.0269970383947293, 'temp': 0.5990694654470234, 'alpha_loss': -56.98554849349005, 'alpha': 3.276931514629739, 'critic_loss': 7816.19994270457, 'actor_loss': 35.18409791847185, 'time_step': 0.05515624126257924, 'td_error': 2.8018822666340992, 'init_value': -36.709014892578125, 'ave_value': -36.7176974052838} step=11072
2022-04-22 00:51.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:52.04 [info     ] CQL_20220422004125: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00037026267520265084, 'time_algorithm_update': 0.052110813256633075, 'temp_loss': 2.9778864852265814, 'temp': 0.5896334544771669, 'alpha_loss': -59.1953234038601, 'alpha': 3.4044941243408733, 'critic_loss': 6463.836352397941, 'actor_loss': 36.04372761015258, 'time_step': 0.052568490794628345, 'td_error': 2.884760795645431, 'init_value': -37.77789306640625, 'ave_value': -37.776869876693354} step=11418
2022-04-22 00:52.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:52.23 [info     ] CQL_20220422004125: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00037281361618483, 'time_algorithm_update': 0.05226723238222861, 'temp_loss': 2.9318364073086336, 'temp': 0.5803459534755332, 'alpha_loss': -61.50314963897529, 'alpha': 3.5370162332678117, 'critic_loss': 5425.6796169391255, 'actor_loss': 36.958852889220836, 'time_step': 0.05272641140601538, 'td_error': 2.949229387592518, 'init_value': -38.54279708862305, 'ave_value': -38.54709949158786} step=11764
2022-04-22 00:52.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:52.43 [info     ] CQL_20220422004125: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003717703626335012, 'time_algorithm_update': 0.052281893057630244, 'temp_loss': 2.884628344822481, 'temp': 0.5712046214955391, 'alpha_loss': -63.903449438899926, 'alpha': 3.6747013516508775, 'critic_loss': 5076.439663396405, 'actor_loss': 38.36918244334315, 'time_step': 0.05273875197923252, 'td_error': 3.087135192411291, 'init_value': -40.232269287109375, 'ave_value': -40.2241100747193} step=12110
2022-04-22 00:52.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:53.02 [info     ] CQL_20220422004125: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003724752823052379, 'time_algorithm_update': 0.053557554421397306, 'temp_loss': 2.8399197662496842, 'temp': 0.5622079832360923, 'alpha_loss': -66.38414116126265, 'alpha': 3.817734354493246, 'critic_loss': 5472.965553592395, 'actor_loss': 39.77226678484437, 'time_step': 0.05401339903043185, 'td_error': 3.189001890047697, 'init_value': -41.37183380126953, 'ave_value': -41.370298259910555} step=12456
2022-04-22 00:53.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:53.22 [info     ] CQL_20220422004125: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003775123915920368, 'time_algorithm_update': 0.05379949690978651, 'temp_loss': 2.7939914754360395, 'temp': 0.5533534961973312, 'alpha_loss': -68.96227405526045, 'alpha': 3.9663391526723872, 'critic_loss': 5683.861330947435, 'actor_loss': 40.93587341749599, 'time_step': 0.054260520576741654, 'td_error': 3.2945326342144887, 'init_value': -42.5794563293457, 'ave_value': -42.56652753024149} step=12802
2022-04-22 00:53.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:53.41 [info     ] CQL_20220422004125: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00038150900361165837, 'time_algorithm_update': 0.053610281448143754, 'temp_loss': 2.7504940336150243, 'temp': 0.5446397777582179, 'alpha_loss': -71.65703077812415, 'alpha': 4.1207285644002045, 'critic_loss': 5731.624424223266, 'actor_loss': 41.904031533037305, 'time_step': 0.05407412548285688, 'td_error': 3.356095265654539, 'init_value': -43.206485748291016, 'ave_value': -43.21035342010744} step=13148
2022-04-22 00:53.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:54.01 [info     ] CQL_20220422004125: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00037195778995580066, 'time_algorithm_update': 0.053248654211187636, 'temp_loss': 2.707598943241759, 'temp': 0.5360604991458054, 'alpha_loss': -74.44833753288137, 'alpha': 4.281134634348699, 'critic_loss': 5339.868564848266, 'actor_loss': 42.6288111361465, 'time_step': 0.05370448572787246, 'td_error': 3.438687599675425, 'init_value': -44.14048385620117, 'ave_value': -44.13133858255212} step=13494
2022-04-22 00:54.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:54.20 [info     ] CQL_20220422004125: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003722658047097267, 'time_algorithm_update': 0.053505869270060104, 'temp_loss': 2.6648993292295864, 'temp': 0.5276162713938366, 'alpha_loss': -77.35249588806505, 'alpha': 4.44779851533085, 'critic_loss': 4883.4066084492415, 'actor_loss': 43.43448433848475, 'time_step': 0.05395956535559858, 'td_error': 3.5060726155387307, 'init_value': -44.834354400634766, 'ave_value': -44.828385893639485} step=13840
2022-04-22 00:54.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:54.40 [info     ] CQL_20220422004125: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00037073399979255104, 'time_algorithm_update': 0.05363553107818427, 'temp_loss': 2.622796822145495, 'temp': 0.5193060166229403, 'alpha_loss': -80.3425699575788, 'alpha': 4.620926218914848, 'critic_loss': 4524.789227612446, 'actor_loss': 44.17670876166724, 'time_step': 0.05408902113148243, 'td_error': 3.5818263240682353, 'init_value': -45.63075256347656, 'ave_value': -45.6241883465553} step=14186
2022-04-22 00:54.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:54.59 [info     ] CQL_20220422004125: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003756870424127303, 'time_algorithm_update': 0.05358105932356994, 'temp_loss': 2.5816352746390194, 'temp': 0.5111250052217803, 'alpha_loss': -83.47774126350535, 'alpha': 4.800786998230598, 'critic_loss': 4235.559904771044, 'actor_loss': 44.996405154983435, 'time_step': 0.054039791140253146, 'td_error': 3.6594415234963655, 'init_value': -46.43384552001953, 'ave_value': -46.42460624058137} step=14532
2022-04-22 00:54.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:55.19 [info     ] CQL_20220422004125: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003741579937797061, 'time_algorithm_update': 0.053692974796184914, 'temp_loss': 2.5401752849534756, 'temp': 0.503075149276353, 'alpha_loss': -86.71886474962179, 'alpha': 4.987647009722759, 'critic_loss': 4032.834190412753, 'actor_loss': 45.80836375462526, 'time_step': 0.05415229990303172, 'td_error': 3.7341920773999324, 'init_value': -47.168819427490234, 'ave_value': -47.1645749981565} step=14878
2022-04-22 00:55.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:55.38 [info     ] CQL_20220422004125: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00037264892820678005, 'time_algorithm_update': 0.05372992071802216, 'temp_loss': 2.5006964151569875, 'temp': 0.49515180737641507, 'alpha_loss': -90.11827676144638, 'alpha': 5.181786279457842, 'critic_loss': 4066.0106448134934, 'actor_loss': 46.73183047564733, 'time_step': 0.054186700396455095, 'td_error': 3.8247195353905936, 'init_value': -48.08591842651367, 'ave_value': -48.07574320793604} step=15224
2022-04-22 00:55.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:55.58 [info     ] CQL_20220422004125: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003833274620805862, 'time_algorithm_update': 0.053608028185849935, 'temp_loss': 2.4617773514951584, 'temp': 0.4873511010856298, 'alpha_loss': -93.61578095717238, 'alpha': 5.383523602017088, 'critic_loss': 4096.953785449783, 'actor_loss': 47.513003244565404, 'time_step': 0.054072284974114745, 'td_error': 3.9137199594075613, 'init_value': -48.97340393066406, 'ave_value': -48.95603685211717} step=15570
2022-04-22 00:55.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:56.17 [info     ] CQL_20220422004125: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003701345079896078, 'time_algorithm_update': 0.053864144176417, 'temp_loss': 2.4224477434433953, 'temp': 0.4796758147677934, 'alpha_loss': -97.2927179501925, 'alpha': 5.593108756004731, 'critic_loss': 4050.8150613597363, 'actor_loss': 48.1854416400711, 'time_step': 0.05432472683790791, 'td_error': 3.954838245384319, 'init_value': -49.30681228637695, 'ave_value': -49.30891207264972} step=15916
2022-04-22 00:56.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:56.37 [info     ] CQL_20220422004125: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003720942260212981, 'time_algorithm_update': 0.05386228092833061, 'temp_loss': 2.3849685068075366, 'temp': 0.4721197324853412, 'alpha_loss': -101.03847613913476, 'alpha': 5.81083606295503, 'critic_loss': 3757.9949817106212, 'actor_loss': 48.732789397928755, 'time_step': 0.05431487174392435, 'td_error': 4.021201226299308, 'init_value': -49.97593307495117, 'ave_value': -49.97004165034432} step=16262
2022-04-22 00:56.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:56.57 [info     ] CQL_20220422004125: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003759151249262639, 'time_algorithm_update': 0.05381205729666473, 'temp_loss': 2.346722488458446, 'temp': 0.46468303710049974, 'alpha_loss': -104.96237266408225, 'alpha': 6.036993084615365, 'critic_loss': 3565.860667675217, 'actor_loss': 49.35634700135689, 'time_step': 0.05426565277783168, 'td_error': 4.092194866281681, 'init_value': -50.65227127075195, 'ave_value': -50.64207504728271} step=16608
2022-04-22 00:56.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:57.17 [info     ] CQL_20220422004125: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00037992000579833984, 'time_algorithm_update': 0.05668487025134136, 'temp_loss': 2.309964201353878, 'temp': 0.45736297275978705, 'alpha_loss': -109.05928559937229, 'alpha': 6.271968196582243, 'critic_loss': 3669.472990708544, 'actor_loss': 50.19010028949363, 'time_step': 0.05714145009917331, 'td_error': 4.180976892886509, 'init_value': -51.492645263671875, 'ave_value': -51.47854103280108} step=16954
2022-04-22 00:57.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 00:57.38 [info     ] CQL_20220422004125: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003808474954153072, 'time_algorithm_update': 0.05691092000531323, 'temp_loss': 2.2733271928191874, 'temp': 0.4501592617675748, 'alpha_loss': -113.29740012725654, 'alpha': 6.516095273067497, 'critic_loss': 3839.2774799324875, 'actor_loss': 50.97860732106115, 'time_step': 0.05737554269029915, 'td_error': 4.271487337237291, 'init_value': -52.3119010925293, 'ave_value': -52.29875445431474} step=17300
2022-04-22 00:57.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422004125/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 00:57.40 [info     ] FQE_20220422005738: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014390715633530216, 'time_algorithm_update': 0.009728572454797217, 'loss': 0.008058748485700014, 'time_step': 0.009936823902359927, 'init_value': 0.15329280495643616, 'ave_value': 0.19920710760193902, 'soft_opc': nan} step=166




2022-04-22 00:57.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.42 [info     ] FQE_20220422005738: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015248591641345657, 'time_algorithm_update': 0.009504820927079901, 'loss': 0.004662653038569962, 'time_step': 0.009721947003559894, 'init_value': 0.04555438458919525, 'ave_value': 0.11834794158191496, 'soft_opc': nan} step=332




2022-04-22 00:57.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.43 [info     ] FQE_20220422005738: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014784968042948158, 'time_algorithm_update': 0.009443886308784944, 'loss': 0.0036132912907513776, 'time_step': 0.009655956762382784, 'init_value': 0.01467776671051979, 'ave_value': 0.0947170757147408, 'soft_opc': nan} step=498




2022-04-22 00:57.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.45 [info     ] FQE_20220422005738: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014563928167504002, 'time_algorithm_update': 0.009059950529810894, 'loss': 0.003162919673423496, 'time_step': 0.009267264101878706, 'init_value': -0.01617591083049774, 'ave_value': 0.07809580766788876, 'soft_opc': nan} step=664




2022-04-22 00:57.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.47 [info     ] FQE_20220422005738: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014794016458902014, 'time_algorithm_update': 0.009694487215524697, 'loss': 0.0027973455768246875, 'time_step': 0.00990778997720006, 'init_value': -0.0598418228328228, 'ave_value': 0.0434391596492984, 'soft_opc': nan} step=830




2022-04-22 00:57.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.49 [info     ] FQE_20220422005738: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014355383723615165, 'time_algorithm_update': 0.00945556594664792, 'loss': 0.002430791272332959, 'time_step': 0.009658095348312194, 'init_value': -0.10408833622932434, 'ave_value': 0.0026586903426964003, 'soft_opc': nan} step=996




2022-04-22 00:57.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.50 [info     ] FQE_20220422005738: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001495990408472268, 'time_algorithm_update': 0.009809998144586402, 'loss': 0.002201878731503679, 'time_step': 0.010021205408027372, 'init_value': -0.14630094170570374, 'ave_value': -0.032446859552533375, 'soft_opc': nan} step=1162




2022-04-22 00:57.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.52 [info     ] FQE_20220422005738: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001431832830589938, 'time_algorithm_update': 0.009126480803432235, 'loss': 0.002015311531834753, 'time_step': 0.009332676967942571, 'init_value': -0.20237241685390472, 'ave_value': -0.07857858746790805, 'soft_opc': nan} step=1328




2022-04-22 00:57.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.54 [info     ] FQE_20220422005738: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014322924326701337, 'time_algorithm_update': 0.009679277259183217, 'loss': 0.0017173372995226185, 'time_step': 0.009887026016970715, 'init_value': -0.22920718789100647, 'ave_value': -0.1019620733331419, 'soft_opc': nan} step=1494




2022-04-22 00:57.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.56 [info     ] FQE_20220422005738: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014597249318318195, 'time_algorithm_update': 0.009734479777784232, 'loss': 0.0016145835695816316, 'time_step': 0.00994724825204137, 'init_value': -0.2661648690700531, 'ave_value': -0.1351986670245727, 'soft_opc': nan} step=1660




2022-04-22 00:57.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.57 [info     ] FQE_20220422005738: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001522963305553758, 'time_algorithm_update': 0.009836321853729615, 'loss': 0.00140081639808367, 'time_step': 0.010051116885909116, 'init_value': -0.31082683801651, 'ave_value': -0.178371097282968, 'soft_opc': nan} step=1826




2022-04-22 00:57.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:57.59 [info     ] FQE_20220422005738: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014796314469302994, 'time_algorithm_update': 0.00907139749412077, 'loss': 0.001249664628258289, 'time_step': 0.00928429379520646, 'init_value': -0.35324591398239136, 'ave_value': -0.2162478613436457, 'soft_opc': nan} step=1992




2022-04-22 00:57.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.01 [info     ] FQE_20220422005738: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014570534947406813, 'time_algorithm_update': 0.009296401437506619, 'loss': 0.0013373464721827262, 'time_step': 0.00950526185782559, 'init_value': -0.40221500396728516, 'ave_value': -0.25931065201901976, 'soft_opc': nan} step=2158




2022-04-22 00:58.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.03 [info     ] FQE_20220422005738: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014325222337102317, 'time_algorithm_update': 0.009886237512151879, 'loss': 0.0012108076436386209, 'time_step': 0.010091694004564401, 'init_value': -0.45894187688827515, 'ave_value': -0.31742357448800596, 'soft_opc': nan} step=2324




2022-04-22 00:58.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.04 [info     ] FQE_20220422005738: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014842705554272756, 'time_algorithm_update': 0.0097827624125653, 'loss': 0.001136569834331118, 'time_step': 0.00998950866331537, 'init_value': -0.5050863027572632, 'ave_value': -0.36274489238900176, 'soft_opc': nan} step=2490




2022-04-22 00:58.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.06 [info     ] FQE_20220422005738: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014599978205669358, 'time_algorithm_update': 0.00904111115329237, 'loss': 0.0012664080876927171, 'time_step': 0.009252258093960315, 'init_value': -0.5840383172035217, 'ave_value': -0.43300563584560076, 'soft_opc': nan} step=2656




2022-04-22 00:58.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.08 [info     ] FQE_20220422005738: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00014635597366884532, 'time_algorithm_update': 0.00923328083681773, 'loss': 0.0013264744207319096, 'time_step': 0.009441941617483116, 'init_value': -0.6170040369033813, 'ave_value': -0.4662369292032242, 'soft_opc': nan} step=2822




2022-04-22 00:58.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.09 [info     ] FQE_20220422005738: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.0001461003200117364, 'time_algorithm_update': 0.009320951369871577, 'loss': 0.0013186956278233599, 'time_step': 0.009532263480037093, 'init_value': -0.6627721786499023, 'ave_value': -0.513861413061753, 'soft_opc': nan} step=2988




2022-04-22 00:58.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.11 [info     ] FQE_20220422005738: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00014793154705001647, 'time_algorithm_update': 0.00939751102263669, 'loss': 0.0013757696135407202, 'time_step': 0.009610885597137084, 'init_value': -0.6988880634307861, 'ave_value': -0.5389711266844995, 'soft_opc': nan} step=3154




2022-04-22 00:58.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.13 [info     ] FQE_20220422005738: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014271362718329373, 'time_algorithm_update': 0.009122723556426635, 'loss': 0.0014282989591649026, 'time_step': 0.009332039270056299, 'init_value': -0.752349853515625, 'ave_value': -0.5767615678220954, 'soft_opc': nan} step=3320




2022-04-22 00:58.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.14 [info     ] FQE_20220422005738: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001452127134943583, 'time_algorithm_update': 0.00855684567646808, 'loss': 0.0014943404151940246, 'time_step': 0.008769069809511483, 'init_value': -0.8111419677734375, 'ave_value': -0.6408289894360948, 'soft_opc': nan} step=3486




2022-04-22 00:58.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.16 [info     ] FQE_20220422005738: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001501462545739599, 'time_algorithm_update': 0.009426264877778938, 'loss': 0.001692812110497953, 'time_step': 0.009645854134157479, 'init_value': -0.851345419883728, 'ave_value': -0.6800408631600943, 'soft_opc': nan} step=3652




2022-04-22 00:58.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.18 [info     ] FQE_20220422005738: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001496105308992317, 'time_algorithm_update': 0.009404304515884584, 'loss': 0.0017131395234392553, 'time_step': 0.009618980338774532, 'init_value': -0.9013760685920715, 'ave_value': -0.7262355798200981, 'soft_opc': nan} step=3818




2022-04-22 00:58.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.20 [info     ] FQE_20220422005738: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014543533325195312, 'time_algorithm_update': 0.009348727134336909, 'loss': 0.0018370561159695555, 'time_step': 0.009558788265090391, 'init_value': -0.9147562980651855, 'ave_value': -0.7520465861141749, 'soft_opc': nan} step=3984




2022-04-22 00:58.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.21 [info     ] FQE_20220422005738: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001440120030598468, 'time_algorithm_update': 0.008675341146538057, 'loss': 0.0019980613456082427, 'time_step': 0.008881304637495294, 'init_value': -0.9794387221336365, 'ave_value': -0.7875567400190342, 'soft_opc': nan} step=4150




2022-04-22 00:58.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.23 [info     ] FQE_20220422005738: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001482992287141731, 'time_algorithm_update': 0.009353018668760737, 'loss': 0.0021775683098127618, 'time_step': 0.009566413350852138, 'init_value': -1.0316827297210693, 'ave_value': -0.8359044656107152, 'soft_opc': nan} step=4316




2022-04-22 00:58.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.25 [info     ] FQE_20220422005738: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014520553221185524, 'time_algorithm_update': 0.009358091526720897, 'loss': 0.002263991888374068, 'time_step': 0.009567484798201596, 'init_value': -1.0499709844589233, 'ave_value': -0.8503894193081168, 'soft_opc': nan} step=4482




2022-04-22 00:58.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.26 [info     ] FQE_20220422005738: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014859366129679852, 'time_algorithm_update': 0.009492800896426281, 'loss': 0.0024942773189507873, 'time_step': 0.009707374745104686, 'init_value': -1.056230902671814, 'ave_value': -0.8500303994021001, 'soft_opc': nan} step=4648




2022-04-22 00:58.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.28 [info     ] FQE_20220422005738: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014766296708440207, 'time_algorithm_update': 0.008259322269853339, 'loss': 0.002404067140066974, 'time_step': 0.008469789861196494, 'init_value': -1.0984845161437988, 'ave_value': -0.8930274677676828, 'soft_opc': nan} step=4814




2022-04-22 00:58.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.30 [info     ] FQE_20220422005738: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00014582599502011953, 'time_algorithm_update': 0.009347025170383683, 'loss': 0.0027337005756043614, 'time_step': 0.00955662669905697, 'init_value': -1.1069095134735107, 'ave_value': -0.9051674921740988, 'soft_opc': nan} step=4980




2022-04-22 00:58.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.31 [info     ] FQE_20220422005738: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014491828091173288, 'time_algorithm_update': 0.009353924946612623, 'loss': 0.002853850960655766, 'time_step': 0.00956333832568433, 'init_value': -1.153029441833496, 'ave_value': -0.948633231465109, 'soft_opc': nan} step=5146




2022-04-22 00:58.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.33 [info     ] FQE_20220422005738: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001475696104118623, 'time_algorithm_update': 0.009478966873812389, 'loss': 0.003104525968027077, 'time_step': 0.00969078311000962, 'init_value': -1.205789566040039, 'ave_value': -0.9979430777410901, 'soft_opc': nan} step=5312




2022-04-22 00:58.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.35 [info     ] FQE_20220422005738: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001477879213999553, 'time_algorithm_update': 0.008957390325615206, 'loss': 0.003329209327285391, 'time_step': 0.009168728288397732, 'init_value': -1.2346900701522827, 'ave_value': -1.026072347363004, 'soft_opc': nan} step=5478




2022-04-22 00:58.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.36 [info     ] FQE_20220422005738: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015185539980968796, 'time_algorithm_update': 0.009309067783585513, 'loss': 0.0034943287098129472, 'time_step': 0.009524786328694907, 'init_value': -1.3218138217926025, 'ave_value': -1.1276790595764445, 'soft_opc': nan} step=5644




2022-04-22 00:58.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.38 [info     ] FQE_20220422005738: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016475298318518214, 'time_algorithm_update': 0.009525554725922734, 'loss': 0.0036331245297683873, 'time_step': 0.00975891193711614, 'init_value': -1.3863189220428467, 'ave_value': -1.1790277029949803, 'soft_opc': nan} step=5810




2022-04-22 00:58.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.40 [info     ] FQE_20220422005738: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.000151662941438606, 'time_algorithm_update': 0.00930802506136607, 'loss': 0.003855525134643259, 'time_step': 0.009524967297013983, 'init_value': -1.4154462814331055, 'ave_value': -1.1897058829925282, 'soft_opc': nan} step=5976




2022-04-22 00:58.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.42 [info     ] FQE_20220422005738: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014590929789715503, 'time_algorithm_update': 0.008683305188833949, 'loss': 0.004103303558392316, 'time_step': 0.008893718202430081, 'init_value': -1.4330558776855469, 'ave_value': -1.1924248475421992, 'soft_opc': nan} step=6142




2022-04-22 00:58.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.43 [info     ] FQE_20220422005738: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015008305928793298, 'time_algorithm_update': 0.009076552218701467, 'loss': 0.004299608681803985, 'time_step': 0.009292829467589596, 'init_value': -1.4113560914993286, 'ave_value': -1.1794166841879823, 'soft_opc': nan} step=6308




2022-04-22 00:58.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.45 [info     ] FQE_20220422005738: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014745327363531273, 'time_algorithm_update': 0.009476194898766208, 'loss': 0.004422530629538864, 'time_step': 0.009688852781272796, 'init_value': -1.5139143466949463, 'ave_value': -1.2641037430110815, 'soft_opc': nan} step=6474




2022-04-22 00:58.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.47 [info     ] FQE_20220422005738: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.000151033861091338, 'time_algorithm_update': 0.009389018437948572, 'loss': 0.004675654956021806, 'time_step': 0.009605749543890896, 'init_value': -1.5486550331115723, 'ave_value': -1.301788424114856, 'soft_opc': nan} step=6640




2022-04-22 00:58.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.48 [info     ] FQE_20220422005738: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001547149865024061, 'time_algorithm_update': 0.009430035051093045, 'loss': 0.0047353948271869825, 'time_step': 0.009650464517524443, 'init_value': -1.582749366760254, 'ave_value': -1.3239355269911792, 'soft_opc': nan} step=6806




2022-04-22 00:58.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.50 [info     ] FQE_20220422005738: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014451612909156155, 'time_algorithm_update': 0.008747678205191371, 'loss': 0.005064814174947531, 'time_step': 0.008955704160483486, 'init_value': -1.6677582263946533, 'ave_value': -1.397210533480655, 'soft_opc': nan} step=6972




2022-04-22 00:58.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.52 [info     ] FQE_20220422005738: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001483868403607104, 'time_algorithm_update': 0.009337659341743193, 'loss': 0.005256793779076964, 'time_step': 0.009551701775516373, 'init_value': -1.6691780090332031, 'ave_value': -1.391537265628912, 'soft_opc': nan} step=7138




2022-04-22 00:58.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.53 [info     ] FQE_20220422005738: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014573407460408038, 'time_algorithm_update': 0.009416591690247318, 'loss': 0.005237298795998276, 'time_step': 0.009626641330948794, 'init_value': -1.7471520900726318, 'ave_value': -1.4547664062338168, 'soft_opc': nan} step=7304




2022-04-22 00:58.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.55 [info     ] FQE_20220422005738: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.000145517199872488, 'time_algorithm_update': 0.008968699409301022, 'loss': 0.0055418688690301076, 'time_step': 0.00917700974338026, 'init_value': -1.7550873756408691, 'ave_value': -1.4367480190722524, 'soft_opc': nan} step=7470




2022-04-22 00:58.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.57 [info     ] FQE_20220422005738: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014787696930299322, 'time_algorithm_update': 0.008781539388449795, 'loss': 0.005957868738283673, 'time_step': 0.008994508938617017, 'init_value': -1.8169007301330566, 'ave_value': -1.4864427855372564, 'soft_opc': nan} step=7636




2022-04-22 00:58.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:58.58 [info     ] FQE_20220422005738: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00014631575848682817, 'time_algorithm_update': 0.009295091571578061, 'loss': 0.005963618823092891, 'time_step': 0.009505416973527655, 'init_value': -1.870997667312622, 'ave_value': -1.5386553841701767, 'soft_opc': nan} step=7802




2022-04-22 00:58.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:59.00 [info     ] FQE_20220422005738: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00017151344253356197, 'time_algorithm_update': 0.010426338896693954, 'loss': 0.0062330068448132065, 'time_step': 0.010679400110819253, 'init_value': -1.896721363067627, 'ave_value': -1.5506635552054053, 'soft_opc': nan} step=7968




2022-04-22 00:59.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:59.02 [info     ] FQE_20220422005738: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001492586480565818, 'time_algorithm_update': 0.009359862430986151, 'loss': 0.006287308647199141, 'time_step': 0.009573478296578649, 'init_value': -1.9003324508666992, 'ave_value': -1.5332218336159582, 'soft_opc': nan} step=8134




2022-04-22 00:59.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 00:59.04 [info     ] FQE_20220422005738: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00014661737235195665, 'time_algorithm_update': 0.008913215384425887, 'loss': 0.006282428929270862, 'time_step': 0.009124442755457866, 'init_value': -1.98557448387146, 'ave_value': -1.607186448995259, 'soft_opc': nan} step=8300




2022-04-22 00:59.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005738/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 00:59.04 [debug    ] RoundIterator is selected.
2022-04-22 00:59.04 [info     ] Directory is created at d3rlpy_logs/FQE_20220422005904
2022-04-22 00:59.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 00:59.04 [debug    ] Building models...
2022-04-22 00:59.04 [debug    ] Models have been built.
2022-04-22 00:59.04 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422005904/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 00:59.08 [info     ] FQE_20220422005904: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00015190755817252145, 'time_algorithm_update': 0.009331316343495544, 'loss': 0.024416118471975057, 'time_step': 0.009550097290898712, 'init_value': -1.0003231763839722, 'ave_value': -0.9762758531088688, 'soft_opc': nan} step=355




2022-04-22 00:59.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.11 [info     ] FQE_20220422005904: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00015411376953125, 'time_algorithm_update': 0.00897212095663581, 'loss': 0.023073602320862488, 'time_step': 0.009196373442528953, 'init_value': -1.976529836654663, 'ave_value': -1.9410902737343787, 'soft_opc': nan} step=710




2022-04-22 00:59.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.15 [info     ] FQE_20220422005904: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00015610372516470895, 'time_algorithm_update': 0.009361244255388286, 'loss': 0.024265168195354268, 'time_step': 0.009581755920195244, 'init_value': -2.37587308883667, 'ave_value': -2.3559802217986747, 'soft_opc': nan} step=1065




2022-04-22 00:59.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.18 [info     ] FQE_20220422005904: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00015665242369745819, 'time_algorithm_update': 0.009184502883696221, 'loss': 0.02935587120255534, 'time_step': 0.009407673419361383, 'init_value': -3.0025763511657715, 'ave_value': -3.127743511701643, 'soft_opc': nan} step=1420




2022-04-22 00:59.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.22 [info     ] FQE_20220422005904: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00015333068202918684, 'time_algorithm_update': 0.009286578944031622, 'loss': 0.03460082650761789, 'time_step': 0.009507024120277083, 'init_value': -3.3023505210876465, 'ave_value': -3.6249698042255876, 'soft_opc': nan} step=1775




2022-04-22 00:59.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.25 [info     ] FQE_20220422005904: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.0001570721747170032, 'time_algorithm_update': 0.009193210198845662, 'loss': 0.04425335427037847, 'time_step': 0.009424325781808773, 'init_value': -3.541288375854492, 'ave_value': -4.293551134859887, 'soft_opc': nan} step=2130




2022-04-22 00:59.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.29 [info     ] FQE_20220422005904: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00015580419083716164, 'time_algorithm_update': 0.0094238603618783, 'loss': 0.050273514622953576, 'time_step': 0.009648921456135495, 'init_value': -3.737384557723999, 'ave_value': -4.876235644222073, 'soft_opc': nan} step=2485




2022-04-22 00:59.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.33 [info     ] FQE_20220422005904: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.0001540486241730166, 'time_algorithm_update': 0.00920520500398018, 'loss': 0.06707244593072945, 'time_step': 0.009422838184195505, 'init_value': -3.850363254547119, 'ave_value': -5.645055980518211, 'soft_opc': nan} step=2840




2022-04-22 00:59.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.36 [info     ] FQE_20220422005904: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001535207452908368, 'time_algorithm_update': 0.008807333422378756, 'loss': 0.07806721728452494, 'time_step': 0.009028603325427417, 'init_value': -3.9712157249450684, 'ave_value': -6.326799209617279, 'soft_opc': nan} step=3195




2022-04-22 00:59.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.39 [info     ] FQE_20220422005904: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.0001560117157412247, 'time_algorithm_update': 0.007982096201936963, 'loss': 0.09693507260944642, 'time_step': 0.008203471546441736, 'init_value': -4.218573093414307, 'ave_value': -7.113699863377378, 'soft_opc': nan} step=3550




2022-04-22 00:59.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.42 [info     ] FQE_20220422005904: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00015267385563380283, 'time_algorithm_update': 0.008148158436090173, 'loss': 0.10771343081559934, 'time_step': 0.008367153624413719, 'init_value': -4.665462017059326, 'ave_value': -8.092568314666147, 'soft_opc': nan} step=3905




2022-04-22 00:59.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.45 [info     ] FQE_20220422005904: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00015333068202918684, 'time_algorithm_update': 0.008316444343244526, 'loss': 0.12390234146109769, 'time_step': 0.008532289048315773, 'init_value': -4.661689758300781, 'ave_value': -8.450942570410257, 'soft_opc': nan} step=4260




2022-04-22 00:59.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.49 [info     ] FQE_20220422005904: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.0001534253778591962, 'time_algorithm_update': 0.00817738049466845, 'loss': 0.1420984033810001, 'time_step': 0.00839925080957547, 'init_value': -5.180474758148193, 'ave_value': -9.197147291019904, 'soft_opc': nan} step=4615




2022-04-22 00:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.52 [info     ] FQE_20220422005904: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00015377058109767, 'time_algorithm_update': 0.008308567127711336, 'loss': 0.15758196942403283, 'time_step': 0.008527810137036821, 'init_value': -5.57789945602417, 'ave_value': -9.778221904269229, 'soft_opc': nan} step=4970




2022-04-22 00:59.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.55 [info     ] FQE_20220422005904: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.0001507765810254594, 'time_algorithm_update': 0.008047663997596419, 'loss': 0.17679514312513278, 'time_step': 0.008263823683832733, 'init_value': -5.946689605712891, 'ave_value': -10.36871659142592, 'soft_opc': nan} step=5325




2022-04-22 00:59.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 00:59.58 [info     ] FQE_20220422005904: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.0001531493495887434, 'time_algorithm_update': 0.00835152612605565, 'loss': 0.19714476046427876, 'time_step': 0.008569750315706495, 'init_value': -6.395938873291016, 'ave_value': -10.875700333461339, 'soft_opc': nan} step=5680




2022-04-22 00:59.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.01 [info     ] FQE_20220422005904: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00016138923000281964, 'time_algorithm_update': 0.00807973431869292, 'loss': 0.21316945590865863, 'time_step': 0.008307791427827216, 'init_value': -6.905506134033203, 'ave_value': -11.494517977934624, 'soft_opc': nan} step=6035




2022-04-22 01:00.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.05 [info     ] FQE_20220422005904: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.0001523937977535624, 'time_algorithm_update': 0.008378890534521828, 'loss': 0.2284575439412409, 'time_step': 0.008598187943579446, 'init_value': -7.253113746643066, 'ave_value': -11.995112877249948, 'soft_opc': nan} step=6390




2022-04-22 01:00.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.08 [info     ] FQE_20220422005904: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00015691300513039172, 'time_algorithm_update': 0.008008919971089967, 'loss': 0.2508542230142884, 'time_step': 0.00823105489703971, 'init_value': -7.775272369384766, 'ave_value': -12.565746486493826, 'soft_opc': nan} step=6745




2022-04-22 01:00.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.11 [info     ] FQE_20220422005904: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00015640393109388754, 'time_algorithm_update': 0.008266914394539847, 'loss': 0.24937128830228894, 'time_step': 0.008490084930205009, 'init_value': -7.951210975646973, 'ave_value': -12.715475957656574, 'soft_opc': nan} step=7100




2022-04-22 01:00.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.14 [info     ] FQE_20220422005904: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00015409294988068056, 'time_algorithm_update': 0.00802562673326949, 'loss': 0.27491212124136133, 'time_step': 0.00824471393101652, 'init_value': -8.430220603942871, 'ave_value': -13.295873521987536, 'soft_opc': nan} step=7455




2022-04-22 01:00.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.17 [info     ] FQE_20220422005904: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00015329710194762324, 'time_algorithm_update': 0.008333851585925464, 'loss': 0.2904210320267249, 'time_step': 0.008555962334216481, 'init_value': -8.892167091369629, 'ave_value': -13.76141824497526, 'soft_opc': nan} step=7810




2022-04-22 01:00.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.20 [info     ] FQE_20220422005904: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00015300092562823227, 'time_algorithm_update': 0.008103612443091164, 'loss': 0.30055403083457916, 'time_step': 0.008323281247850875, 'init_value': -9.226506233215332, 'ave_value': -14.19422325885434, 'soft_opc': nan} step=8165




2022-04-22 01:00.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.24 [info     ] FQE_20220422005904: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00015605536984725736, 'time_algorithm_update': 0.008381589029876279, 'loss': 0.314967316709144, 'time_step': 0.008605770325996506, 'init_value': -9.426358222961426, 'ave_value': -14.541924359577504, 'soft_opc': nan} step=8520




2022-04-22 01:00.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.27 [info     ] FQE_20220422005904: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00015552413295692123, 'time_algorithm_update': 0.008003191880776849, 'loss': 0.3203593187926101, 'time_step': 0.008227751288615482, 'init_value': -10.105206489562988, 'ave_value': -15.288331591361892, 'soft_opc': nan} step=8875




2022-04-22 01:00.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.30 [info     ] FQE_20220422005904: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00015333739804549956, 'time_algorithm_update': 0.008341551498628, 'loss': 0.3321961237222586, 'time_step': 0.00856042915666607, 'init_value': -10.632040023803711, 'ave_value': -15.742053014339167, 'soft_opc': nan} step=9230




2022-04-22 01:00.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.33 [info     ] FQE_20220422005904: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00014995924184020137, 'time_algorithm_update': 0.007985670465818594, 'loss': 0.34268493851725484, 'time_step': 0.008202531975759587, 'init_value': -10.707268714904785, 'ave_value': -15.985085937630103, 'soft_opc': nan} step=9585




2022-04-22 01:00.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.36 [info     ] FQE_20220422005904: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00015483036847181722, 'time_algorithm_update': 0.00835755039268816, 'loss': 0.34730992224854484, 'time_step': 0.008578874023867325, 'init_value': -11.129216194152832, 'ave_value': -16.4368444971443, 'soft_opc': nan} step=9940




2022-04-22 01:00.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.39 [info     ] FQE_20220422005904: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00015304256492937115, 'time_algorithm_update': 0.007999012503825443, 'loss': 0.3514140075676038, 'time_step': 0.008215837075676717, 'init_value': -11.400209426879883, 'ave_value': -16.823755244613462, 'soft_opc': nan} step=10295




2022-04-22 01:00.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.43 [info     ] FQE_20220422005904: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.0001521345595238914, 'time_algorithm_update': 0.008348572422081316, 'loss': 0.3521640831420959, 'time_step': 0.008565594444812183, 'init_value': -11.449917793273926, 'ave_value': -17.15184972970489, 'soft_opc': nan} step=10650




2022-04-22 01:00.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.46 [info     ] FQE_20220422005904: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.0001494044988927707, 'time_algorithm_update': 0.00808718439558862, 'loss': 0.35744293563750007, 'time_step': 0.008303162749384491, 'init_value': -12.099523544311523, 'ave_value': -17.945905677170675, 'soft_opc': nan} step=11005




2022-04-22 01:00.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.49 [info     ] FQE_20220422005904: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.0001530506241489464, 'time_algorithm_update': 0.008150156450943209, 'loss': 0.3683879867930647, 'time_step': 0.0083702006810148, 'init_value': -11.897854804992676, 'ave_value': -17.988546872998143, 'soft_opc': nan} step=11360




2022-04-22 01:00.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.52 [info     ] FQE_20220422005904: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00015233201040348537, 'time_algorithm_update': 0.008219859969448035, 'loss': 0.3711098590970669, 'time_step': 0.008438362202174227, 'init_value': -11.987569808959961, 'ave_value': -18.217221183973237, 'soft_opc': nan} step=11715




2022-04-22 01:00.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.55 [info     ] FQE_20220422005904: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00015186994848117024, 'time_algorithm_update': 0.008231103252357161, 'loss': 0.3693159535107478, 'time_step': 0.008447791489077285, 'init_value': -11.78159236907959, 'ave_value': -18.16687298400982, 'soft_opc': nan} step=12070




2022-04-22 01:00.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:00.58 [info     ] FQE_20220422005904: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00015100626878335443, 'time_algorithm_update': 0.008200684399671957, 'loss': 0.35762351953332694, 'time_step': 0.008417184587935327, 'init_value': -12.051977157592773, 'ave_value': -18.666468952183866, 'soft_opc': nan} step=12425




2022-04-22 01:00.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.02 [info     ] FQE_20220422005904: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.0001509901503442039, 'time_algorithm_update': 0.008099680887141698, 'loss': 0.3553689532716509, 'time_step': 0.008315011145363392, 'init_value': -12.343958854675293, 'ave_value': -19.146540860902697, 'soft_opc': nan} step=12780




2022-04-22 01:01.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.05 [info     ] FQE_20220422005904: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00015582702529262488, 'time_algorithm_update': 0.008346639552586516, 'loss': 0.3682063111090954, 'time_step': 0.008569517269940444, 'init_value': -12.320244789123535, 'ave_value': -19.583051949118094, 'soft_opc': nan} step=13135




2022-04-22 01:01.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.08 [info     ] FQE_20220422005904: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.000153931093887544, 'time_algorithm_update': 0.008090586057851012, 'loss': 0.36016653548978583, 'time_step': 0.008311007056437747, 'init_value': -11.949609756469727, 'ave_value': -19.401188284611305, 'soft_opc': nan} step=13490




2022-04-22 01:01.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.11 [info     ] FQE_20220422005904: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.0001506321866747359, 'time_algorithm_update': 0.008337830825590751, 'loss': 0.35620527293952836, 'time_step': 0.008554947544151629, 'init_value': -12.091459274291992, 'ave_value': -19.521746645225367, 'soft_opc': nan} step=13845




2022-04-22 01:01.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.14 [info     ] FQE_20220422005904: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00015008013013383033, 'time_algorithm_update': 0.00807182620948469, 'loss': 0.3600774892592724, 'time_step': 0.00828696976245289, 'init_value': -12.19349193572998, 'ave_value': -19.642111622965015, 'soft_opc': nan} step=14200




2022-04-22 01:01.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.18 [info     ] FQE_20220422005904: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00015422055419062224, 'time_algorithm_update': 0.008337744188980317, 'loss': 0.3578434635294785, 'time_step': 0.008557431126984072, 'init_value': -11.704496383666992, 'ave_value': -19.352114618607008, 'soft_opc': nan} step=14555




2022-04-22 01:01.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.21 [info     ] FQE_20220422005904: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00015370006292638645, 'time_algorithm_update': 0.007999526279073367, 'loss': 0.35169947018820635, 'time_step': 0.008220362999069858, 'init_value': -11.984212875366211, 'ave_value': -19.484764751780446, 'soft_opc': nan} step=14910




2022-04-22 01:01.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.24 [info     ] FQE_20220422005904: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.0001522641786387269, 'time_algorithm_update': 0.008264293804974622, 'loss': 0.36002157004943613, 'time_step': 0.008483798067334672, 'init_value': -12.159344673156738, 'ave_value': -19.81780760843628, 'soft_opc': nan} step=15265




2022-04-22 01:01.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.27 [info     ] FQE_20220422005904: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00015099283675072897, 'time_algorithm_update': 0.008080033853020467, 'loss': 0.3654820956788223, 'time_step': 0.008296419197404889, 'init_value': -12.104487419128418, 'ave_value': -19.907597100688683, 'soft_opc': nan} step=15620




2022-04-22 01:01.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.30 [info     ] FQE_20220422005904: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00015460605352697238, 'time_algorithm_update': 0.008347336003478143, 'loss': 0.37575230561196804, 'time_step': 0.008567274792093627, 'init_value': -12.218955993652344, 'ave_value': -20.033071999568275, 'soft_opc': nan} step=15975




2022-04-22 01:01.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.33 [info     ] FQE_20220422005904: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.0001452298231527839, 'time_algorithm_update': 0.007928497018948407, 'loss': 0.3811205058660306, 'time_step': 0.008136453091258733, 'init_value': -12.811975479125977, 'ave_value': -20.69733601800877, 'soft_opc': nan} step=16330




2022-04-22 01:01.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.37 [info     ] FQE_20220422005904: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00013605843127613337, 'time_algorithm_update': 0.008123516029035542, 'loss': 0.4096694954998896, 'time_step': 0.008317467192528953, 'init_value': -12.963194847106934, 'ave_value': -20.66273565691257, 'soft_opc': nan} step=16685




2022-04-22 01:01.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.40 [info     ] FQE_20220422005904: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00013756953494649537, 'time_algorithm_update': 0.007828627841573366, 'loss': 0.41348141717763853, 'time_step': 0.008024689848993866, 'init_value': -12.990220069885254, 'ave_value': -20.817003411829393, 'soft_opc': nan} step=17040




2022-04-22 01:01.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.43 [info     ] FQE_20220422005904: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00015609163633534606, 'time_algorithm_update': 0.008317251608405315, 'loss': 0.4099079140737443, 'time_step': 0.008539438247680664, 'init_value': -12.995245933532715, 'ave_value': -20.846989855220112, 'soft_opc': nan} step=17395




2022-04-22 01:01.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 01:01.46 [info     ] FQE_20220422005904: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00015002640200332856, 'time_algorithm_update': 0.007934970587072238, 'loss': 0.4306556727291203, 'time_step': 0.008152004027030837, 'init_value': -13.388270378112793, 'ave_value': -21.08927028930939, 'soft_opc': nan} step=17750




2022-04-22 01:01.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422005904/model_17750.pt
search iteration:  9
using hyper params:  [0.0025577782335442683, 0.005851701409385876, 7.229662148356081e-05, 1]
2022-04-22 01:01.46 [debug    ] RoundIterator is selected.
2022-04-22 01:01.46 [info     ] Directory is created at d3rlpy_logs/CQL_20220422010146
2022-04-22 01:01.46 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 01:01.46 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 01:01.46 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422010146/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0025577782335442683, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:02.05 [info     ] CQL_20220422010146: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0002938763943710768, 'time_algorithm_update': 0.051407433658666006, 'temp_loss': 4.832664269243361, 'temp': 0.9869298857416031, 'alpha_loss': -17.660816418642252, 'alpha': 1.017724580847459, 'critic_loss': 25.8187654473189, 'actor_loss': -1.8523842727690074, 'time_step': 0.05178573021309913, 'td_error': 1.2180562605808782, 'init_value': 0.23608636856079102, 'ave_value': 0.4014508358692603} step=346
2022-04-22 01:02.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:02.23 [info     ] CQL_20220422010146: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0002911559419135827, 'time_algorithm_update': 0.05145823404279058, 'temp_loss': 4.857346281150862, 'temp': 0.962108090606039, 'alpha_loss': -18.345123285503057, 'alpha': 1.054222159647528, 'critic_loss': 31.786552087419985, 'actor_loss': -1.7693649905954483, 'time_step': 0.05183534884039377, 'td_error': 1.1943880080223277, 'init_value': 0.04904969036579132, 'ave_value': 0.3656745248902969} step=692
2022-04-22 01:02.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:02.43 [info     ] CQL_20220422010146: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0002992539047505814, 'time_algorithm_update': 0.05259909175034892, 'temp_loss': 4.738491379456713, 'temp': 0.9385833166582735, 'alpha_loss': -19.013918363979098, 'alpha': 1.09252599381298, 'critic_loss': 42.41262252895818, 'actor_loss': -1.3935277198435942, 'time_step': 0.05298349278510651, 'td_error': 1.1980583410031576, 'init_value': -0.09705473482608795, 'ave_value': 0.2786702742193366} step=1038
2022-04-22 01:02.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:03.01 [info     ] CQL_20220422010146: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0002999788074824162, 'time_algorithm_update': 0.05149591933785146, 'temp_loss': 4.623610103750504, 'temp': 0.9159491671992175, 'alpha_loss': -19.716603042073334, 'alpha': 1.132714806609071, 'critic_loss': 55.51878704225397, 'actor_loss': -0.8966668364457312, 'time_step': 0.05188093571304586, 'td_error': 1.2035317012908542, 'init_value': -0.6926815509796143, 'ave_value': -0.2480430843374807} step=1384
2022-04-22 01:03.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:03.19 [info     ] CQL_20220422010146: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00028881241131380116, 'time_algorithm_update': 0.04977742164810269, 'temp_loss': 4.513881493166003, 'temp': 0.8940739330184253, 'alpha_loss': -20.450535493089973, 'alpha': 1.1748341097997104, 'critic_loss': 70.7432169500803, 'actor_loss': -0.39089455696716474, 'time_step': 0.05015107110745645, 'td_error': 1.213309772744405, 'init_value': -1.3100275993347168, 'ave_value': -0.7482849737347557} step=1730
2022-04-22 01:03.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:03.38 [info     ] CQL_20220422010146: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00029661476267555547, 'time_algorithm_update': 0.05058619879573756, 'temp_loss': 4.407166256380908, 'temp': 0.8728817101847919, 'alpha_loss': -21.22037882611931, 'alpha': 1.2189245551307766, 'critic_loss': 88.89997367638384, 'actor_loss': 0.015276348918159573, 'time_step': 0.050967011837600974, 'td_error': 1.2061321669566185, 'init_value': -1.4400113821029663, 'ave_value': -0.9087331292625285} step=2076
2022-04-22 01:03.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:03.57 [info     ] CQL_20220422010146: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0002977179654071786, 'time_algorithm_update': 0.05343745554113664, 'temp_loss': 4.3031184631965065, 'temp': 0.8523161021960264, 'alpha_loss': -22.019720143665467, 'alpha': 1.265022566552796, 'critic_loss': 111.78362073512436, 'actor_loss': 0.27148318479128314, 'time_step': 0.05381970805239815, 'td_error': 1.213914366871618, 'init_value': -1.3289276361465454, 'ave_value': -0.8310527424526305} step=2422
2022-04-22 01:03.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:04.17 [info     ] CQL_20220422010146: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00030873414408953893, 'time_algorithm_update': 0.05351821535584555, 'temp_loss': 4.201785931008399, 'temp': 0.8323320605162251, 'alpha_loss': -22.8518922232479, 'alpha': 1.3131498525597456, 'critic_loss': 145.68910682545922, 'actor_loss': 0.056460617422368485, 'time_step': 0.05391323704250975, 'td_error': 1.2061305394219983, 'init_value': -1.0245018005371094, 'ave_value': -0.5813805204875778} step=2768
2022-04-22 01:04.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:04.36 [info     ] CQL_20220422010146: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0002989658730567535, 'time_algorithm_update': 0.053376443124230885, 'temp_loss': 4.10359358374094, 'temp': 0.8128965743359803, 'alpha_loss': -23.72008716164297, 'alpha': 1.3633578674641649, 'critic_loss': 195.65496693870236, 'actor_loss': -0.5820401144143991, 'time_step': 0.0537610612163654, 'td_error': 1.2098066088963384, 'init_value': -0.4231444001197815, 'ave_value': -0.1287312220063126} step=3114
2022-04-22 01:04.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:04.56 [info     ] CQL_20220422010146: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00030208323043205834, 'time_algorithm_update': 0.05372260071638692, 'temp_loss': 4.008876052205962, 'temp': 0.7939714958212968, 'alpha_loss': -24.622419836893247, 'alpha': 1.4156869264007303, 'critic_loss': 255.8019752722944, 'actor_loss': -1.358897477388382, 'time_step': 0.05410975045551454, 'td_error': 1.2176147484130126, 'init_value': 0.25331616401672363, 'ave_value': 0.4180134489362191} step=3460
2022-04-22 01:04.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:05.15 [info     ] CQL_20220422010146: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003049621692282616, 'time_algorithm_update': 0.053431950552615126, 'temp_loss': 3.9155557913587273, 'temp': 0.7755315245231452, 'alpha_loss': -25.56870058092768, 'alpha': 1.4701970670953652, 'critic_loss': 320.2411873878082, 'actor_loss': -2.0217565601271703, 'time_step': 0.053822028154582646, 'td_error': 1.226015218807495, 'init_value': 0.8199803829193115, 'ave_value': 0.9111210514324545} step=3806
2022-04-22 01:05.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:05.35 [info     ] CQL_20220422010146: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00030241536267230966, 'time_algorithm_update': 0.05352704166676957, 'temp_loss': 3.8257335072996987, 'temp': 0.7575568367291048, 'alpha_loss': -26.555703295448613, 'alpha': 1.526958151704314, 'critic_loss': 382.13258167774, 'actor_loss': -2.6282069938031234, 'time_step': 0.05391501553485848, 'td_error': 1.229137364622786, 'init_value': 1.3402880430221558, 'ave_value': 1.3959633524523012} step=4152
2022-04-22 01:05.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:05.54 [info     ] CQL_20220422010146: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003038479413600326, 'time_algorithm_update': 0.053721525765567844, 'temp_loss': 3.7370206283007055, 'temp': 0.7400231723151455, 'alpha_loss': -27.580245690538703, 'alpha': 1.5860230057914821, 'critic_loss': 447.20549699198995, 'actor_loss': -3.157087587896799, 'time_step': 0.05411037268666174, 'td_error': 1.2325168166406582, 'init_value': 1.9873707294464111, 'ave_value': 2.0112422873138422} step=4498
2022-04-22 01:05.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:06.14 [info     ] CQL_20220422010146: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00030117365666207553, 'time_algorithm_update': 0.05389589932612601, 'temp_loss': 3.650695146852835, 'temp': 0.7229186329193887, 'alpha_loss': -28.649565520314123, 'alpha': 1.6474713576322346, 'critic_loss': 519.5445811541783, 'actor_loss': -3.6581334964388366, 'time_step': 0.05428124576634755, 'td_error': 1.2334372420584812, 'init_value': 2.4756882190704346, 'ave_value': 2.49483661333946} step=4844
2022-04-22 01:06.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:06.33 [info     ] CQL_20220422010146: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0002977276124017087, 'time_algorithm_update': 0.053609325017543195, 'temp_loss': 3.567487306677537, 'temp': 0.7062230403023648, 'alpha_loss': -29.759996629174733, 'alpha': 1.7113761974207928, 'critic_loss': 604.7176995249841, 'actor_loss': -4.082056088943702, 'time_step': 0.05399422425066115, 'td_error': 1.235332606147063, 'init_value': 2.930178165435791, 'ave_value': 2.9426466717206794} step=5190
2022-04-22 01:06.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:06.53 [info     ] CQL_20220422010146: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0002964342260636346, 'time_algorithm_update': 0.05390813584961643, 'temp_loss': 3.4845920118982394, 'temp': 0.6899234182228243, 'alpha_loss': -30.915960383553035, 'alpha': 1.777818613314215, 'critic_loss': 702.1857222187725, 'actor_loss': -4.466684781057986, 'time_step': 0.05429034701661568, 'td_error': 1.236186335030569, 'init_value': 3.300952196121216, 'ave_value': 3.3093344452144864} step=5536
2022-04-22 01:06.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:07.13 [info     ] CQL_20220422010146: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00029706817141847116, 'time_algorithm_update': 0.054243640403527055, 'temp_loss': 3.4046671535238366, 'temp': 0.6740123753602794, 'alpha_loss': -32.11569515955931, 'alpha': 1.8468871075293922, 'critic_loss': 814.1283493482998, 'actor_loss': -4.76098810730642, 'time_step': 0.054626753564514866, 'td_error': 1.2371724825156551, 'init_value': 3.6115639209747314, 'ave_value': 3.6219304315622183} step=5882
2022-04-22 01:07.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:07.32 [info     ] CQL_20220422010146: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.000295528786719879, 'time_algorithm_update': 0.05391399915507763, 'temp_loss': 3.325601955369718, 'temp': 0.6584780877725237, 'alpha_loss': -33.36396124735044, 'alpha': 1.9186739611487857, 'critic_loss': 936.6306062378635, 'actor_loss': -5.042201013234309, 'time_step': 0.05429321355213319, 'td_error': 1.2378993848699955, 'init_value': 3.8894553184509277, 'ave_value': 3.9000849365453236} step=6228
2022-04-22 01:07.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:07.52 [info     ] CQL_20220422010146: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0002996887085754747, 'time_algorithm_update': 0.05335618857014386, 'temp_loss': 3.249267507150683, 'temp': 0.643305852234019, 'alpha_loss': -34.6587530588139, 'alpha': 1.993278662248843, 'critic_loss': 1071.9802845861182, 'actor_loss': -5.284091728960158, 'time_step': 0.05373827432621421, 'td_error': 1.2363719542862948, 'init_value': 4.113376140594482, 'ave_value': 4.12916311073665} step=6574
2022-04-22 01:07.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:08.11 [info     ] CQL_20220422010146: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00029377303371539695, 'time_algorithm_update': 0.05434824758871442, 'temp_loss': 3.174187433512914, 'temp': 0.6284865693894425, 'alpha_loss': -36.009964308986774, 'alpha': 2.070805145825954, 'critic_loss': 1222.9058770857794, 'actor_loss': -5.561232475876119, 'time_step': 0.05472079039998137, 'td_error': 1.2401049796431167, 'init_value': 4.454107761383057, 'ave_value': 4.459209206749784} step=6920
2022-04-22 01:08.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:08.31 [info     ] CQL_20220422010146: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00029911609054300824, 'time_algorithm_update': 0.05521861666199789, 'temp_loss': 3.10104284121122, 'temp': 0.614010960380466, 'alpha_loss': -37.408555973471934, 'alpha': 2.1513655757628425, 'critic_loss': 1392.7354976235097, 'actor_loss': -5.728138375144473, 'time_step': 0.055597523044299525, 'td_error': 1.2410969021247904, 'init_value': 4.661903381347656, 'ave_value': 4.666384066784761} step=7266
2022-04-22 01:08.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:08.52 [info     ] CQL_20220422010146: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003020928774265885, 'time_algorithm_update': 0.05535823141219299, 'temp_loss': 3.03060130645774, 'temp': 0.5998711827173399, 'alpha_loss': -38.867528088520025, 'alpha': 2.2350761166886786, 'critic_loss': 1572.638853216447, 'actor_loss': -5.903346775584138, 'time_step': 0.05574225483602182, 'td_error': 1.2434537162875359, 'init_value': 4.934835910797119, 'ave_value': 4.93381325973612} step=7612
2022-04-22 01:08.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:09.12 [info     ] CQL_20220422010146: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0002993979205974954, 'time_algorithm_update': 0.05550220040227637, 'temp_loss': 2.9602181780545007, 'temp': 0.5860577978495228, 'alpha_loss': -40.37417589308899, 'alpha': 2.322050725104492, 'critic_loss': 1775.6473723836027, 'actor_loss': -6.051418453282704, 'time_step': 0.055885158522280654, 'td_error': 1.2425070565914347, 'init_value': 5.037707805633545, 'ave_value': 5.04376026375591} step=7958
2022-04-22 01:09.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:09.32 [info     ] CQL_20220422010146: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0002954777954630769, 'time_algorithm_update': 0.05519914489260988, 'temp_loss': 2.891173204245595, 'temp': 0.5725664043357607, 'alpha_loss': -41.950208796242066, 'alpha': 2.4124125759036557, 'critic_loss': 1986.8977509426934, 'actor_loss': -6.210645325611092, 'time_step': 0.055575297057973164, 'td_error': 1.2446814613082815, 'init_value': 5.2665114402771, 'ave_value': 5.267097027689544} step=8304
2022-04-22 01:09.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:09.52 [info     ] CQL_20220422010146: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0002974836812543042, 'time_algorithm_update': 0.05541677144221488, 'temp_loss': 2.8249095099509796, 'temp': 0.5593881772432713, 'alpha_loss': -43.5810590534541, 'alpha': 2.506303958810134, 'critic_loss': 2204.0436475456104, 'actor_loss': -6.379620166183207, 'time_step': 0.055796023738177526, 'td_error': 1.243396619729433, 'init_value': 5.324184894561768, 'ave_value': 5.333025702462484} step=8650
2022-04-22 01:09.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:10.12 [info     ] CQL_20220422010146: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00029749126103572075, 'time_algorithm_update': 0.05521458008385807, 'temp_loss': 2.7607396827267774, 'temp': 0.5465098920929639, 'alpha_loss': -45.278846244591506, 'alpha': 2.603854067752816, 'critic_loss': 2435.2787027524387, 'actor_loss': -6.469518773128532, 'time_step': 0.05559084112244534, 'td_error': 1.2467958950953117, 'init_value': 5.540550231933594, 'ave_value': 5.538490694447567} step=8996
2022-04-22 01:10.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:10.32 [info     ] CQL_20220422010146: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00029660029218376026, 'time_algorithm_update': 0.054657567443186145, 'temp_loss': 2.697214327795657, 'temp': 0.5339277147213158, 'alpha_loss': -47.04660380368977, 'alpha': 2.705204892020694, 'critic_loss': 2716.844463370439, 'actor_loss': -6.431824702058913, 'time_step': 0.05503603419816563, 'td_error': 1.2443063433778765, 'init_value': 5.359316825866699, 'ave_value': 5.364412877908981} step=9342
2022-04-22 01:10.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:10.52 [info     ] CQL_20220422010146: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0002953199981954056, 'time_algorithm_update': 0.05528754719419975, 'temp_loss': 2.6345549373957464, 'temp': 0.5216372734893954, 'alpha_loss': -48.871519000544026, 'alpha': 2.810513280719691, 'critic_loss': 2988.3080899453576, 'actor_loss': -6.380996720639267, 'time_step': 0.055662640946448884, 'td_error': 1.2425606046219069, 'init_value': 5.298831939697266, 'ave_value': 5.312332647747765} step=9688
2022-04-22 01:10.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:11.12 [info     ] CQL_20220422010146: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00030613152277951983, 'time_algorithm_update': 0.05500534297413909, 'temp_loss': 2.5742396143819555, 'temp': 0.5096296917840925, 'alpha_loss': -50.77317825493785, 'alpha': 2.9199194322431707, 'critic_loss': 3308.239241583499, 'actor_loss': -6.313110160000751, 'time_step': 0.055394934091953875, 'td_error': 1.244498622817121, 'init_value': 5.313889980316162, 'ave_value': 5.318968214068496} step=10034
2022-04-22 01:11.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:11.32 [info     ] CQL_20220422010146: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.000309889027149002, 'time_algorithm_update': 0.05516807192322836, 'temp_loss': 2.514540488320279, 'temp': 0.4978998923577325, 'alpha_loss': -52.75401571858136, 'alpha': 3.033585820583939, 'critic_loss': 3545.746555218118, 'actor_loss': -6.288477342252786, 'time_step': 0.055557708519731644, 'td_error': 1.244307089770991, 'init_value': 5.345384120941162, 'ave_value': 5.349859318161192} step=10380
2022-04-22 01:11.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:11.52 [info     ] CQL_20220422010146: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003122346249618971, 'time_algorithm_update': 0.054762049217444625, 'temp_loss': 2.457054598483047, 'temp': 0.4864397185041725, 'alpha_loss': -54.80337933446631, 'alpha': 3.151682809598184, 'critic_loss': 3737.135229209944, 'actor_loss': -6.315134913935138, 'time_step': 0.05515765523634894, 'td_error': 1.2446404745972974, 'init_value': 5.36583137512207, 'ave_value': 5.370577287741779} step=10726
2022-04-22 01:11.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:12.12 [info     ] CQL_20220422010146: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00029063155885376683, 'time_algorithm_update': 0.05528829552534688, 'temp_loss': 2.399840884125991, 'temp': 0.4752440828980738, 'alpha_loss': -56.935665130615234, 'alpha': 3.2743627136153295, 'critic_loss': 3591.2516617085894, 'actor_loss': -6.456197259054019, 'time_step': 0.0556617058770505, 'td_error': 1.2471515299948037, 'init_value': 5.581607818603516, 'ave_value': 5.579890217335657} step=11072
2022-04-22 01:12.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:12.32 [info     ] CQL_20220422010146: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00029606695120045217, 'time_algorithm_update': 0.05504116295390046, 'temp_loss': 2.345351220555388, 'temp': 0.46430533495597065, 'alpha_loss': -59.15292330835596, 'alpha': 3.401829114539086, 'critic_loss': 3470.887823733291, 'actor_loss': -6.478176301614398, 'time_step': 0.05541795802254208, 'td_error': 1.2480917477929918, 'init_value': 5.680822849273682, 'ave_value': 5.676135030161323} step=11418
2022-04-22 01:12.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:12.53 [info     ] CQL_20220422010146: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0002942167954637825, 'time_algorithm_update': 0.05546191110776339, 'temp_loss': 2.291159859971504, 'temp': 0.45361758940826263, 'alpha_loss': -61.45420683050431, 'alpha': 3.534257155622361, 'critic_loss': 3352.867828192738, 'actor_loss': -6.526756770348962, 'time_step': 0.055834788118483705, 'td_error': 1.2469728057738219, 'init_value': 5.664063930511475, 'ave_value': 5.665991021694873} step=11764
2022-04-22 01:12.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:13.13 [info     ] CQL_20220422010146: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.000295631458304521, 'time_algorithm_update': 0.05589256534686667, 'temp_loss': 2.238340590041497, 'temp': 0.44317681768726064, 'alpha_loss': -63.84535640650402, 'alpha': 3.6718416537852647, 'critic_loss': 3228.283120568777, 'actor_loss': -6.623757744111078, 'time_step': 0.056269183324251563, 'td_error': 1.2478633565404056, 'init_value': 5.7934088706970215, 'ave_value': 5.794101955111754} step=12110
2022-04-22 01:13.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:13.33 [info     ] CQL_20220422010146: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00030114678289159874, 'time_algorithm_update': 0.05655720881644012, 'temp_loss': 2.1868572503845125, 'temp': 0.4329761661201543, 'alpha_loss': -66.33764511725806, 'alpha': 3.8147788192495447, 'critic_loss': 3036.005525622065, 'actor_loss': -6.78061803779161, 'time_step': 0.056944145632617044, 'td_error': 1.25173211640793, 'init_value': 6.153374671936035, 'ave_value': 6.1468039997837} step=12456
2022-04-22 01:13.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:13.54 [info     ] CQL_20220422010146: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00030536803206956454, 'time_algorithm_update': 0.05604831193912925, 'temp_loss': 2.13726827864013, 'temp': 0.4230085393424668, 'alpha_loss': -68.92199510783819, 'alpha': 3.9632914190347486, 'critic_loss': 2604.5840789265717, 'actor_loss': -7.030047572417066, 'time_step': 0.05643168901432456, 'td_error': 1.250913054857356, 'init_value': 6.213771820068359, 'ave_value': 6.213843309150143} step=12802
2022-04-22 01:13.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:14.14 [info     ] CQL_20220422010146: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0003014665118531685, 'time_algorithm_update': 0.056453205257481924, 'temp_loss': 2.0871941161293517, 'temp': 0.4132718815107566, 'alpha_loss': -71.60328885999029, 'alpha': 4.117581637608523, 'critic_loss': 2475.6250945515717, 'actor_loss': -7.161026588065087, 'time_step': 0.05684044250863136, 'td_error': 1.2542698988998242, 'init_value': 6.538039684295654, 'ave_value': 6.531371358222112} step=13148
2022-04-22 01:14.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:14.35 [info     ] CQL_20220422010146: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00029599459874147627, 'time_algorithm_update': 0.0563442624373243, 'temp_loss': 2.039414676627672, 'temp': 0.4037604710097947, 'alpha_loss': -74.39086389265998, 'alpha': 4.277877367989865, 'critic_loss': 2179.835376188245, 'actor_loss': -7.398766791889433, 'time_step': 0.056721984306511854, 'td_error': 1.256059032655192, 'init_value': 6.7510666847229, 'ave_value': 6.743182900800926} step=13494
2022-04-22 01:14.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:14.55 [info     ] CQL_20220422010146: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0002950774451900769, 'time_algorithm_update': 0.05440255189906655, 'temp_loss': 1.9920609173058086, 'temp': 0.39446761049976237, 'alpha_loss': -77.2826149119118, 'alpha': 4.4444171017994085, 'critic_loss': 1916.063201022286, 'actor_loss': -7.666825960137252, 'time_step': 0.054779670831095965, 'td_error': 1.2576846315365757, 'init_value': 7.021355152130127, 'ave_value': 7.019942728339241} step=13840
2022-04-22 01:14.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:15.14 [info     ] CQL_20220422010146: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00029751262223789456, 'time_algorithm_update': 0.0534867689099615, 'temp_loss': 1.9466266390905216, 'temp': 0.3853889161968507, 'alpha_loss': -80.28438945174906, 'alpha': 4.617425432094949, 'critic_loss': 1861.625019051436, 'actor_loss': -7.755958394508141, 'time_step': 0.05386889807750724, 'td_error': 1.2561849600382848, 'init_value': 7.0027546882629395, 'ave_value': 7.004870879949694} step=14186
2022-04-22 01:15.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:15.34 [info     ] CQL_20220422010146: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0002977193435492543, 'time_algorithm_update': 0.053551609116482594, 'temp_loss': 1.901694791509926, 'temp': 0.3765179756921151, 'alpha_loss': -83.42079025885963, 'alpha': 4.797178092030432, 'critic_loss': 2038.7510739365066, 'actor_loss': -7.794343311662619, 'time_step': 0.053930424541407236, 'td_error': 1.2579237410216275, 'init_value': 7.175201416015625, 'ave_value': 7.176634120127216} step=14532
2022-04-22 01:15.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:15.53 [info     ] CQL_20220422010146: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00029768695721047464, 'time_algorithm_update': 0.05319911000356509, 'temp_loss': 1.857873365713682, 'temp': 0.36785260368289285, 'alpha_loss': -86.66960161683187, 'alpha': 4.9839280299368625, 'critic_loss': 2150.189904361791, 'actor_loss': -7.960271329549006, 'time_step': 0.053576684411550536, 'td_error': 1.2558183479749983, 'init_value': 7.148593902587891, 'ave_value': 7.1617843529125595} step=14878
2022-04-22 01:15.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:16.13 [info     ] CQL_20220422010146: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0002995274659526141, 'time_algorithm_update': 0.05355348407877663, 'temp_loss': 1.8156663968383921, 'temp': 0.3593847647395437, 'alpha_loss': -90.03500128067986, 'alpha': 5.177954148694959, 'critic_loss': 2276.577573919572, 'actor_loss': -8.01752136484047, 'time_step': 0.053934616849601616, 'td_error': 1.2600556306227506, 'init_value': 7.444060325622559, 'ave_value': 7.447735704297192} step=15224
2022-04-22 01:16.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:16.31 [info     ] CQL_20220422010146: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0002928104014754984, 'time_algorithm_update': 0.05091458800211118, 'temp_loss': 1.7735165698679884, 'temp': 0.351111805852438, 'alpha_loss': -93.53794521265637, 'alpha': 5.379534102588719, 'critic_loss': 2309.8228343456467, 'actor_loss': -8.190260428224684, 'time_step': 0.051287883968022516, 'td_error': 1.262126937291925, 'init_value': 7.578833103179932, 'ave_value': 7.577801348237191} step=15570
2022-04-22 01:16.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:16.49 [info     ] CQL_20220422010146: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003035040949121376, 'time_algorithm_update': 0.049438590259221246, 'temp_loss': 1.7324917602401249, 'temp': 0.34303060880286157, 'alpha_loss': -97.19283524000576, 'alpha': 5.588964974949126, 'critic_loss': 2433.1809096143425, 'actor_loss': -8.308022598310702, 'time_step': 0.04982468640873198, 'td_error': 1.2621945380537198, 'init_value': 7.695323944091797, 'ave_value': 7.700208294318151} step=15916
2022-04-22 01:16.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:17.07 [info     ] CQL_20220422010146: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00029654585557176887, 'time_algorithm_update': 0.05007029613318471, 'temp_loss': 1.6929045981065387, 'temp': 0.33513506135844084, 'alpha_loss': -100.96508899313866, 'alpha': 5.806536923943227, 'critic_loss': 2376.9302703328217, 'actor_loss': -8.550490685281037, 'time_step': 0.05044717733570606, 'td_error': 1.266190273976171, 'init_value': 7.996535301208496, 'ave_value': 7.99689607263007} step=16262
2022-04-22 01:17.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:17.25 [info     ] CQL_20220422010146: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0002947060359006672, 'time_algorithm_update': 0.049877230831653394, 'temp_loss': 1.653853831952707, 'temp': 0.32742139430059863, 'alpha_loss': -104.89976869704407, 'alpha': 6.032558387414569, 'critic_loss': 2280.958906758038, 'actor_loss': -8.762653290191826, 'time_step': 0.05025148047188114, 'td_error': 1.270554372490729, 'init_value': 8.306000709533691, 'ave_value': 8.300256014616357} step=16608
2022-04-22 01:17.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:17.43 [info     ] CQL_20220422010146: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00030825248343407073, 'time_algorithm_update': 0.04998986982885813, 'temp_loss': 1.6155058121405586, 'temp': 0.31988456638562196, 'alpha_loss': -108.97785345529545, 'alpha': 6.267399650088625, 'critic_loss': 2342.5145073157514, 'actor_loss': -8.854444139954671, 'time_step': 0.050382262709512876, 'td_error': 1.267490942289768, 'init_value': 8.209815979003906, 'ave_value': 8.215740050021479} step=16954
2022-04-22 01:17.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:18.02 [info     ] CQL_20220422010146: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0002957982134956845, 'time_algorithm_update': 0.05032848140407849, 'temp_loss': 1.5787293718040334, 'temp': 0.3125212372555209, 'alpha_loss': -113.22646197280443, 'alpha': 6.511389325808928, 'critic_loss': 2464.0712587213243, 'actor_loss': -8.887242832624842, 'time_step': 0.050702183921902164, 'td_error': 1.2661556290218334, 'init_value': 8.1971435546875, 'ave_value': 8.208251329144352} step=17300
2022-04-22 01:18.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422010146/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 01:18.03 [info     ] FQE_20220422011802: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001404385968863246, 'time_algorithm_update': 0.0076573728078819184, 'loss': 0.007108418136296502, 'time_step': 0.007860254092388842, 'init_value': -0.33693602681159973, 'ave_value': -0.30125667997510047, 'soft_opc': nan} step=166




2022-04-22 01:18.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.05 [info     ] FQE_20220422011802: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014605579606021744, 'time_algorithm_update': 0.007982772516917033, 'loss': 0.004414091373412275, 'time_step': 0.00819563578410321, 'init_value': -0.4079822897911072, 'ave_value': -0.3450039503642836, 'soft_opc': nan} step=332




2022-04-22 01:18.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.06 [info     ] FQE_20220422011802: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001419323036469609, 'time_algorithm_update': 0.007793756852667016, 'loss': 0.0037074318542762333, 'time_step': 0.008000595023833126, 'init_value': -0.4295884370803833, 'ave_value': -0.3660326383236024, 'soft_opc': nan} step=498




2022-04-22 01:18.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.08 [info     ] FQE_20220422011802: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00013941454600138837, 'time_algorithm_update': 0.007916650140141866, 'loss': 0.003390173749655979, 'time_step': 0.008118477212377342, 'init_value': -0.48539644479751587, 'ave_value': -0.40360734087524114, 'soft_opc': nan} step=664




2022-04-22 01:18.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.09 [info     ] FQE_20220422011802: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001437778932502471, 'time_algorithm_update': 0.007736165839505483, 'loss': 0.0032207448230439193, 'time_step': 0.007939608700304147, 'init_value': -0.5080737471580505, 'ave_value': -0.40208740698861645, 'soft_opc': nan} step=830




2022-04-22 01:18.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.11 [info     ] FQE_20220422011802: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014834231640919145, 'time_algorithm_update': 0.00795257522399167, 'loss': 0.0029879151654137993, 'time_step': 0.00816821908376303, 'init_value': -0.5445244312286377, 'ave_value': -0.4252491535065142, 'soft_opc': nan} step=996




2022-04-22 01:18.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.12 [info     ] FQE_20220422011802: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014700803412012308, 'time_algorithm_update': 0.007943332913410232, 'loss': 0.0028904124327194707, 'time_step': 0.008154643587319248, 'init_value': -0.6014243364334106, 'ave_value': -0.46906487019480886, 'soft_opc': nan} step=1162




2022-04-22 01:18.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.13 [info     ] FQE_20220422011802: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00013986409428607985, 'time_algorithm_update': 0.007692079946219203, 'loss': 0.002503651179547186, 'time_step': 0.007897016513778502, 'init_value': -0.6109263300895691, 'ave_value': -0.4654019451490394, 'soft_opc': nan} step=1328




2022-04-22 01:18.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.15 [info     ] FQE_20220422011802: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014419009886592268, 'time_algorithm_update': 0.007870287780302116, 'loss': 0.0022740395569960667, 'time_step': 0.008084801306207496, 'init_value': -0.6198641061782837, 'ave_value': -0.46685713059059136, 'soft_opc': nan} step=1494




2022-04-22 01:18.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.16 [info     ] FQE_20220422011802: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014986762081284122, 'time_algorithm_update': 0.007958876081259853, 'loss': 0.002147005217329386, 'time_step': 0.008175074336040452, 'init_value': -0.6609492897987366, 'ave_value': -0.4889851805191856, 'soft_opc': nan} step=1660




2022-04-22 01:18.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.18 [info     ] FQE_20220422011802: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00014504610774028733, 'time_algorithm_update': 0.007908130266580236, 'loss': 0.002020301691786644, 'time_step': 0.008121871086488286, 'init_value': -0.7111629247665405, 'ave_value': -0.5286599498208578, 'soft_opc': nan} step=1826




2022-04-22 01:18.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.19 [info     ] FQE_20220422011802: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014038401913930135, 'time_algorithm_update': 0.007241595222289304, 'loss': 0.0018291686775852892, 'time_step': 0.007447682231305593, 'init_value': -0.7485756874084473, 'ave_value': -0.5590078879315574, 'soft_opc': nan} step=1992




2022-04-22 01:18.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.21 [info     ] FQE_20220422011802: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014206731175801842, 'time_algorithm_update': 0.0073564325470522225, 'loss': 0.0018212563566532526, 'time_step': 0.007560817592115287, 'init_value': -0.7975692749023438, 'ave_value': -0.589021037230352, 'soft_opc': nan} step=2158




2022-04-22 01:18.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.22 [info     ] FQE_20220422011802: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001445089478090585, 'time_algorithm_update': 0.007457983062927981, 'loss': 0.001821383937326518, 'time_step': 0.007667610444218279, 'init_value': -0.818893313407898, 'ave_value': -0.5893332399159401, 'soft_opc': nan} step=2324




2022-04-22 01:18.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.23 [info     ] FQE_20220422011802: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014192799487745906, 'time_algorithm_update': 0.00744483269840838, 'loss': 0.001722925662060257, 'time_step': 0.007649368550404009, 'init_value': -0.8509698510169983, 'ave_value': -0.6081199282931314, 'soft_opc': nan} step=2490




2022-04-22 01:18.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.25 [info     ] FQE_20220422011802: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014227556895060712, 'time_algorithm_update': 0.007484394383717732, 'loss': 0.0017382927520862635, 'time_step': 0.007691535605005471, 'init_value': -0.8983136415481567, 'ave_value': -0.6461183925134104, 'soft_opc': nan} step=2656




2022-04-22 01:18.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.26 [info     ] FQE_20220422011802: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001414253051022449, 'time_algorithm_update': 0.007402773363044463, 'loss': 0.0017427630597691849, 'time_step': 0.0076063799570841965, 'init_value': -0.9361525774002075, 'ave_value': -0.6812608575028879, 'soft_opc': nan} step=2822




2022-04-22 01:18.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.27 [info     ] FQE_20220422011802: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015394371676157755, 'time_algorithm_update': 0.007596504257385989, 'loss': 0.0017733565862683288, 'time_step': 0.007813729435564524, 'init_value': -0.953557550907135, 'ave_value': -0.6916859318812688, 'soft_opc': nan} step=2988




2022-04-22 01:18.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.29 [info     ] FQE_20220422011802: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015090747051928416, 'time_algorithm_update': 0.007502281522176352, 'loss': 0.0018044873656205699, 'time_step': 0.007722900574465832, 'init_value': -0.9925928115844727, 'ave_value': -0.720535541567448, 'soft_opc': nan} step=3154




2022-04-22 01:18.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.30 [info     ] FQE_20220422011802: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014605292354721622, 'time_algorithm_update': 0.007573498300759189, 'loss': 0.001839746960932501, 'time_step': 0.007784103772726403, 'init_value': -1.0299019813537598, 'ave_value': -0.7493151852273726, 'soft_opc': nan} step=3320




2022-04-22 01:18.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.32 [info     ] FQE_20220422011802: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014374055058123116, 'time_algorithm_update': 0.007345929203263248, 'loss': 0.0018657211919808872, 'time_step': 0.007555415831416486, 'init_value': -1.0610332489013672, 'ave_value': -0.7725462495945058, 'soft_opc': nan} step=3486




2022-04-22 01:18.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.33 [info     ] FQE_20220422011802: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00014099299189556077, 'time_algorithm_update': 0.007496639906641948, 'loss': 0.0019411023016121373, 'time_step': 0.007703605904636613, 'init_value': -1.1216214895248413, 'ave_value': -0.8342714884662413, 'soft_opc': nan} step=3652




2022-04-22 01:18.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.34 [info     ] FQE_20220422011802: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014870999807334808, 'time_algorithm_update': 0.0075723191341721865, 'loss': 0.0019366742334053684, 'time_step': 0.00779252310833299, 'init_value': -1.1046384572982788, 'ave_value': -0.8136389849355092, 'soft_opc': nan} step=3818




2022-04-22 01:18.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.36 [info     ] FQE_20220422011802: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014370320791221527, 'time_algorithm_update': 0.007415593388568924, 'loss': 0.0021369322965760344, 'time_step': 0.00763106058879071, 'init_value': -1.1811347007751465, 'ave_value': -0.8799294233389265, 'soft_opc': nan} step=3984




2022-04-22 01:18.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.37 [info     ] FQE_20220422011802: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001436960266297122, 'time_algorithm_update': 0.007436937596424517, 'loss': 0.0021077724926156856, 'time_step': 0.007641082786651979, 'init_value': -1.2269439697265625, 'ave_value': -0.9131223016896763, 'soft_opc': nan} step=4150




2022-04-22 01:18.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.39 [info     ] FQE_20220422011802: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014242924839617257, 'time_algorithm_update': 0.007436862911086485, 'loss': 0.0021109219888752573, 'time_step': 0.0076431107808308425, 'init_value': -1.2672897577285767, 'ave_value': -0.9420510318499428, 'soft_opc': nan} step=4316




2022-04-22 01:18.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.40 [info     ] FQE_20220422011802: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014934769595961972, 'time_algorithm_update': 0.007400239806577384, 'loss': 0.002195816992007824, 'time_step': 0.007612202540937677, 'init_value': -1.2979977130889893, 'ave_value': -0.9673265401710262, 'soft_opc': nan} step=4482




2022-04-22 01:18.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.41 [info     ] FQE_20220422011802: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014261021671524966, 'time_algorithm_update': 0.0073754873620458395, 'loss': 0.00217508715479242, 'time_step': 0.007581989449190806, 'init_value': -1.3237388134002686, 'ave_value': -0.9846806385831253, 'soft_opc': nan} step=4648




2022-04-22 01:18.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.43 [info     ] FQE_20220422011802: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001444845314485481, 'time_algorithm_update': 0.007433329720094979, 'loss': 0.0024021573409267016, 'time_step': 0.0076516593795224845, 'init_value': -1.40117347240448, 'ave_value': -1.0495276041396029, 'soft_opc': nan} step=4814




2022-04-22 01:18.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.44 [info     ] FQE_20220422011802: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001406454178224127, 'time_algorithm_update': 0.007430632430386831, 'loss': 0.002478004711629621, 'time_step': 0.0076348681047738315, 'init_value': -1.4216104745864868, 'ave_value': -1.0575625165088756, 'soft_opc': nan} step=4980




2022-04-22 01:18.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.45 [info     ] FQE_20220422011802: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014361272375267673, 'time_algorithm_update': 0.007374020944158715, 'loss': 0.0025461112944681354, 'time_step': 0.007585446518587779, 'init_value': -1.471005916595459, 'ave_value': -1.0903204310181978, 'soft_opc': nan} step=5146




2022-04-22 01:18.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.47 [info     ] FQE_20220422011802: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001470252691981304, 'time_algorithm_update': 0.007551807955086949, 'loss': 0.0026307036708872094, 'time_step': 0.0077627337122538, 'init_value': -1.5125905275344849, 'ave_value': -1.1180512421034479, 'soft_opc': nan} step=5312




2022-04-22 01:18.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.48 [info     ] FQE_20220422011802: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014270788215729128, 'time_algorithm_update': 0.007394270724560841, 'loss': 0.002718050900358617, 'time_step': 0.007601980703422822, 'init_value': -1.55327308177948, 'ave_value': -1.1451751059911273, 'soft_opc': nan} step=5478




2022-04-22 01:18.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.50 [info     ] FQE_20220422011802: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014228993151561324, 'time_algorithm_update': 0.007523291082267302, 'loss': 0.002870476761017926, 'time_step': 0.007727701979947378, 'init_value': -1.5850121974945068, 'ave_value': -1.164933322101563, 'soft_opc': nan} step=5644




2022-04-22 01:18.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.51 [info     ] FQE_20220422011802: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00014065116284841514, 'time_algorithm_update': 0.00732684709939612, 'loss': 0.0028948730072749958, 'time_step': 0.007532335189451654, 'init_value': -1.656497597694397, 'ave_value': -1.225468388736785, 'soft_opc': nan} step=5810




2022-04-22 01:18.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.52 [info     ] FQE_20220422011802: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014270213713128883, 'time_algorithm_update': 0.007487490952733052, 'loss': 0.003095842971002223, 'time_step': 0.007692628596202436, 'init_value': -1.6743520498275757, 'ave_value': -1.2504898740364625, 'soft_opc': nan} step=5976




2022-04-22 01:18.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.54 [info     ] FQE_20220422011802: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014300949602241977, 'time_algorithm_update': 0.007416389074670263, 'loss': 0.0032469277224203303, 'time_step': 0.007623505879597491, 'init_value': -1.7135303020477295, 'ave_value': -1.2649592139967927, 'soft_opc': nan} step=6142




2022-04-22 01:18.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.55 [info     ] FQE_20220422011802: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001449484422982457, 'time_algorithm_update': 0.00748037430177252, 'loss': 0.0031815067937188074, 'time_step': 0.00769082896680717, 'init_value': -1.7351644039154053, 'ave_value': -1.2845799508395497, 'soft_opc': nan} step=6308




2022-04-22 01:18.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.56 [info     ] FQE_20220422011802: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014776063252644367, 'time_algorithm_update': 0.007468851215868111, 'loss': 0.0031959687260397517, 'time_step': 0.007681422922984663, 'init_value': -1.7481358051300049, 'ave_value': -1.2792103300491968, 'soft_opc': nan} step=6474




2022-04-22 01:18.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.58 [info     ] FQE_20220422011802: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014211039945303676, 'time_algorithm_update': 0.007500782070389713, 'loss': 0.00323285327656952, 'time_step': 0.007706528686615358, 'init_value': -1.775491714477539, 'ave_value': -1.300676258472172, 'soft_opc': nan} step=6640




2022-04-22 01:18.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:18.59 [info     ] FQE_20220422011802: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001417513353278838, 'time_algorithm_update': 0.007441743310675563, 'loss': 0.003395731464457552, 'time_step': 0.007649681654321142, 'init_value': -1.8085310459136963, 'ave_value': -1.3361064435796695, 'soft_opc': nan} step=6806




2022-04-22 01:18.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.01 [info     ] FQE_20220422011802: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014377214822424464, 'time_algorithm_update': 0.007483108934149684, 'loss': 0.0034968983855362176, 'time_step': 0.007694418171802199, 'init_value': -1.842482089996338, 'ave_value': -1.370097189349634, 'soft_opc': nan} step=6972




2022-04-22 01:19.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.02 [info     ] FQE_20220422011802: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00014498578496726164, 'time_algorithm_update': 0.00752732409052102, 'loss': 0.0036054961141395235, 'time_step': 0.007737064936074866, 'init_value': -1.8859981298446655, 'ave_value': -1.3898777572152852, 'soft_opc': nan} step=7138




2022-04-22 01:19.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.03 [info     ] FQE_20220422011802: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014598541949168746, 'time_algorithm_update': 0.007444837007177882, 'loss': 0.003718530311437144, 'time_step': 0.007654941225626382, 'init_value': -1.9628394842147827, 'ave_value': -1.4599106210846085, 'soft_opc': nan} step=7304




2022-04-22 01:19.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.05 [info     ] FQE_20220422011802: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014625112694430063, 'time_algorithm_update': 0.007487917520913733, 'loss': 0.0039112474956950844, 'time_step': 0.007701550621584237, 'init_value': -1.983216643333435, 'ave_value': -1.4648198545516073, 'soft_opc': nan} step=7470




2022-04-22 01:19.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.06 [info     ] FQE_20220422011802: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014259441789374295, 'time_algorithm_update': 0.007561942180955267, 'loss': 0.0038591347603584604, 'time_step': 0.00776911930865552, 'init_value': -2.013638973236084, 'ave_value': -1.488939673527404, 'soft_opc': nan} step=7636




2022-04-22 01:19.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.08 [info     ] FQE_20220422011802: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001495272280221962, 'time_algorithm_update': 0.00749669017561947, 'loss': 0.003985017301207577, 'time_step': 0.007716841008289751, 'init_value': -2.020582914352417, 'ave_value': -1.488940591304689, 'soft_opc': nan} step=7802




2022-04-22 01:19.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.09 [info     ] FQE_20220422011802: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014328669352703784, 'time_algorithm_update': 0.007522853024034615, 'loss': 0.0040784938376977565, 'time_step': 0.007733537490109363, 'init_value': -2.0228538513183594, 'ave_value': -1.4799762395722371, 'soft_opc': nan} step=7968




2022-04-22 01:19.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.10 [info     ] FQE_20220422011802: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00014455203550407686, 'time_algorithm_update': 0.007491867226290415, 'loss': 0.003777527175486054, 'time_step': 0.007700654397527856, 'init_value': -2.05190110206604, 'ave_value': -1.497354819055076, 'soft_opc': nan} step=8134




2022-04-22 01:19.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:19.12 [info     ] FQE_20220422011802: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00014094703168754117, 'time_algorithm_update': 0.007478741278131324, 'loss': 0.0041637412645483774, 'time_step': 0.007689109767775938, 'init_value': -1.9980006217956543, 'ave_value': -1.4548536613717808, 'soft_opc': nan} step=8300




2022-04-22 01:19.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011802/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 01:19.12 [debug    ] RoundIterator is selected.
2022-04-22 01:19.12 [info     ] Directory is created at d3rlpy_logs/FQE_20220422011912
2022-04-22 01:19.12 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 01:19.12 [debug    ] Building models...
2022-04-22 01:19.12 [debug    ] Models have been built.
2022-04-22 01:19.12 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422011912/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 01:19.15 [info     ] FQE_20220422011912: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001518719418104305, 'time_algorithm_update': 0.007542472939158595, 'loss': 0.024851166180248352, 'time_step': 0.007761681495710861, 'init_value': -1.2559611797332764, 'ave_value': -1.252314731391432, 'soft_opc': nan} step=344




2022-04-22 01:19.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.18 [info     ] FQE_20220422011912: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00014788466830586278, 'time_algorithm_update': 0.007453628057657286, 'loss': 0.022442547812906288, 'time_step': 0.0076643394869427344, 'init_value': -2.151902198791504, 'ave_value': -2.154959179622096, 'soft_opc': nan} step=688




2022-04-22 01:19.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.21 [info     ] FQE_20220422011912: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001540398874948191, 'time_algorithm_update': 0.007731540258540664, 'loss': 0.026242983164698926, 'time_step': 0.007955910854561384, 'init_value': -3.178628921508789, 'ave_value': -3.2127518107090984, 'soft_opc': nan} step=1032




2022-04-22 01:19.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.24 [info     ] FQE_20220422011912: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001517159994258437, 'time_algorithm_update': 0.00833663690921872, 'loss': 0.028909004953971435, 'time_step': 0.00855516001235607, 'init_value': -3.915825128555298, 'ave_value': -4.014262359448382, 'soft_opc': nan} step=1376




2022-04-22 01:19.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.27 [info     ] FQE_20220422011912: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015100698138392247, 'time_algorithm_update': 0.008029810911001162, 'loss': 0.035257537491879494, 'time_step': 0.008246186860772066, 'init_value': -4.8799943923950195, 'ave_value': -5.067672613236281, 'soft_opc': nan} step=1720




2022-04-22 01:19.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.30 [info     ] FQE_20220422011912: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015259690062944278, 'time_algorithm_update': 0.008385556381802226, 'loss': 0.03950712848238127, 'time_step': 0.008604760086813639, 'init_value': -5.386087894439697, 'ave_value': -5.6510647208304015, 'soft_opc': nan} step=2064




2022-04-22 01:19.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.33 [info     ] FQE_20220422011912: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015310700549635777, 'time_algorithm_update': 0.007800698280334473, 'loss': 0.04671240989985161, 'time_step': 0.008017999488253926, 'init_value': -6.23330020904541, 'ave_value': -6.662572030360634, 'soft_opc': nan} step=2408




2022-04-22 01:19.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.36 [info     ] FQE_20220422011912: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015086559362189713, 'time_algorithm_update': 0.008289263692013053, 'loss': 0.05379787927255208, 'time_step': 0.008506770743880161, 'init_value': -6.501307010650635, 'ave_value': -7.13111959191056, 'soft_opc': nan} step=2752




2022-04-22 01:19.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.40 [info     ] FQE_20220422011912: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.000148091898408047, 'time_algorithm_update': 0.00804139916286912, 'loss': 0.06225239422692116, 'time_step': 0.00825747570326162, 'init_value': -6.875359535217285, 'ave_value': -7.766683053782394, 'soft_opc': nan} step=3096




2022-04-22 01:19.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.43 [info     ] FQE_20220422011912: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00014977815539337868, 'time_algorithm_update': 0.008347319309101549, 'loss': 0.07105127381442418, 'time_step': 0.008563820012780122, 'init_value': -7.291836261749268, 'ave_value': -8.597917154297098, 'soft_opc': nan} step=3440




2022-04-22 01:19.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.46 [info     ] FQE_20220422011912: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00014911210814187693, 'time_algorithm_update': 0.008095821668935377, 'loss': 0.07759600832549379, 'time_step': 0.008312176826388337, 'init_value': -7.476752281188965, 'ave_value': -9.172196315806191, 'soft_opc': nan} step=3784




2022-04-22 01:19.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.49 [info     ] FQE_20220422011912: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015176520791164663, 'time_algorithm_update': 0.008312673069709954, 'loss': 0.08836429678499265, 'time_step': 0.008532001528629037, 'init_value': -7.618109703063965, 'ave_value': -9.893130969168904, 'soft_opc': nan} step=4128




2022-04-22 01:19.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.52 [info     ] FQE_20220422011912: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015509821647821472, 'time_algorithm_update': 0.008290250634038172, 'loss': 0.09583014466929747, 'time_step': 0.008513940628184828, 'init_value': -7.5661091804504395, 'ave_value': -10.351331173702404, 'soft_opc': nan} step=4472




2022-04-22 01:19.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.55 [info     ] FQE_20220422011912: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015227600585582645, 'time_algorithm_update': 0.008231046587921852, 'loss': 0.10271627887498674, 'time_step': 0.008448399776636167, 'init_value': -7.400176048278809, 'ave_value': -10.930703981636881, 'soft_opc': nan} step=4816




2022-04-22 01:19.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:19.58 [info     ] FQE_20220422011912: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015295868696168412, 'time_algorithm_update': 0.008341059435245603, 'loss': 0.11491900311003245, 'time_step': 0.008562946735426437, 'init_value': -7.291714191436768, 'ave_value': -11.412029702733228, 'soft_opc': nan} step=5160




2022-04-22 01:19.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.01 [info     ] FQE_20220422011912: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001521318457847418, 'time_algorithm_update': 0.008025009964787683, 'loss': 0.12440077288531114, 'time_step': 0.008242860089900881, 'init_value': -7.627752780914307, 'ave_value': -12.322610038062473, 'soft_opc': nan} step=5504




2022-04-22 01:20.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.05 [info     ] FQE_20220422011912: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001527355160824088, 'time_algorithm_update': 0.008423498203588087, 'loss': 0.1381297381161604, 'time_step': 0.008642864088679468, 'init_value': -7.349477767944336, 'ave_value': -12.615994242310256, 'soft_opc': nan} step=5848




2022-04-22 01:20.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.08 [info     ] FQE_20220422011912: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015366354654001635, 'time_algorithm_update': 0.008089227731837782, 'loss': 0.14476018834339324, 'time_step': 0.008309952048368232, 'init_value': -7.306881904602051, 'ave_value': -13.11152041314153, 'soft_opc': nan} step=6192




2022-04-22 01:20.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.11 [info     ] FQE_20220422011912: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015489583791688432, 'time_algorithm_update': 0.009291882431784342, 'loss': 0.1592416726540081, 'time_step': 0.009513258240943732, 'init_value': -7.6693267822265625, 'ave_value': -13.680850264967026, 'soft_opc': nan} step=6536




2022-04-22 01:20.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.15 [info     ] FQE_20220422011912: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001535609711048215, 'time_algorithm_update': 0.009199301863825598, 'loss': 0.1644242376543928, 'time_step': 0.009419755187145499, 'init_value': -7.966860771179199, 'ave_value': -14.202756587089132, 'soft_opc': nan} step=6880




2022-04-22 01:20.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.18 [info     ] FQE_20220422011912: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015495682871618935, 'time_algorithm_update': 0.009373867927595626, 'loss': 0.16947985973295776, 'time_step': 0.009595360173735508, 'init_value': -8.213497161865234, 'ave_value': -14.752890804257277, 'soft_opc': nan} step=7224




2022-04-22 01:20.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.21 [info     ] FQE_20220422011912: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015398859977722168, 'time_algorithm_update': 0.008940460376961286, 'loss': 0.1721672500527009, 'time_step': 0.009159481109574784, 'init_value': -8.289466857910156, 'ave_value': -15.039406699165367, 'soft_opc': nan} step=7568




2022-04-22 01:20.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.25 [info     ] FQE_20220422011912: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001567823942317519, 'time_algorithm_update': 0.009391282880029012, 'loss': 0.17471581679054124, 'time_step': 0.009613256121790686, 'init_value': -8.394243240356445, 'ave_value': -15.38532054819175, 'soft_opc': nan} step=7912




2022-04-22 01:20.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.28 [info     ] FQE_20220422011912: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001555833705635958, 'time_algorithm_update': 0.008851813715557719, 'loss': 0.17855042175303193, 'time_step': 0.009074848058611848, 'init_value': -8.547297477722168, 'ave_value': -15.604387657028447, 'soft_opc': nan} step=8256




2022-04-22 01:20.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.32 [info     ] FQE_20220422011912: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015383057816084042, 'time_algorithm_update': 0.009411274693733039, 'loss': 0.1880340536888472, 'time_step': 0.00963242109431777, 'init_value': -8.896206855773926, 'ave_value': -16.142441366150816, 'soft_opc': nan} step=8600




2022-04-22 01:20.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.35 [info     ] FQE_20220422011912: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00014975389768910963, 'time_algorithm_update': 0.00894385437632716, 'loss': 0.20131449049425332, 'time_step': 0.009158785260000895, 'init_value': -9.298234939575195, 'ave_value': -16.634103701715482, 'soft_opc': nan} step=8944




2022-04-22 01:20.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.39 [info     ] FQE_20220422011912: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015667080879211426, 'time_algorithm_update': 0.009450195833694103, 'loss': 0.21041102819469606, 'time_step': 0.009673965531726217, 'init_value': -9.480361938476562, 'ave_value': -16.789687387038416, 'soft_opc': nan} step=9288




2022-04-22 01:20.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.42 [info     ] FQE_20220422011912: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015457009160241416, 'time_algorithm_update': 0.009061872959136963, 'loss': 0.22239155000140673, 'time_step': 0.009282336678615836, 'init_value': -9.727973937988281, 'ave_value': -17.253161684621904, 'soft_opc': nan} step=9632




2022-04-22 01:20.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.46 [info     ] FQE_20220422011912: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015659942183383676, 'time_algorithm_update': 0.009408428918483645, 'loss': 0.22921083354239546, 'time_step': 0.009632999813833903, 'init_value': -9.625250816345215, 'ave_value': -17.40963194314097, 'soft_opc': nan} step=9976




2022-04-22 01:20.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.49 [info     ] FQE_20220422011912: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001554107943246531, 'time_algorithm_update': 0.00888601915780888, 'loss': 0.2339508406287276, 'time_step': 0.009108015271120293, 'init_value': -9.838729858398438, 'ave_value': -17.62209138322647, 'soft_opc': nan} step=10320




2022-04-22 01:20.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.53 [info     ] FQE_20220422011912: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015010944632596747, 'time_algorithm_update': 0.009393450825713402, 'loss': 0.2524725406073294, 'time_step': 0.00961370870124462, 'init_value': -9.823213577270508, 'ave_value': -17.600848474730487, 'soft_opc': nan} step=10664




2022-04-22 01:20.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:20.56 [info     ] FQE_20220422011912: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015446820924448412, 'time_algorithm_update': 0.009073658044948134, 'loss': 0.2646677151625586, 'time_step': 0.009298465972722963, 'init_value': -9.853677749633789, 'ave_value': -17.535227142073012, 'soft_opc': nan} step=11008




2022-04-22 01:20.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.00 [info     ] FQE_20220422011912: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015213739040286043, 'time_algorithm_update': 0.009316613507825275, 'loss': 0.280041947045775, 'time_step': 0.009535653646602187, 'init_value': -10.02418327331543, 'ave_value': -17.803780077340768, 'soft_opc': nan} step=11352




2022-04-22 01:21.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.03 [info     ] FQE_20220422011912: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015131886615309605, 'time_algorithm_update': 0.008879667797753977, 'loss': 0.2915803924105455, 'time_step': 0.009100532808969186, 'init_value': -10.518306732177734, 'ave_value': -18.365885470763924, 'soft_opc': nan} step=11696




2022-04-22 01:21.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.07 [info     ] FQE_20220422011912: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015486880790355594, 'time_algorithm_update': 0.009364700594613718, 'loss': 0.31534868228617446, 'time_step': 0.00958790612775226, 'init_value': -10.636474609375, 'ave_value': -18.719037038436042, 'soft_opc': nan} step=12040




2022-04-22 01:21.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.10 [info     ] FQE_20220422011912: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001525698706161144, 'time_algorithm_update': 0.009237627650416174, 'loss': 0.32524035478194874, 'time_step': 0.009456303923629051, 'init_value': -10.410143852233887, 'ave_value': -18.651321653548948, 'soft_opc': nan} step=12384




2022-04-22 01:21.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.14 [info     ] FQE_20220422011912: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015616001084793444, 'time_algorithm_update': 0.009256870940674183, 'loss': 0.3444334437615823, 'time_step': 0.009480577568675196, 'init_value': -10.785664558410645, 'ave_value': -19.172173111343355, 'soft_opc': nan} step=12728




2022-04-22 01:21.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.17 [info     ] FQE_20220422011912: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001539158266644145, 'time_algorithm_update': 0.008993574353151543, 'loss': 0.3543200304536799, 'time_step': 0.009214566197506217, 'init_value': -10.452882766723633, 'ave_value': -18.636803397273663, 'soft_opc': nan} step=13072




2022-04-22 01:21.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.21 [info     ] FQE_20220422011912: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015520356422246887, 'time_algorithm_update': 0.009230230436768643, 'loss': 0.37517685912073007, 'time_step': 0.009455596984818924, 'init_value': -10.310138702392578, 'ave_value': -18.531750132717605, 'soft_opc': nan} step=13416




2022-04-22 01:21.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.24 [info     ] FQE_20220422011912: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015253313752107843, 'time_algorithm_update': 0.009370200162710146, 'loss': 0.38533536593299794, 'time_step': 0.009589886942575144, 'init_value': -10.561402320861816, 'ave_value': -18.79683975987556, 'soft_opc': nan} step=13760




2022-04-22 01:21.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.28 [info     ] FQE_20220422011912: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015230927356453829, 'time_algorithm_update': 0.009218668521836747, 'loss': 0.39051775503795333, 'time_step': 0.009437851434530214, 'init_value': -10.821435928344727, 'ave_value': -18.998629252908348, 'soft_opc': nan} step=14104




2022-04-22 01:21.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.31 [info     ] FQE_20220422011912: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.000153632358063099, 'time_algorithm_update': 0.009160499933154084, 'loss': 0.42010856163146537, 'time_step': 0.009383019319800444, 'init_value': -10.769553184509277, 'ave_value': -19.1155306073386, 'soft_opc': nan} step=14448




2022-04-22 01:21.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.34 [info     ] FQE_20220422011912: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001514401546744413, 'time_algorithm_update': 0.0090926282627638, 'loss': 0.42933421724118553, 'time_step': 0.009311461171438528, 'init_value': -10.589651107788086, 'ave_value': -18.840667417360184, 'soft_opc': nan} step=14792




2022-04-22 01:21.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.38 [info     ] FQE_20220422011912: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015388671741929164, 'time_algorithm_update': 0.009398858907610871, 'loss': 0.44616514813631425, 'time_step': 0.009621647208235985, 'init_value': -10.44680404663086, 'ave_value': -18.861254362224365, 'soft_opc': nan} step=15136




2022-04-22 01:21.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.41 [info     ] FQE_20220422011912: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001529420531073282, 'time_algorithm_update': 0.00904629674068717, 'loss': 0.4593684910579996, 'time_step': 0.009266493625419085, 'init_value': -10.717423439025879, 'ave_value': -18.81183667684683, 'soft_opc': nan} step=15480




2022-04-22 01:21.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.45 [info     ] FQE_20220422011912: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015273274377334948, 'time_algorithm_update': 0.009158225946648176, 'loss': 0.4731274592787634, 'time_step': 0.00937903273937314, 'init_value': -10.760021209716797, 'ave_value': -18.85598042452863, 'soft_opc': nan} step=15824




2022-04-22 01:21.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.48 [info     ] FQE_20220422011912: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.000153416117956472, 'time_algorithm_update': 0.009110616390095201, 'loss': 0.4929496660168001, 'time_step': 0.009329617716545282, 'init_value': -10.830995559692383, 'ave_value': -18.933417651399566, 'soft_opc': nan} step=16168




2022-04-22 01:21.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.52 [info     ] FQE_20220422011912: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015544198280157044, 'time_algorithm_update': 0.00935490602670714, 'loss': 0.5019339101746418, 'time_step': 0.009578976520272188, 'init_value': -11.084185600280762, 'ave_value': -19.028839525544328, 'soft_opc': nan} step=16512




2022-04-22 01:21.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.55 [info     ] FQE_20220422011912: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001533932464067326, 'time_algorithm_update': 0.009196023608362951, 'loss': 0.5221575298346579, 'time_step': 0.009416530298632245, 'init_value': -10.808332443237305, 'ave_value': -18.89293005408862, 'soft_opc': nan} step=16856




2022-04-22 01:21.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:21.59 [info     ] FQE_20220422011912: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015599644461343455, 'time_algorithm_update': 0.009104444537051889, 'loss': 0.5266210629962125, 'time_step': 0.0093305415885393, 'init_value': -11.176867485046387, 'ave_value': -19.018722337430663, 'soft_opc': nan} step=17200




2022-04-22 01:21.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422011912/model_17200.pt
search iteration:  10
using hyper params:  [0.0042193474820164155, 0.002983850073262814, 7.281155503866694e-05, 1]
2022-04-22 01:21.59 [debug    ] RoundIterator is selected.
2022-04-22 01:21.59 [info     ] Directory is created at d3rlpy_logs/CQL_20220422012159
2022-04-22 01:21.59 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 01:21.59 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 01:21.59 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422012159/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0042193474820164155, 'actor_optim_factory': {'opt

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:22.19 [info     ] CQL_20220422012159: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00030140656267287416, 'time_algorithm_update': 0.05555419081208334, 'temp_loss': 4.896023894321023, 'temp': 0.9869960875524951, 'alpha_loss': -17.661567125706313, 'alpha': 1.0177413566264113, 'critic_loss': 26.686941659519437, 'actor_loss': -1.9080426183050079, 'time_step': 0.05593942906815193, 'td_error': 1.2159221220291099, 'init_value': 0.2655371129512787, 'ave_value': 0.4513043081256282} step=346
2022-04-22 01:22.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:22.40 [info     ] CQL_20220422012159: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003159735244133569, 'time_algorithm_update': 0.05605121844076697, 'temp_loss': 4.8561533806640975, 'temp': 0.9622834808909135, 'alpha_loss': -18.337596860235138, 'alpha': 1.0542306865570863, 'critic_loss': 30.97557973034809, 'actor_loss': -1.8120184971417994, 'time_step': 0.056453505692454435, 'td_error': 1.208931640879127, 'init_value': -0.07605674117803574, 'ave_value': 0.22374745697955942} step=692
2022-04-22 01:22.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:23.00 [info     ] CQL_20220422012159: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0002977758473743593, 'time_algorithm_update': 0.05702179773694518, 'temp_loss': 4.737943590031883, 'temp': 0.9387075430740511, 'alpha_loss': -19.003111017921757, 'alpha': 1.0925214779859334, 'critic_loss': 40.85936918975301, 'actor_loss': -1.4123334686535631, 'time_step': 0.05740546215476328, 'td_error': 1.2074691364534713, 'init_value': -0.4717085063457489, 'ave_value': -0.06521579058052183} step=1038
2022-04-22 01:23.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:23.21 [info     ] CQL_20220422012159: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0002992297872642561, 'time_algorithm_update': 0.05701056036645966, 'temp_loss': 4.623143841076448, 'temp': 0.915972183033221, 'alpha_loss': -19.705033533834996, 'alpha': 1.1327061670363983, 'critic_loss': 53.626518227461446, 'actor_loss': -0.9815596631496628, 'time_step': 0.057396853590287226, 'td_error': 1.204004479808047, 'init_value': -0.6003909111022949, 'ave_value': -0.18013679640391472} step=1384
2022-04-22 01:23.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:23.42 [info     ] CQL_20220422012159: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00030361710256234756, 'time_algorithm_update': 0.057046953653324545, 'temp_loss': 4.512198169796453, 'temp': 0.893981673641701, 'alpha_loss': -20.44096029424943, 'alpha': 1.174825891593977, 'critic_loss': 69.02610636584332, 'actor_loss': -0.5561755380749358, 'time_step': 0.0574370464148549, 'td_error': 1.2115020656718873, 'init_value': -1.0771409273147583, 'ave_value': -0.571388083242053} step=1730
2022-04-22 01:23.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:24.03 [info     ] CQL_20220422012159: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0002992325435484076, 'time_algorithm_update': 0.05731886590836365, 'temp_loss': 4.406295559998882, 'temp': 0.8726664209641473, 'alpha_loss': -21.205051008676516, 'alpha': 1.2189099836900743, 'critic_loss': 87.43275162801577, 'actor_loss': -0.2435134546562082, 'time_step': 0.05770544509667193, 'td_error': 1.2112032514926652, 'init_value': -1.276505470275879, 'ave_value': -0.7939902963218332} step=2076
2022-04-22 01:24.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:24.23 [info     ] CQL_20220422012159: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00030832690310616026, 'time_algorithm_update': 0.057177364481666874, 'temp_loss': 4.302401356614394, 'temp': 0.8519761617473095, 'alpha_loss': -22.005923414505975, 'alpha': 1.2649987401989844, 'critic_loss': 110.60937041354318, 'actor_loss': -0.10167571668367768, 'time_step': 0.05757415225740113, 'td_error': 1.2095397362306943, 'init_value': -1.1411887407302856, 'ave_value': -0.6998573734557928} step=2422
2022-04-22 01:24.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:24.44 [info     ] CQL_20220422012159: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00030623488343519973, 'time_algorithm_update': 0.05719810965433286, 'temp_loss': 4.201125816113687, 'temp': 0.8318719608935318, 'alpha_loss': -22.84261248153069, 'alpha': 1.3131277709338016, 'critic_loss': 141.40081112371016, 'actor_loss': -0.2651266792810635, 'time_step': 0.05759266552897547, 'td_error': 1.2099739713087216, 'init_value': -0.7609565854072571, 'ave_value': -0.4122510405302387} step=2768
2022-04-22 01:24.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:25.05 [info     ] CQL_20220422012159: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0002989197052972165, 'time_algorithm_update': 0.05706383313746811, 'temp_loss': 4.102289551255331, 'temp': 0.8123154352510595, 'alpha_loss': -23.71114792024469, 'alpha': 1.3633407778133546, 'critic_loss': 180.85216244383355, 'actor_loss': -0.6996863965871017, 'time_step': 0.057449553054192165, 'td_error': 1.2117447346531969, 'init_value': -0.37556883692741394, 'ave_value': -0.0673622288484605} step=3114
2022-04-22 01:25.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:25.26 [info     ] CQL_20220422012159: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003051985205942496, 'time_algorithm_update': 0.05769765928301508, 'temp_loss': 4.006903270076465, 'temp': 0.7932798619559734, 'alpha_loss': -24.617614084585554, 'alpha': 1.4156788966559262, 'critic_loss': 226.20809676330214, 'actor_loss': -1.223348764325842, 'time_step': 0.058090796360390724, 'td_error': 1.2157993829564202, 'init_value': 0.07645920664072037, 'ave_value': 0.29667559186108816} step=3460
2022-04-22 01:25.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:25.46 [info     ] CQL_20220422012159: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00030311683698885705, 'time_algorithm_update': 0.056288227180525056, 'temp_loss': 3.9124072783255164, 'temp': 0.7747378049558298, 'alpha_loss': -25.566123582035132, 'alpha': 1.4702033155915366, 'critic_loss': 271.26581026639553, 'actor_loss': -1.712033313133813, 'time_step': 0.05667696936282119, 'td_error': 1.2217210855640586, 'init_value': 0.4647177755832672, 'ave_value': 0.6473037247524379} step=3806
2022-04-22 01:25.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:26.06 [info     ] CQL_20220422012159: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00029948887797449366, 'time_algorithm_update': 0.054217956658732684, 'temp_loss': 3.8221501704585346, 'temp': 0.756662936741217, 'alpha_loss': -26.55363571299294, 'alpha': 1.5269731751756173, 'critic_loss': 314.75225750697143, 'actor_loss': -2.18044750504411, 'time_step': 0.054603221099500714, 'td_error': 1.2252279646349982, 'init_value': 0.9261269569396973, 'ave_value': 1.0350914229822137} step=4152
2022-04-22 01:26.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:26.26 [info     ] CQL_20220422012159: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0002942312659555777, 'time_algorithm_update': 0.05369188744208716, 'temp_loss': 3.732589845023403, 'temp': 0.7390373478390578, 'alpha_loss': -27.578837174211625, 'alpha': 1.5860505669103193, 'critic_loss': 358.60945446918464, 'actor_loss': -2.6595622149506055, 'time_step': 0.05407054851509932, 'td_error': 1.2293284160991946, 'init_value': 1.4187004566192627, 'ave_value': 1.4880899866930735} step=4498
2022-04-22 01:26.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:26.45 [info     ] CQL_20220422012159: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0002987205637672733, 'time_algorithm_update': 0.05324230511064474, 'temp_loss': 3.645229478102888, 'temp': 0.7218479858657528, 'alpha_loss': -28.649662050897675, 'alpha': 1.647504413058992, 'critic_loss': 406.14254919504157, 'actor_loss': -3.1246453902624935, 'time_step': 0.05362636160988339, 'td_error': 1.2316938922409328, 'init_value': 1.9149210453033447, 'ave_value': 1.95503558239817} step=4844
2022-04-22 01:26.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:27.05 [info     ] CQL_20220422012159: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003047712965507728, 'time_algorithm_update': 0.054063185791059724, 'temp_loss': 3.5609777139101415, 'temp': 0.7050723507569704, 'alpha_loss': -29.76637522747062, 'alpha': 1.7114249057852464, 'critic_loss': 456.6280209756311, 'actor_loss': -3.5904065935597944, 'time_step': 0.0544545891657041, 'td_error': 1.2342943372118225, 'init_value': 2.446446180343628, 'ave_value': 2.4709736836810086} step=5190
2022-04-22 01:27.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:27.24 [info     ] CQL_20220422012159: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00030066167688094124, 'time_algorithm_update': 0.054338529619867404, 'temp_loss': 3.4781839771766885, 'temp': 0.6886971666978273, 'alpha_loss': -30.914598911483854, 'alpha': 1.7778720421598138, 'critic_loss': 514.1752609517533, 'actor_loss': -4.009019343839215, 'time_step': 0.05472790161309215, 'td_error': 1.2348221785975169, 'init_value': 2.840651512145996, 'ave_value': 2.8569469330603616} step=5536
2022-04-22 01:27.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:27.44 [info     ] CQL_20220422012159: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.000303112013491592, 'time_algorithm_update': 0.054212928507369376, 'temp_loss': 3.39791984640794, 'temp': 0.6727123007264441, 'alpha_loss': -32.11948182541511, 'alpha': 1.8469443369463, 'critic_loss': 582.8462591446893, 'actor_loss': -4.418136537419579, 'time_step': 0.05460341403939131, 'td_error': 1.2366548171959508, 'init_value': 3.3271093368530273, 'ave_value': 3.3385888583629604} step=5882
2022-04-22 01:27.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:28.04 [info     ] CQL_20220422012159: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00029905200693648677, 'time_algorithm_update': 0.054224120399166395, 'temp_loss': 3.3189629240532144, 'temp': 0.6571040143167353, 'alpha_loss': -33.36731921730703, 'alpha': 1.9187414887323544, 'critic_loss': 662.6781810253342, 'actor_loss': -4.7856120498194175, 'time_step': 0.05461118538255636, 'td_error': 1.236570748776456, 'init_value': 3.6323082447052, 'ave_value': 3.6469376485570453} step=6228
2022-04-22 01:28.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:28.23 [info     ] CQL_20220422012159: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003043557867149397, 'time_algorithm_update': 0.053142883185017316, 'temp_loss': 3.2419118088793892, 'temp': 0.6418648102035412, 'alpha_loss': -34.65815407416724, 'alpha': 1.9933473030266735, 'critic_loss': 753.8619144858652, 'actor_loss': -5.096186861137435, 'time_step': 0.05353438647496218, 'td_error': 1.237552939230961, 'init_value': 3.9497292041778564, 'ave_value': 3.9648672007338703} step=6574
2022-04-22 01:28.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:28.42 [info     ] CQL_20220422012159: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003053955949110792, 'time_algorithm_update': 0.05253392630229796, 'temp_loss': 3.1674944258838718, 'temp': 0.6269804373641924, 'alpha_loss': -36.010790797327296, 'alpha': 2.0708737848811065, 'critic_loss': 855.5323435171491, 'actor_loss': -5.396602663690644, 'time_step': 0.05292626681355383, 'td_error': 1.2390924524709164, 'init_value': 4.328792572021484, 'ave_value': 4.339595226995071} step=6920
2022-04-22 01:28.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:29.02 [info     ] CQL_20220422012159: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00029743268999750215, 'time_algorithm_update': 0.052919678605360794, 'temp_loss': 3.09282280737265, 'temp': 0.6124467320869423, 'alpha_loss': -37.41618118947641, 'alpha': 2.1514445184972244, 'critic_loss': 967.6980874827832, 'actor_loss': -5.6913148474831114, 'time_step': 0.05330461160295961, 'td_error': 1.2406210756055958, 'init_value': 4.6157732009887695, 'ave_value': 4.623373777236686} step=7266
2022-04-22 01:29.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:29.21 [info     ] CQL_20220422012159: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00030507724409158517, 'time_algorithm_update': 0.053249112443427817, 'temp_loss': 3.0217058334736464, 'temp': 0.598252484736415, 'alpha_loss': -38.86513425573448, 'alpha': 2.235157025342732, 'critic_loss': 1095.4275487999007, 'actor_loss': -5.90594413376957, 'time_step': 0.053634689033375996, 'td_error': 1.241034728751567, 'init_value': 4.806446552276611, 'ave_value': 4.817520193988082} step=7612
2022-04-22 01:29.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:29.40 [info     ] CQL_20220422012159: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00028976470748813164, 'time_algorithm_update': 0.05286302594091162, 'temp_loss': 2.95053244326156, 'temp': 0.5843906984852918, 'alpha_loss': -40.37943921612867, 'alpha': 2.3221297491492563, 'critic_loss': 1227.911749161737, 'actor_loss': -6.09903619055114, 'time_step': 0.05323512430135914, 'td_error': 1.2419096565723522, 'init_value': 5.019509792327881, 'ave_value': 5.031354769873585} step=7958
2022-04-22 01:29.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:29.59 [info     ] CQL_20220422012159: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0002959132883590081, 'time_algorithm_update': 0.053076107377950855, 'temp_loss': 2.883390098637928, 'temp': 0.57085008807265, 'alpha_loss': -41.95237860927692, 'alpha': 2.4125022330036052, 'critic_loss': 1372.5200350546424, 'actor_loss': -6.298398541577289, 'time_step': 0.05345354948429703, 'td_error': 1.243009998967986, 'init_value': 5.211798667907715, 'ave_value': 5.220400385723232} step=8304
2022-04-22 01:30.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:30.19 [info     ] CQL_20220422012159: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0002942829462834176, 'time_algorithm_update': 0.052913299874763266, 'temp_loss': 2.816856407705759, 'temp': 0.5576198312933045, 'alpha_loss': -43.58420593614523, 'alpha': 2.506388585691507, 'critic_loss': 1538.4247351850388, 'actor_loss': -6.420008707597765, 'time_step': 0.05328790163029136, 'td_error': 1.2438642335931658, 'init_value': 5.339478969573975, 'ave_value': 5.347396502148885} step=8650
2022-04-22 01:30.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:30.38 [info     ] CQL_20220422012159: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00029971213699076215, 'time_algorithm_update': 0.053140003557150074, 'temp_loss': 2.750793966943818, 'temp': 0.5447001896497142, 'alpha_loss': -45.27773182378339, 'alpha': 2.6039362601462126, 'critic_loss': 1719.0168270044933, 'actor_loss': -6.479672185258369, 'time_step': 0.05352043140830332, 'td_error': 1.244846994863998, 'init_value': 5.440097808837891, 'ave_value': 5.445958262860012} step=8996
2022-04-22 01:30.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:30.58 [info     ] CQL_20220422012159: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00030090698617042146, 'time_algorithm_update': 0.0550443409495271, 'temp_loss': 2.6875147674814124, 'temp': 0.5320787906991264, 'alpha_loss': -47.04458775823516, 'alpha': 2.7052851481244744, 'critic_loss': 1908.6893222345782, 'actor_loss': -6.5144591028290675, 'time_step': 0.05542627855532431, 'td_error': 1.2446464187815454, 'init_value': 5.454855918884277, 'ave_value': 5.464547502988885} step=9342
2022-04-22 01:30.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:31.19 [info     ] CQL_20220422012159: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00030989936321456995, 'time_algorithm_update': 0.05603843410580144, 'temp_loss': 2.625600364855948, 'temp': 0.5197508540112159, 'alpha_loss': -48.87097403906673, 'alpha': 2.810596884330573, 'critic_loss': 2118.731822107569, 'actor_loss': -6.52694238403629, 'time_step': 0.05642859990886181, 'td_error': 1.2445579782385219, 'init_value': 5.434466361999512, 'ave_value': 5.442680026389457} step=9688
2022-04-22 01:31.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:31.39 [info     ] CQL_20220422012159: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003019495506507124, 'time_algorithm_update': 0.05590450970423704, 'temp_loss': 2.5640811720335415, 'temp': 0.5077085033317522, 'alpha_loss': -50.77487184822215, 'alpha': 2.9200011732950375, 'critic_loss': 2336.2314227330203, 'actor_loss': -6.471957724907495, 'time_step': 0.05628843045648123, 'td_error': 1.2452035895637772, 'init_value': 5.458956241607666, 'ave_value': 5.463503626321039} step=10034
2022-04-22 01:31.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:31.59 [info     ] CQL_20220422012159: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003007326511978414, 'time_algorithm_update': 0.05568903374534122, 'temp_loss': 2.5049218511305793, 'temp': 0.4959462147227601, 'alpha_loss': -52.75444154243249, 'alpha': 3.0336807876653067, 'critic_loss': 2571.997133111678, 'actor_loss': -6.413969963272183, 'time_step': 0.05607085351999096, 'td_error': 1.2449181646045753, 'init_value': 5.425718307495117, 'ave_value': 5.431504141046516} step=10380
2022-04-22 01:31.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:32.19 [info     ] CQL_20220422012159: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0002999987905425143, 'time_algorithm_update': 0.05611456405220693, 'temp_loss': 2.4469081211641344, 'temp': 0.4844556683405286, 'alpha_loss': -54.81074426904579, 'alpha': 3.1517865988560496, 'critic_loss': 2815.1397147647217, 'actor_loss': -6.4158972301924155, 'time_step': 0.05649407268259567, 'td_error': 1.2449517963688894, 'init_value': 5.438892364501953, 'ave_value': 5.445376051517955} step=10726
2022-04-22 01:32.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:32.40 [info     ] CQL_20220422012159: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0002983291714177655, 'time_algorithm_update': 0.055720721366088515, 'temp_loss': 2.3901020295358117, 'temp': 0.4732319494720139, 'alpha_loss': -56.941802449309066, 'alpha': 3.2744907430141645, 'critic_loss': 3070.093500214505, 'actor_loss': -6.422004597724517, 'time_step': 0.05610338731997275, 'td_error': 1.2457246472401466, 'init_value': 5.526882648468018, 'ave_value': 5.532013692896532} step=11072
2022-04-22 01:32.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:32.59 [info     ] CQL_20220422012159: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00030027028453143346, 'time_algorithm_update': 0.05221973402651748, 'temp_loss': 2.334662311338965, 'temp': 0.4622685383854574, 'alpha_loss': -59.1558535801882, 'alpha': 3.4019613507166073, 'critic_loss': 3284.919208504561, 'actor_loss': -6.461895350086896, 'time_step': 0.05260112382083959, 'td_error': 1.2461853208477072, 'init_value': 5.573523998260498, 'ave_value': 5.576275546316741} step=11418
2022-04-22 01:32.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:33.18 [info     ] CQL_20220422012159: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00029296130803279106, 'time_algorithm_update': 0.052758918332226706, 'temp_loss': 2.2810834343033717, 'temp': 0.4515581295497156, 'alpha_loss': -61.451762877447756, 'alpha': 3.5343782130004353, 'critic_loss': 3560.594729384935, 'actor_loss': -6.415100176210348, 'time_step': 0.053136967510157236, 'td_error': 1.246158321385727, 'init_value': 5.543939113616943, 'ave_value': 5.545755420447866} step=11764
2022-04-22 01:33.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:33.37 [info     ] CQL_20220422012159: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0002953392921844659, 'time_algorithm_update': 0.05385617438079305, 'temp_loss': 2.22789748555663, 'temp': 0.44109616579347954, 'alpha_loss': -63.85412620809037, 'alpha': 3.6719664293906593, 'critic_loss': 3819.715199376806, 'actor_loss': -6.414715888183241, 'time_step': 0.05423252086418902, 'td_error': 1.245649237999862, 'init_value': 5.510608673095703, 'ave_value': 5.5158925977573965} step=12110
2022-04-22 01:33.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:33.57 [info     ] CQL_20220422012159: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0002961248331676329, 'time_algorithm_update': 0.0537700728873986, 'temp_loss': 2.1762016605090544, 'temp': 0.4308779019151809, 'alpha_loss': -66.33531398442439, 'alpha': 3.814915038257665, 'critic_loss': 4122.767029161399, 'actor_loss': -6.339794310531175, 'time_step': 0.054150014254399116, 'td_error': 1.245489953226577, 'init_value': 5.469882965087891, 'ave_value': 5.47493708625457} step=12456
2022-04-22 01:33.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:34.17 [info     ] CQL_20220422012159: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0002997307419087845, 'time_algorithm_update': 0.053547090188616274, 'temp_loss': 2.126155370921758, 'temp': 0.42089546743155903, 'alpha_loss': -68.9190049143885, 'alpha': 3.9634283477860377, 'critic_loss': 4426.53511673591, 'actor_loss': -6.245510461013441, 'time_step': 0.05393612040260624, 'td_error': 1.2456862287888932, 'init_value': 5.441305637359619, 'ave_value': 5.442972695380944} step=12802
2022-04-22 01:34.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:34.36 [info     ] CQL_20220422012159: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00029651415830402706, 'time_algorithm_update': 0.053751182004895516, 'temp_loss': 2.0768828516061597, 'temp': 0.41114306501570463, 'alpha_loss': -71.6019502761047, 'alpha': 4.1177168581527095, 'critic_loss': 4717.809161059429, 'actor_loss': -6.151498432104298, 'time_step': 0.05413419249429868, 'td_error': 1.2440430478834306, 'init_value': 5.2297186851501465, 'ave_value': 5.2365754526788555} step=13148
2022-04-22 01:34.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:34.56 [info     ] CQL_20220422012159: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00029369310147500454, 'time_algorithm_update': 0.05375340288085056, 'temp_loss': 2.028509188249621, 'temp': 0.4016183279670043, 'alpha_loss': -74.3912324629767, 'alpha': 4.278023051388691, 'critic_loss': 5042.652720545068, 'actor_loss': -6.012708515101085, 'time_step': 0.05412933109812654, 'td_error': 1.2437446224199111, 'init_value': 5.114199161529541, 'ave_value': 5.1187915792053715} step=13494
2022-04-22 01:34.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:35.15 [info     ] CQL_20220422012159: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0002959532544792043, 'time_algorithm_update': 0.053673580202753146, 'temp_loss': 1.9809731334620129, 'temp': 0.39231580917890363, 'alpha_loss': -77.28582157289361, 'alpha': 4.444559660950148, 'critic_loss': 5444.993858381503, 'actor_loss': -5.81725946740608, 'time_step': 0.054053712442431144, 'td_error': 1.2437630145169731, 'init_value': 5.020425319671631, 'ave_value': 5.0213378803150075} step=13840
2022-04-22 01:35.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:35.35 [info     ] CQL_20220422012159: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00028957934737894574, 'time_algorithm_update': 0.053548603388615425, 'temp_loss': 1.9351476045013163, 'temp': 0.3832286352539338, 'alpha_loss': -80.29882340624154, 'alpha': 4.617592428460975, 'critic_loss': 5569.019709063403, 'actor_loss': -5.7650251388549805, 'time_step': 0.05391939458130412, 'td_error': 1.2449178745947325, 'init_value': 5.146718502044678, 'ave_value': 5.142960966498211} step=14186
2022-04-22 01:35.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:35.54 [info     ] CQL_20220422012159: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0002960621277031871, 'time_algorithm_update': 0.05364578100987252, 'temp_loss': 1.8908264271785757, 'temp': 0.37434984528260423, 'alpha_loss': -83.42123615948451, 'alpha': 4.797343886656568, 'critic_loss': 4884.071241081106, 'actor_loss': -6.0685543807255735, 'time_step': 0.05402759458288292, 'td_error': 1.2463781310758473, 'init_value': 5.403268337249756, 'ave_value': 5.39920390804213} step=14532
2022-04-22 01:35.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:36.13 [info     ] CQL_20220422012159: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003003750233291891, 'time_algorithm_update': 0.05334041435594503, 'temp_loss': 1.8467198510390486, 'temp': 0.365676559378646, 'alpha_loss': -86.6650775843273, 'alpha': 4.984098105072286, 'critic_loss': 4259.731684513864, 'actor_loss': -6.270876650176296, 'time_step': 0.05372696804862491, 'td_error': 1.2477770636787955, 'init_value': 5.641123294830322, 'ave_value': 5.636086564043677} step=14878
2022-04-22 01:36.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:36.33 [info     ] CQL_20220422012159: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00029636394081777233, 'time_algorithm_update': 0.05338891806630041, 'temp_loss': 1.8040911000588036, 'temp': 0.357206193611801, 'alpha_loss': -90.03247244509659, 'alpha': 5.178112884477384, 'critic_loss': 3762.8341960576226, 'actor_loss': -6.470113955481204, 'time_step': 0.05376946305953009, 'td_error': 1.2488277876109224, 'init_value': 5.850058078765869, 'ave_value': 5.845443069002197} step=15224
2022-04-22 01:36.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:36.52 [info     ] CQL_20220422012159: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00029287861950824716, 'time_algorithm_update': 0.05328343162646872, 'temp_loss': 1.761971742776088, 'temp': 0.3489320872491495, 'alpha_loss': -93.53395691358975, 'alpha': 5.37967597129028, 'critic_loss': 3339.4986155956467, 'actor_loss': -6.716655596143249, 'time_step': 0.053660601549754945, 'td_error': 1.2509872168752179, 'init_value': 6.151876449584961, 'ave_value': 6.145918976814049} step=15570
2022-04-22 01:36.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:37.11 [info     ] CQL_20220422012159: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003085343134885578, 'time_algorithm_update': 0.05337156243406968, 'temp_loss': 1.7215614994137274, 'temp': 0.34084907380831725, 'alpha_loss': -97.19089609487898, 'alpha': 5.589089990351241, 'critic_loss': 3019.066258072164, 'actor_loss': -6.936466623592928, 'time_step': 0.053764445933303394, 'td_error': 1.2518722747982411, 'init_value': 6.339350700378418, 'ave_value': 6.336515296946888} step=15916
2022-04-22 01:37.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:37.31 [info     ] CQL_20220422012159: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0002938846632235312, 'time_algorithm_update': 0.054102239581201805, 'temp_loss': 1.6816779230371377, 'temp': 0.33295133365372015, 'alpha_loss': -100.9609863193049, 'alpha': 5.8066627303988945, 'critic_loss': 2678.0600261357476, 'actor_loss': -7.209643453531871, 'time_step': 0.05448171031268346, 'td_error': 1.254791287225203, 'init_value': 6.696854114532471, 'ave_value': 6.69223268245735} step=16262
2022-04-22 01:37.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:37.51 [info     ] CQL_20220422012159: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0002949313621300493, 'time_algorithm_update': 0.054275389351596724, 'temp_loss': 1.6433035596946761, 'temp': 0.3252365834623403, 'alpha_loss': -104.90360420976761, 'alpha': 6.032705749390442, 'critic_loss': 2397.303341198519, 'actor_loss': -7.516199692136291, 'time_step': 0.05465291690275159, 'td_error': 1.2573347044213363, 'init_value': 7.020208358764648, 'ave_value': 7.016249139504502} step=16608
2022-04-22 01:37.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:38.11 [info     ] CQL_20220422012159: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00029056816431828316, 'time_algorithm_update': 0.0544001525537127, 'temp_loss': 1.6042070740220176, 'temp': 0.31770113635958963, 'alpha_loss': -108.98774053320031, 'alpha': 6.2675607507628515, 'critic_loss': 2144.946203683842, 'actor_loss': -7.813156585472857, 'time_step': 0.05477440701743771, 'td_error': 1.259961337799583, 'init_value': 7.323796272277832, 'ave_value': 7.320308976062614} step=16954
2022-04-22 01:38.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:38.30 [info     ] CQL_20220422012159: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00029541095557240394, 'time_algorithm_update': 0.054181524783889684, 'temp_loss': 1.5675373904277823, 'temp': 0.3103422216769588, 'alpha_loss': -113.23143816821148, 'alpha': 6.511552599813208, 'critic_loss': 2009.304780640354, 'actor_loss': -8.035140752792358, 'time_step': 0.054563090291326444, 'td_error': 1.2610613978024816, 'init_value': 7.5062055587768555, 'ave_value': 7.507249036000316} step=17300
2022-04-22 01:38.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422012159/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 01:38.32 [info     ] FQE_20220422013831: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014501163758427263, 'time_algorithm_update': 0.008475603827510971, 'loss': 0.00692556696770017, 'time_step': 0.008690617170678564, 'init_value': 0.09591488540172577, 'ave_value': 0.13244478430615755, 'soft_opc': nan} step=166




2022-04-22 01:38.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.34 [info     ] FQE_20220422013831: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00012598554772066782, 'time_algorithm_update': 0.00818878484059529, 'loss': 0.0041432573154276935, 'time_step': 0.008370215634265578, 'init_value': 0.025144478306174278, 'ave_value': 0.10092895459383726, 'soft_opc': nan} step=332




2022-04-22 01:38.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.35 [info     ] FQE_20220422013831: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00012853777552225502, 'time_algorithm_update': 0.008239832269140037, 'loss': 0.0034643897025127815, 'time_step': 0.008423937372414463, 'init_value': -0.023945990949869156, 'ave_value': 0.07221304665546159, 'soft_opc': nan} step=498




2022-04-22 01:38.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.37 [info     ] FQE_20220422013831: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012649398252188442, 'time_algorithm_update': 0.00819079847220915, 'loss': 0.003116575202971966, 'time_step': 0.0083695635738143, 'init_value': -0.12750738859176636, 'ave_value': -0.0003935342092436176, 'soft_opc': nan} step=664




2022-04-22 01:38.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.38 [info     ] FQE_20220422013831: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00012739451534776803, 'time_algorithm_update': 0.008246271007032279, 'loss': 0.0028344414173064373, 'time_step': 0.008431938757379371, 'init_value': -0.20756492018699646, 'ave_value': -0.044503917905023775, 'soft_opc': nan} step=830




2022-04-22 01:38.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.40 [info     ] FQE_20220422013831: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00013612120984548545, 'time_algorithm_update': 0.008249581578266189, 'loss': 0.002325948697363352, 'time_step': 0.008443589670112333, 'init_value': -0.2473774403333664, 'ave_value': -0.07009912362775288, 'soft_opc': nan} step=996




2022-04-22 01:38.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.41 [info     ] FQE_20220422013831: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014871717935585114, 'time_algorithm_update': 0.00832268272537783, 'loss': 0.0021923931437566966, 'time_step': 0.008536795535719538, 'init_value': -0.3183358609676361, 'ave_value': -0.10265615416338315, 'soft_opc': nan} step=1162




2022-04-22 01:38.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.43 [info     ] FQE_20220422013831: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001464636929063912, 'time_algorithm_update': 0.008187136018132589, 'loss': 0.0019785495603271953, 'time_step': 0.008393942591655686, 'init_value': -0.38170403242111206, 'ave_value': -0.13652212755536441, 'soft_opc': nan} step=1328




2022-04-22 01:38.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.44 [info     ] FQE_20220422013831: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001460385609822101, 'time_algorithm_update': 0.008424010621495994, 'loss': 0.0017482154032759682, 'time_step': 0.00863408036978848, 'init_value': -0.43901118636131287, 'ave_value': -0.1785182038515068, 'soft_opc': nan} step=1494




2022-04-22 01:38.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.46 [info     ] FQE_20220422013831: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014743316604430416, 'time_algorithm_update': 0.008501525384834013, 'loss': 0.0019459461397771914, 'time_step': 0.008719108190881201, 'init_value': -0.5217738151550293, 'ave_value': -0.22846674383575985, 'soft_opc': nan} step=1660




2022-04-22 01:38.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.47 [info     ] FQE_20220422013831: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00014566800680505224, 'time_algorithm_update': 0.00841912016811141, 'loss': 0.0018940034493557122, 'time_step': 0.008632213236337685, 'init_value': -0.5908602476119995, 'ave_value': -0.26646464748962506, 'soft_opc': nan} step=1826




2022-04-22 01:38.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.49 [info     ] FQE_20220422013831: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014495131481124694, 'time_algorithm_update': 0.007939551250044122, 'loss': 0.0020634683998475545, 'time_step': 0.00815621916070042, 'init_value': -0.6665077209472656, 'ave_value': -0.31800297011394757, 'soft_opc': nan} step=1992




2022-04-22 01:38.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.50 [info     ] FQE_20220422013831: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014530463391039744, 'time_algorithm_update': 0.008354745715497488, 'loss': 0.002230626044595749, 'time_step': 0.008564747959734446, 'init_value': -0.7344411611557007, 'ave_value': -0.349561995037974, 'soft_opc': nan} step=2158




2022-04-22 01:38.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.52 [info     ] FQE_20220422013831: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014352798461914062, 'time_algorithm_update': 0.0083632268101336, 'loss': 0.0023886129508673, 'time_step': 0.008571978074958525, 'init_value': -0.8193845152854919, 'ave_value': -0.4108249187805094, 'soft_opc': nan} step=2324




2022-04-22 01:38.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.54 [info     ] FQE_20220422013831: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001487358506903591, 'time_algorithm_update': 0.008447164512542358, 'loss': 0.0026455233222454316, 'time_step': 0.008659802287457937, 'init_value': -0.8844239711761475, 'ave_value': -0.45191582222007687, 'soft_opc': nan} step=2490




2022-04-22 01:38.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.55 [info     ] FQE_20220422013831: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001478238278124706, 'time_algorithm_update': 0.007992441395679152, 'loss': 0.0028903133958909123, 'time_step': 0.00820662314633289, 'init_value': -0.9449728727340698, 'ave_value': -0.48230154732460373, 'soft_opc': nan} step=2656




2022-04-22 01:38.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.57 [info     ] FQE_20220422013831: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00014618505914527248, 'time_algorithm_update': 0.00836810864597918, 'loss': 0.003085460718082971, 'time_step': 0.008580924516700837, 'init_value': -1.0465474128723145, 'ave_value': -0.574010785799977, 'soft_opc': nan} step=2822




2022-04-22 01:38.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:38.58 [info     ] FQE_20220422013831: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014710139079266284, 'time_algorithm_update': 0.008445489837462643, 'loss': 0.003422765361414437, 'time_step': 0.00865786046866911, 'init_value': -1.0553975105285645, 'ave_value': -0.555102938010886, 'soft_opc': nan} step=2988




2022-04-22 01:38.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.00 [info     ] FQE_20220422013831: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001447760915181723, 'time_algorithm_update': 0.008413277476666921, 'loss': 0.0037635248050960452, 'time_step': 0.008624682943504977, 'init_value': -1.1133817434310913, 'ave_value': -0.5837664560708511, 'soft_opc': nan} step=3154




2022-04-22 01:39.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.01 [info     ] FQE_20220422013831: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014956457069121212, 'time_algorithm_update': 0.008502322507191854, 'loss': 0.003961676720484912, 'time_step': 0.008718812322042075, 'init_value': -1.1801079511642456, 'ave_value': -0.6190515318298125, 'soft_opc': nan} step=3320




2022-04-22 01:39.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.03 [info     ] FQE_20220422013831: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014802633997905687, 'time_algorithm_update': 0.008359557174774537, 'loss': 0.004504861806292109, 'time_step': 0.00857407788196242, 'init_value': -1.1807249784469604, 'ave_value': -0.598648595939147, 'soft_opc': nan} step=3486




2022-04-22 01:39.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.04 [info     ] FQE_20220422013831: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.000146294214639319, 'time_algorithm_update': 0.008562188550650355, 'loss': 0.00471125453439154, 'time_step': 0.008776689150247229, 'init_value': -1.3075454235076904, 'ave_value': -0.6846882025384017, 'soft_opc': nan} step=3652




2022-04-22 01:39.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.06 [info     ] FQE_20220422013831: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001463703362338514, 'time_algorithm_update': 0.00842160776437047, 'loss': 0.005181943565917045, 'time_step': 0.008634202451591032, 'init_value': -1.3779155015945435, 'ave_value': -0.7213803182012057, 'soft_opc': nan} step=3818




2022-04-22 01:39.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.07 [info     ] FQE_20220422013831: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.000148066555161074, 'time_algorithm_update': 0.007999214781336037, 'loss': 0.005565630333569653, 'time_step': 0.008210880210600704, 'init_value': -1.4104033708572388, 'ave_value': -0.7472598633435261, 'soft_opc': nan} step=3984




2022-04-22 01:39.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.09 [info     ] FQE_20220422013831: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00014427196548645756, 'time_algorithm_update': 0.0084444485514997, 'loss': 0.005565143172265905, 'time_step': 0.008653168218681612, 'init_value': -1.5286028385162354, 'ave_value': -0.8243764052066851, 'soft_opc': nan} step=4150




2022-04-22 01:39.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.11 [info     ] FQE_20220422013831: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014979437173131, 'time_algorithm_update': 0.00848731362676046, 'loss': 0.006175665090881647, 'time_step': 0.008701725178454295, 'init_value': -1.580078125, 'ave_value': -0.8473372653143497, 'soft_opc': nan} step=4316




2022-04-22 01:39.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.12 [info     ] FQE_20220422013831: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014486801193421147, 'time_algorithm_update': 0.008447369897221944, 'loss': 0.00675789912175864, 'time_step': 0.008656151323433382, 'init_value': -1.6387667655944824, 'ave_value': -0.8713369757524347, 'soft_opc': nan} step=4482




2022-04-22 01:39.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.14 [info     ] FQE_20220422013831: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.000144836414291198, 'time_algorithm_update': 0.008412869579820749, 'loss': 0.0073181453431992935, 'time_step': 0.008627128888325519, 'init_value': -1.6787464618682861, 'ave_value': -0.9004299997914214, 'soft_opc': nan} step=4648




2022-04-22 01:39.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.15 [info     ] FQE_20220422013831: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015123062823192183, 'time_algorithm_update': 0.00853297365717141, 'loss': 0.007606139499659044, 'time_step': 0.008750486086650067, 'init_value': -1.7638661861419678, 'ave_value': -0.9498038133396557, 'soft_opc': nan} step=4814




2022-04-22 01:39.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.17 [info     ] FQE_20220422013831: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00015077389866472726, 'time_algorithm_update': 0.008429537336510348, 'loss': 0.00806750399036002, 'time_step': 0.008649316178746971, 'init_value': -1.8448669910430908, 'ave_value': -1.0120232397447098, 'soft_opc': nan} step=4980




2022-04-22 01:39.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.18 [info     ] FQE_20220422013831: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001481541668076113, 'time_algorithm_update': 0.008524084665689123, 'loss': 0.008563006216104715, 'time_step': 0.008735490132527179, 'init_value': -1.876076102256775, 'ave_value': -1.0051403816940414, 'soft_opc': nan} step=5146




2022-04-22 01:39.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.20 [info     ] FQE_20220422013831: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001463818262858563, 'time_algorithm_update': 0.00817044384508248, 'loss': 0.00891983426573647, 'time_step': 0.00838148737528238, 'init_value': -1.8787058591842651, 'ave_value': -1.0093837184797938, 'soft_opc': nan} step=5312




2022-04-22 01:39.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.21 [info     ] FQE_20220422013831: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001502080136034862, 'time_algorithm_update': 0.008449337568627783, 'loss': 0.00938315571289992, 'time_step': 0.008670376007815441, 'init_value': -1.925955057144165, 'ave_value': -1.0460145324015537, 'soft_opc': nan} step=5478




2022-04-22 01:39.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.23 [info     ] FQE_20220422013831: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001463918800813606, 'time_algorithm_update': 0.008472395230488605, 'loss': 0.009642868835047022, 'time_step': 0.008683591003877571, 'init_value': -1.961155652999878, 'ave_value': -1.038387237899509, 'soft_opc': nan} step=5644




2022-04-22 01:39.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.25 [info     ] FQE_20220422013831: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00014480338039168393, 'time_algorithm_update': 0.008509271116141814, 'loss': 0.00998639678849042, 'time_step': 0.008718266544571841, 'init_value': -2.046703815460205, 'ave_value': -1.1063176993826018, 'soft_opc': nan} step=5810




2022-04-22 01:39.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.26 [info     ] FQE_20220422013831: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014985182199133448, 'time_algorithm_update': 0.00856907683682729, 'loss': 0.01028703489808607, 'time_step': 0.008782530405435217, 'init_value': -2.047961950302124, 'ave_value': -1.086415751608139, 'soft_opc': nan} step=5976




2022-04-22 01:39.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.28 [info     ] FQE_20220422013831: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001439861504428358, 'time_algorithm_update': 0.008364137396754989, 'loss': 0.010764527715095992, 'time_step': 0.008572676095617822, 'init_value': -2.0856215953826904, 'ave_value': -1.1049061416556034, 'soft_opc': nan} step=6142




2022-04-22 01:39.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.29 [info     ] FQE_20220422013831: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00014868127294333586, 'time_algorithm_update': 0.008403966225773454, 'loss': 0.010791063657435812, 'time_step': 0.008616085512092314, 'init_value': -2.145246982574463, 'ave_value': -1.1605074966309574, 'soft_opc': nan} step=6308




2022-04-22 01:39.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.31 [info     ] FQE_20220422013831: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014978719044880695, 'time_algorithm_update': 0.008405584886849645, 'loss': 0.011700738835041916, 'time_step': 0.008621651006032186, 'init_value': -2.209158182144165, 'ave_value': -1.211075656810725, 'soft_opc': nan} step=6474




2022-04-22 01:39.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.32 [info     ] FQE_20220422013831: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015320404466376248, 'time_algorithm_update': 0.008485548467521208, 'loss': 0.011947421696989416, 'time_step': 0.008705933410001088, 'init_value': -2.2986178398132324, 'ave_value': -1.3027989383476475, 'soft_opc': nan} step=6640




2022-04-22 01:39.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.34 [info     ] FQE_20220422013831: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001470353229936347, 'time_algorithm_update': 0.008181179862424552, 'loss': 0.012283441667790043, 'time_step': 0.008390610476574266, 'init_value': -2.334449291229248, 'ave_value': -1.3232409457751626, 'soft_opc': nan} step=6806




2022-04-22 01:39.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.35 [info     ] FQE_20220422013831: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015245144625744187, 'time_algorithm_update': 0.00838888266000403, 'loss': 0.01211211191215384, 'time_step': 0.008606791496276855, 'init_value': -2.326693534851074, 'ave_value': -1.2768333658341084, 'soft_opc': nan} step=6972




2022-04-22 01:39.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.37 [info     ] FQE_20220422013831: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015451534684882107, 'time_algorithm_update': 0.008492662245968738, 'loss': 0.01278562827371706, 'time_step': 0.008716912154691765, 'init_value': -2.447873115539551, 'ave_value': -1.3915009306290664, 'soft_opc': nan} step=7138




2022-04-22 01:39.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.38 [info     ] FQE_20220422013831: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015127946095294263, 'time_algorithm_update': 0.008459273591099015, 'loss': 0.013133472948495007, 'time_step': 0.008676671120057622, 'init_value': -2.537692070007324, 'ave_value': -1.4673459527644892, 'soft_opc': nan} step=7304




2022-04-22 01:39.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.40 [info     ] FQE_20220422013831: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001488564962364105, 'time_algorithm_update': 0.008618363414902285, 'loss': 0.013564533935873825, 'time_step': 0.008831399032868534, 'init_value': -2.578464984893799, 'ave_value': -1.4868370716166388, 'soft_opc': nan} step=7470




2022-04-22 01:39.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.42 [info     ] FQE_20220422013831: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014573407460408038, 'time_algorithm_update': 0.008446169186787433, 'loss': 0.014052277049211033, 'time_step': 0.008654851511300328, 'init_value': -2.521155834197998, 'ave_value': -1.412638665602316, 'soft_opc': nan} step=7636




2022-04-22 01:39.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.43 [info     ] FQE_20220422013831: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015167586774711148, 'time_algorithm_update': 0.008564225162368223, 'loss': 0.014551923325894042, 'time_step': 0.00878380436495126, 'init_value': -2.656970739364624, 'ave_value': -1.521762044288151, 'soft_opc': nan} step=7802




2022-04-22 01:39.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.45 [info     ] FQE_20220422013831: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014777212257844857, 'time_algorithm_update': 0.008546418454273638, 'loss': 0.015147510767319655, 'time_step': 0.008761588349399796, 'init_value': -2.644160747528076, 'ave_value': -1.4992168405537938, 'soft_opc': nan} step=7968




2022-04-22 01:39.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.46 [info     ] FQE_20220422013831: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.000146050051034215, 'time_algorithm_update': 0.007910658078021314, 'loss': 0.01533014316592893, 'time_step': 0.008122337869850987, 'init_value': -2.6906614303588867, 'ave_value': -1.5561755769105778, 'soft_opc': nan} step=8134




2022-04-22 01:39.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:39.48 [info     ] FQE_20220422013831: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00014880909977189028, 'time_algorithm_update': 0.008472304746329066, 'loss': 0.01564129707417507, 'time_step': 0.008685639105647444, 'init_value': -2.7195355892181396, 'ave_value': -1.5609545623229162, 'soft_opc': nan} step=8300




2022-04-22 01:39.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013831/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 01:39.48 [debug    ] RoundIterator is selected.
2022-04-22 01:39.48 [info     ] Directory is created at d3rlpy_logs/FQE_20220422013948
2022-04-22 01:39.48 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 01:39.48 [debug    ] Building models...
2022-04-22 01:39.48 [debug    ] Models have been built.
2022-04-22 01:39.48 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422013948/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 01:39.51 [info     ] FQE_20220422013948: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00014821110769759778, 'time_algorithm_update': 0.0076232656489971075, 'loss': 0.022669343943815937, 'time_step': 0.007838353168132693, 'init_value': -0.7163808345794678, 'ave_value': -0.6844900227344788, 'soft_opc': nan} step=344




2022-04-22 01:39.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:39.54 [info     ] FQE_20220422013948: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00014893606651661008, 'time_algorithm_update': 0.008460204268610754, 'loss': 0.019979990225523537, 'time_step': 0.008674936932186747, 'init_value': -1.4490313529968262, 'ave_value': -1.420397353622022, 'soft_opc': nan} step=688




2022-04-22 01:39.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:39.57 [info     ] FQE_20220422013948: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015113935914150503, 'time_algorithm_update': 0.008475855339405149, 'loss': 0.021579249729510655, 'time_step': 0.008694438047187274, 'init_value': -2.265648365020752, 'ave_value': -2.278295774731013, 'soft_opc': nan} step=1032




2022-04-22 01:39.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.01 [info     ] FQE_20220422013948: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015233630357786666, 'time_algorithm_update': 0.008336287598277247, 'loss': 0.023018705769615292, 'time_step': 0.00855344949766647, 'init_value': -2.7652835845947266, 'ave_value': -2.848906291027864, 'soft_opc': nan} step=1376




2022-04-22 01:40.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.04 [info     ] FQE_20220422013948: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00014984191850174304, 'time_algorithm_update': 0.00841612316841303, 'loss': 0.02831390161787398, 'time_step': 0.008632641199023225, 'init_value': -3.466308116912842, 'ave_value': -3.6711988383182534, 'soft_opc': nan} step=1720




2022-04-22 01:40.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.07 [info     ] FQE_20220422013948: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015318948169087255, 'time_algorithm_update': 0.008497642223225084, 'loss': 0.03266149909820321, 'time_step': 0.008718331885892291, 'init_value': -3.820899486541748, 'ave_value': -4.142570795159082, 'soft_opc': nan} step=2064




2022-04-22 01:40.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.10 [info     ] FQE_20220422013948: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001536718634671943, 'time_algorithm_update': 0.008501131174176238, 'loss': 0.037636777781339927, 'time_step': 0.00871970625810845, 'init_value': -4.366272926330566, 'ave_value': -4.887270863810638, 'soft_opc': nan} step=2408




2022-04-22 01:40.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.13 [info     ] FQE_20220422013948: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001530314600744913, 'time_algorithm_update': 0.008333473011504772, 'loss': 0.044337191076158695, 'time_step': 0.008554762186006058, 'init_value': -4.580866813659668, 'ave_value': -5.306703692876004, 'soft_opc': nan} step=2752




2022-04-22 01:40.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.17 [info     ] FQE_20220422013948: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015758359154989553, 'time_algorithm_update': 0.00849502031193223, 'loss': 0.04989878129928784, 'time_step': 0.008721748063730638, 'init_value': -5.023348808288574, 'ave_value': -5.942330230034149, 'soft_opc': nan} step=3096




2022-04-22 01:40.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.20 [info     ] FQE_20220422013948: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015631179476893224, 'time_algorithm_update': 0.00847113756246345, 'loss': 0.057941211655541044, 'time_step': 0.00869558855544689, 'init_value': -5.433359146118164, 'ave_value': -6.632674868794175, 'soft_opc': nan} step=3440




2022-04-22 01:40.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.23 [info     ] FQE_20220422013948: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015047469804453295, 'time_algorithm_update': 0.008468237034110137, 'loss': 0.06506210650848009, 'time_step': 0.008687055388162302, 'init_value': -5.688394069671631, 'ave_value': -7.148415933965563, 'soft_opc': nan} step=3784




2022-04-22 01:40.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.26 [info     ] FQE_20220422013948: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015301967776098916, 'time_algorithm_update': 0.008306179628815762, 'loss': 0.07630133029170941, 'time_step': 0.008526631565981133, 'init_value': -5.973669052124023, 'ave_value': -7.677908994633335, 'soft_opc': nan} step=4128




2022-04-22 01:40.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.29 [info     ] FQE_20220422013948: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015311670857806538, 'time_algorithm_update': 0.008520409811374753, 'loss': 0.08498480361019976, 'time_step': 0.008740881847780805, 'init_value': -6.321857452392578, 'ave_value': -8.265376012537393, 'soft_opc': nan} step=4472




2022-04-22 01:40.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.33 [info     ] FQE_20220422013948: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001552472280901532, 'time_algorithm_update': 0.008541447478671407, 'loss': 0.10098289245154796, 'time_step': 0.008768255627432536, 'init_value': -6.673508644104004, 'ave_value': -8.822073542575042, 'soft_opc': nan} step=4816




2022-04-22 01:40.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.36 [info     ] FQE_20220422013948: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015140550081119982, 'time_algorithm_update': 0.008523585491402204, 'loss': 0.1206736145378632, 'time_step': 0.008740009956581647, 'init_value': -7.023674488067627, 'ave_value': -9.3618778741306, 'soft_opc': nan} step=5160




2022-04-22 01:40.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.39 [info     ] FQE_20220422013948: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015893301298451978, 'time_algorithm_update': 0.008374182983886364, 'loss': 0.14322556713920867, 'time_step': 0.008597301189289537, 'init_value': -7.41471004486084, 'ave_value': -9.963741791349005, 'soft_opc': nan} step=5504




2022-04-22 01:40.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.42 [info     ] FQE_20220422013948: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015284848767657612, 'time_algorithm_update': 0.008582730625951013, 'loss': 0.17398200089891636, 'time_step': 0.008803838214208914, 'init_value': -7.593857765197754, 'ave_value': -10.17942484164493, 'soft_opc': nan} step=5848




2022-04-22 01:40.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.45 [info     ] FQE_20220422013948: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015514534573222316, 'time_algorithm_update': 0.008530814287274383, 'loss': 0.1991751462621831, 'time_step': 0.008753660113312477, 'init_value': -8.004988670349121, 'ave_value': -10.706979814849726, 'soft_opc': nan} step=6192




2022-04-22 01:40.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.49 [info     ] FQE_20220422013948: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015177906945694323, 'time_algorithm_update': 0.008491613144098326, 'loss': 0.23036633653371313, 'time_step': 0.008711019920748334, 'init_value': -8.373010635375977, 'ave_value': -11.148284037977557, 'soft_opc': nan} step=6536




2022-04-22 01:40.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.52 [info     ] FQE_20220422013948: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015043380648590798, 'time_algorithm_update': 0.008364897827769434, 'loss': 0.2551929410059698, 'time_step': 0.008581986260968585, 'init_value': -8.939220428466797, 'ave_value': -11.730047344354082, 'soft_opc': nan} step=6880




2022-04-22 01:40.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.55 [info     ] FQE_20220422013948: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015688150428062262, 'time_algorithm_update': 0.008508497892424118, 'loss': 0.2919278697078225, 'time_step': 0.008731830258702123, 'init_value': -9.43120002746582, 'ave_value': -12.291819914618323, 'soft_opc': nan} step=7224




2022-04-22 01:40.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:40.58 [info     ] FQE_20220422013948: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015384236047434252, 'time_algorithm_update': 0.008515524309734965, 'loss': 0.30835916635818605, 'time_step': 0.008738643901292668, 'init_value': -10.073446273803711, 'ave_value': -12.916684359565235, 'soft_opc': nan} step=7568




2022-04-22 01:40.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.01 [info     ] FQE_20220422013948: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001530016577521036, 'time_algorithm_update': 0.008394977381063063, 'loss': 0.3428859311090999, 'time_step': 0.0086137687051019, 'init_value': -10.803196907043457, 'ave_value': -13.703598600469302, 'soft_opc': nan} step=7912




2022-04-22 01:41.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.05 [info     ] FQE_20220422013948: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015347918798757154, 'time_algorithm_update': 0.00834853080816047, 'loss': 0.3611617655287562, 'time_step': 0.008572910414185635, 'init_value': -10.91309928894043, 'ave_value': -13.988308933962909, 'soft_opc': nan} step=8256




2022-04-22 01:41.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.08 [info     ] FQE_20220422013948: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001530155192974002, 'time_algorithm_update': 0.008446506289548652, 'loss': 0.37269806749793855, 'time_step': 0.008666125840918963, 'init_value': -11.039424896240234, 'ave_value': -14.163813985793993, 'soft_opc': nan} step=8600




2022-04-22 01:41.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.11 [info     ] FQE_20220422013948: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015125025150387785, 'time_algorithm_update': 0.008413174817728441, 'loss': 0.3920799505647792, 'time_step': 0.008630631274955218, 'init_value': -11.251155853271484, 'ave_value': -14.499071859390064, 'soft_opc': nan} step=8944




2022-04-22 01:41.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.14 [info     ] FQE_20220422013948: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015365037807198457, 'time_algorithm_update': 0.008513309234796568, 'loss': 0.41676319064572453, 'time_step': 0.00873426365297894, 'init_value': -11.926469802856445, 'ave_value': -15.256162543347953, 'soft_opc': nan} step=9288




2022-04-22 01:41.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.17 [info     ] FQE_20220422013948: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015438226766364518, 'time_algorithm_update': 0.008280031209768252, 'loss': 0.42721300335535994, 'time_step': 0.00850185751914978, 'init_value': -12.39258861541748, 'ave_value': -15.72654987792778, 'soft_opc': nan} step=9632




2022-04-22 01:41.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.21 [info     ] FQE_20220422013948: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015586822531944098, 'time_algorithm_update': 0.008545898420866145, 'loss': 0.4609051125481465, 'time_step': 0.008770101985266043, 'init_value': -12.725165367126465, 'ave_value': -15.884578650524873, 'soft_opc': nan} step=9976




2022-04-22 01:41.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.24 [info     ] FQE_20220422013948: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.000154175037561461, 'time_algorithm_update': 0.008560914632885955, 'loss': 0.4799367542497727, 'time_step': 0.008782551039096921, 'init_value': -12.957490921020508, 'ave_value': -16.308567866250065, 'soft_opc': nan} step=10320




2022-04-22 01:41.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.27 [info     ] FQE_20220422013948: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015306680701499762, 'time_algorithm_update': 0.008543524631234102, 'loss': 0.49739671461717333, 'time_step': 0.00876354547434075, 'init_value': -13.428802490234375, 'ave_value': -16.85016785735214, 'soft_opc': nan} step=10664




2022-04-22 01:41.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.30 [info     ] FQE_20220422013948: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015382503354272178, 'time_algorithm_update': 0.008266541846962861, 'loss': 0.5186607482583198, 'time_step': 0.008487706960633744, 'init_value': -14.18197250366211, 'ave_value': -17.53830983849989, 'soft_opc': nan} step=11008




2022-04-22 01:41.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.33 [info     ] FQE_20220422013948: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015431711840075115, 'time_algorithm_update': 0.008500296709149383, 'loss': 0.5347852973256607, 'time_step': 0.008722769659618998, 'init_value': -14.500092506408691, 'ave_value': -17.829639659080286, 'soft_opc': nan} step=11352




2022-04-22 01:41.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.37 [info     ] FQE_20220422013948: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015065420505612395, 'time_algorithm_update': 0.008483667706334314, 'loss': 0.5462796437868112, 'time_step': 0.008699920981429344, 'init_value': -14.822757720947266, 'ave_value': -18.336497267822292, 'soft_opc': nan} step=11696




2022-04-22 01:41.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.40 [info     ] FQE_20220422013948: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001523765020592268, 'time_algorithm_update': 0.008466517509416092, 'loss': 0.5360605943579834, 'time_step': 0.008687828169312588, 'init_value': -15.437337875366211, 'ave_value': -18.942920096616707, 'soft_opc': nan} step=12040




2022-04-22 01:41.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.43 [info     ] FQE_20220422013948: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015468999396922977, 'time_algorithm_update': 0.008329863465109538, 'loss': 0.5468944651616174, 'time_step': 0.008554290893465974, 'init_value': -16.282169342041016, 'ave_value': -19.706339230833088, 'soft_opc': nan} step=12384




2022-04-22 01:41.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.46 [info     ] FQE_20220422013948: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001491509204687074, 'time_algorithm_update': 0.008482196996378344, 'loss': 0.5678829932260478, 'time_step': 0.00869832621064297, 'init_value': -16.614213943481445, 'ave_value': -19.923714366235963, 'soft_opc': nan} step=12728




2022-04-22 01:41.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.50 [info     ] FQE_20220422013948: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00015320403631343397, 'time_algorithm_update': 0.009459981391596239, 'loss': 0.5722508437628316, 'time_step': 0.009684655555458955, 'init_value': -16.965648651123047, 'ave_value': -20.23391853643598, 'soft_opc': nan} step=13072




2022-04-22 01:41.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.53 [info     ] FQE_20220422013948: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001572467559991881, 'time_algorithm_update': 0.00911763449047887, 'loss': 0.5675174569776065, 'time_step': 0.009343150743218355, 'init_value': -17.38994789123535, 'ave_value': -20.555431753787975, 'soft_opc': nan} step=13416




2022-04-22 01:41.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:41.57 [info     ] FQE_20220422013948: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001547710840092149, 'time_algorithm_update': 0.009182742861814277, 'loss': 0.5649035868700594, 'time_step': 0.009404343228007471, 'init_value': -17.950101852416992, 'ave_value': -21.06352923562171, 'soft_opc': nan} step=13760




2022-04-22 01:41.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.00 [info     ] FQE_20220422013948: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016898471255635106, 'time_algorithm_update': 0.0092852559200553, 'loss': 0.5791485911684631, 'time_step': 0.009523132512735765, 'init_value': -18.05754852294922, 'ave_value': -21.279290467624023, 'soft_opc': nan} step=14104




2022-04-22 01:42.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.04 [info     ] FQE_20220422013948: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001637395038161167, 'time_algorithm_update': 0.009662853423939195, 'loss': 0.5761596907531236, 'time_step': 0.009896742743115092, 'init_value': -18.176483154296875, 'ave_value': -21.367064937306484, 'soft_opc': nan} step=14448




2022-04-22 01:42.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.08 [info     ] FQE_20220422013948: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015633535939593647, 'time_algorithm_update': 0.009314310412074244, 'loss': 0.5609109653674369, 'time_step': 0.00953778347303701, 'init_value': -18.506668090820312, 'ave_value': -21.682140162999968, 'soft_opc': nan} step=14792




2022-04-22 01:42.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.11 [info     ] FQE_20220422013948: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001586876636327699, 'time_algorithm_update': 0.009308277174483898, 'loss': 0.5710504479343504, 'time_step': 0.009534293135931326, 'init_value': -18.725624084472656, 'ave_value': -22.010558394278895, 'soft_opc': nan} step=15136




2022-04-22 01:42.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.15 [info     ] FQE_20220422013948: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016080501467682595, 'time_algorithm_update': 0.009483024131420046, 'loss': 0.567548377458331, 'time_step': 0.009713595689729203, 'init_value': -18.731475830078125, 'ave_value': -21.988263078240326, 'soft_opc': nan} step=15480




2022-04-22 01:42.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.18 [info     ] FQE_20220422013948: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015926776930343274, 'time_algorithm_update': 0.00965222300485123, 'loss': 0.580407272531586, 'time_step': 0.009882176338240158, 'init_value': -18.59246063232422, 'ave_value': -21.87907813450506, 'soft_opc': nan} step=15824




2022-04-22 01:42.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.22 [info     ] FQE_20220422013948: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015945975170579066, 'time_algorithm_update': 0.009628768577132114, 'loss': 0.5724956059590155, 'time_step': 0.009855689004410145, 'init_value': -18.604476928710938, 'ave_value': -22.034613002237638, 'soft_opc': nan} step=16168




2022-04-22 01:42.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.25 [info     ] FQE_20220422013948: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015683368194934933, 'time_algorithm_update': 0.009011508420456288, 'loss': 0.5680781846194592, 'time_step': 0.009238658256308978, 'init_value': -18.79009246826172, 'ave_value': -22.335445983455358, 'soft_opc': nan} step=16512




2022-04-22 01:42.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.29 [info     ] FQE_20220422013948: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001602748105692309, 'time_algorithm_update': 0.009476055932599444, 'loss': 0.5670523334695243, 'time_step': 0.009705253811769708, 'init_value': -18.82476043701172, 'ave_value': -22.49696301255157, 'soft_opc': nan} step=16856




2022-04-22 01:42.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 01:42.32 [info     ] FQE_20220422013948: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001585053843121196, 'time_algorithm_update': 0.009250507798305777, 'loss': 0.5878219562907552, 'time_step': 0.009478284176005873, 'init_value': -18.820903778076172, 'ave_value': -22.407395665859802, 'soft_opc': nan} step=17200




2022-04-22 01:42.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422013948/model_17200.pt
search iteration:  11
using hyper params:  [0.0011155108607659333, 0.00020712753152959376, 3.4035755235188266e-05, 1]
2022-04-22 01:42.32 [debug    ] RoundIterator is selected.
2022-04-22 01:42.32 [info     ] Directory is created at d3rlpy_logs/CQL_20220422014232
2022-04-22 01:42.32 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 01:42.32 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 01:42.32 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422014232/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0011155108607659333, 'actor_optim_factory': {'

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:42.53 [info     ] CQL_20220422014232: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00029822443262000995, 'time_algorithm_update': 0.05580329481576909, 'temp_loss': 4.708870235895146, 'temp': 0.993805096328603, 'alpha_loss': -17.565479162800518, 'alpha': 1.01773164899363, 'critic_loss': 36.07919190246935, 'actor_loss': -1.685452857803058, 'time_step': 0.056191336212819715, 'td_error': 1.249251611119671, 'init_value': 0.023919183760881424, 'ave_value': 0.04179268842134687} step=346
2022-04-22 01:42.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:43.13 [info     ] CQL_20220422014232: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003022175992844422, 'time_algorithm_update': 0.056749216393928305, 'temp_loss': 4.932862008927185, 'temp': 0.9817174185562685, 'alpha_loss': -18.33960168761325, 'alpha': 1.054307604800759, 'critic_loss': 35.22995716161122, 'actor_loss': -1.7894396509738326, 'time_step': 0.05712694791011039, 'td_error': 1.2344843543804116, 'init_value': 0.033933866769075394, 'ave_value': 0.06486007630058435} step=692
2022-04-22 01:43.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:43.34 [info     ] CQL_20220422014232: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0002997617501054885, 'time_algorithm_update': 0.0570750264074072, 'temp_loss': 4.895326936865128, 'temp': 0.9700529632196261, 'alpha_loss': -19.07087821629695, 'alpha': 1.0927692868806034, 'critic_loss': 35.74670463076906, 'actor_loss': -1.754588693208088, 'time_step': 0.05745180700555702, 'td_error': 1.2562420754043409, 'init_value': 0.07454116642475128, 'ave_value': 0.1566001932693565} step=1038
2022-04-22 01:43.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:43.55 [info     ] CQL_20220422014232: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00029364831185754324, 'time_algorithm_update': 0.056420520550942835, 'temp_loss': 4.8402362324598895, 'temp': 0.9586965190192868, 'alpha_loss': -19.732869820787727, 'alpha': 1.1330395750916762, 'critic_loss': 35.76077740178632, 'actor_loss': -1.8177203201834178, 'time_step': 0.05679149228024345, 'td_error': 1.2357205044780726, 'init_value': 0.006068024318665266, 'ave_value': 0.18134819315247425} step=1384
2022-04-22 01:43.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:44.15 [info     ] CQL_20220422014232: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.000304297904747759, 'time_algorithm_update': 0.056560825061246836, 'temp_loss': 4.783773270645582, 'temp': 0.9475746738772861, 'alpha_loss': -20.46856832779901, 'alpha': 1.1751862702342126, 'critic_loss': 38.75101749883222, 'actor_loss': -1.66422612232969, 'time_step': 0.05694485468671501, 'td_error': 1.2223609901665664, 'init_value': -0.10691853612661362, 'ave_value': 0.1296626180896139} step=1730
2022-04-22 01:44.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:44.36 [info     ] CQL_20220422014232: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00029871091677274316, 'time_algorithm_update': 0.05692355687907665, 'temp_loss': 4.73080177114189, 'temp': 0.9366464849152317, 'alpha_loss': -21.21996583001462, 'alpha': 1.2192884118570757, 'critic_loss': 43.100337860901234, 'actor_loss': -1.4432948882869214, 'time_step': 0.057302467395804524, 'td_error': 1.2186063142097006, 'init_value': -0.3134293556213379, 'ave_value': 0.014663259177735881} step=2076
2022-04-22 01:44.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:44.57 [info     ] CQL_20220422014232: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.000303276012398604, 'time_algorithm_update': 0.056952213965399415, 'temp_loss': 4.675467670308373, 'temp': 0.9258922403947466, 'alpha_loss': -22.013534005666745, 'alpha': 1.265366711712986, 'critic_loss': 48.60607434972862, 'actor_loss': -1.17668694119922, 'time_step': 0.0573403401181877, 'td_error': 1.217263385952845, 'init_value': -0.5775882601737976, 'ave_value': -0.1661062709690412} step=2422
2022-04-22 01:44.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:45.17 [info     ] CQL_20220422014232: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0002944993145893075, 'time_algorithm_update': 0.05683231905016596, 'temp_loss': 4.621233339254567, 'temp': 0.915298239516385, 'alpha_loss': -22.84584364587861, 'alpha': 1.3134883628415235, 'critic_loss': 54.755923342842586, 'actor_loss': -0.8726474359373136, 'time_step': 0.057212511928095296, 'td_error': 1.2138531262465087, 'init_value': -0.7504541277885437, 'ave_value': -0.2859659162151723} step=2768
2022-04-22 01:45.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:45.38 [info     ] CQL_20220422014232: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003049097998293838, 'time_algorithm_update': 0.05726421086085325, 'temp_loss': 4.568598801000959, 'temp': 0.9048523492895799, 'alpha_loss': -23.716001218453997, 'alpha': 1.3636952597281837, 'critic_loss': 61.46372360714598, 'actor_loss': -0.5231136695584121, 'time_step': 0.057655195280306604, 'td_error': 1.216824326361349, 'init_value': -1.2894562482833862, 'ave_value': -0.7588446178824599} step=3114
2022-04-22 01:45.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:45.59 [info     ] CQL_20220422014232: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00030848401130279364, 'time_algorithm_update': 0.05673816093819679, 'temp_loss': 4.518248833672849, 'temp': 0.8945419507564147, 'alpha_loss': -24.625687130613823, 'alpha': 1.4160400215600957, 'critic_loss': 68.53106611174655, 'actor_loss': -0.12859760855749852, 'time_step': 0.05713188923852292, 'td_error': 1.2177761112120744, 'init_value': -1.5691145658493042, 'ave_value': -1.0156281856146114} step=3460
2022-04-22 01:45.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:46.20 [info     ] CQL_20220422014232: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0002983836080297569, 'time_algorithm_update': 0.057363049143311605, 'temp_loss': 4.467171521545145, 'temp': 0.8843626398916189, 'alpha_loss': -25.576405927625007, 'alpha': 1.4705776492984308, 'critic_loss': 75.65625804834973, 'actor_loss': 0.23952524970161776, 'time_step': 0.057746109934900536, 'td_error': 1.219831640622986, 'init_value': -1.7403273582458496, 'ave_value': -1.1845828969351286} step=3806
2022-04-22 01:46.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:46.40 [info     ] CQL_20220422014232: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00029442765120136946, 'time_algorithm_update': 0.05691797953809617, 'temp_loss': 4.415174149364406, 'temp': 0.8743134759409579, 'alpha_loss': -26.558817438996595, 'alpha': 1.5273544454161143, 'critic_loss': 82.88122695305444, 'actor_loss': 0.5669689816826513, 'time_step': 0.05729949060892094, 'td_error': 1.2213705754775923, 'init_value': -2.049152135848999, 'ave_value': -1.483972620994432} step=4152
2022-04-22 01:46.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:47.01 [info     ] CQL_20220422014232: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0002993186774281408, 'time_algorithm_update': 0.05713869519316392, 'temp_loss': 4.364801207029751, 'temp': 0.8643883227268395, 'alpha_loss': -27.5893476078276, 'alpha': 1.5864423478958924, 'critic_loss': 90.54648151287454, 'actor_loss': 0.8513828784744174, 'time_step': 0.057524525361254034, 'td_error': 1.2193746704760207, 'init_value': -1.8738387823104858, 'ave_value': -1.3884551080660041} step=4498
2022-04-22 01:47.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:47.22 [info     ] CQL_20220422014232: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003034034905406092, 'time_algorithm_update': 0.057102068311217204, 'temp_loss': 4.31474360840858, 'temp': 0.8545831093898398, 'alpha_loss': -28.65549335038731, 'alpha': 1.6479120475019333, 'critic_loss': 98.15658181251129, 'actor_loss': 1.0809407313435064, 'time_step': 0.05749030540444258, 'td_error': 1.2320876941541814, 'init_value': -2.8482236862182617, 'ave_value': -2.2117899420258276} step=4844
2022-04-22 01:47.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:47.43 [info     ] CQL_20220422014232: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003051633779713184, 'time_algorithm_update': 0.057505172112084536, 'temp_loss': 4.266723798189549, 'temp': 0.8448928461943058, 'alpha_loss': -29.768999154857127, 'alpha': 1.7118378446970373, 'critic_loss': 106.15308953434057, 'actor_loss': 1.2491777680512797, 'time_step': 0.05788663012443939, 'td_error': 1.2280145011775268, 'init_value': -2.7077198028564453, 'ave_value': -2.1143790064687806} step=5190
2022-04-22 01:47.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:48.04 [info     ] CQL_20220422014232: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00029780892278417686, 'time_algorithm_update': 0.05752036681754052, 'temp_loss': 4.218818905725644, 'temp': 0.8353156919424245, 'alpha_loss': -30.92891152332284, 'alpha': 1.7783049982407189, 'critic_loss': 113.71298275104148, 'actor_loss': 1.3406716815998099, 'time_step': 0.05789723630585422, 'td_error': 1.2261895586218372, 'init_value': -2.6538848876953125, 'ave_value': -2.090222429431491} step=5536
2022-04-22 01:48.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:48.25 [info     ] CQL_20220422014232: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00030020551185387407, 'time_algorithm_update': 0.05734652453075255, 'temp_loss': 4.170745113681506, 'temp': 0.8258494457413006, 'alpha_loss': -32.12246388231399, 'alpha': 1.8473947182556107, 'critic_loss': 121.20649567091397, 'actor_loss': 1.3404714596064793, 'time_step': 0.05772567415513055, 'td_error': 1.2251875295685835, 'init_value': -2.5341989994049072, 'ave_value': -2.027629273002212} step=5882
2022-04-22 01:48.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:48.45 [info     ] CQL_20220422014232: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0003004949216897777, 'time_algorithm_update': 0.054850930423405816, 'temp_loss': 4.123842939476058, 'temp': 0.8164931447864268, 'alpha_loss': -33.37212768731089, 'alpha': 1.9191962673484935, 'critic_loss': 128.83896317233928, 'actor_loss': 1.284767907306638, 'time_step': 0.05522959700898628, 'td_error': 1.226718149516765, 'init_value': -2.6246337890625, 'ave_value': -2.0967240103772604} step=6228
2022-04-22 01:48.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:49.04 [info     ] CQL_20220422014232: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00029608969054470175, 'time_algorithm_update': 0.053594812492414705, 'temp_loss': 4.07713363211968, 'temp': 0.8072439676764384, 'alpha_loss': -34.66849997415708, 'alpha': 1.9938219251660254, 'critic_loss': 135.41361523225817, 'actor_loss': 1.1545780862686952, 'time_step': 0.05397100255668508, 'td_error': 1.2241133858534141, 'init_value': -2.284270763397217, 'ave_value': -1.826519441073288} step=6574
2022-04-22 01:49.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:49.24 [info     ] CQL_20220422014232: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003047285741464251, 'time_algorithm_update': 0.05357536966401028, 'temp_loss': 4.031488371722271, 'temp': 0.7980994139448067, 'alpha_loss': -36.02037567623778, 'alpha': 2.0713735922223573, 'critic_loss': 141.88119206952223, 'actor_loss': 0.9441960335122368, 'time_step': 0.053964246904229844, 'td_error': 1.2226929937942144, 'init_value': -2.0313024520874023, 'ave_value': -1.6503728965621471} step=6920
2022-04-22 01:49.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:49.43 [info     ] CQL_20220422014232: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00029903753644469155, 'time_algorithm_update': 0.05176636111529576, 'temp_loss': 3.984726920982317, 'temp': 0.7890613869780061, 'alpha_loss': -37.41907371124091, 'alpha': 2.1519572803739866, 'critic_loss': 148.08937196235436, 'actor_loss': 0.7079708819961272, 'time_step': 0.05215165105169219, 'td_error': 1.2241357445379526, 'init_value': -1.8992829322814941, 'ave_value': -1.5173135589202358} step=7266
2022-04-22 01:49.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:50.01 [info     ] CQL_20220422014232: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00030108476649819084, 'time_algorithm_update': 0.05004131862882934, 'temp_loss': 3.938796969507471, 'temp': 0.7801279256798629, 'alpha_loss': -38.87603983575898, 'alpha': 2.2356860747916163, 'critic_loss': 153.29455636967126, 'actor_loss': 0.41458232584544, 'time_step': 0.050428336755388735, 'td_error': 1.2255597909234899, 'init_value': -1.7143455743789673, 'ave_value': -1.3513577950614049} step=7612
2022-04-22 01:50.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:50.19 [info     ] CQL_20220422014232: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0002923425222407876, 'time_algorithm_update': 0.049804939699999856, 'temp_loss': 3.894779709033194, 'temp': 0.7712959903857611, 'alpha_loss': -40.38552242345204, 'alpha': 2.3226828478664334, 'critic_loss': 158.76093724421682, 'actor_loss': 0.1171548680935292, 'time_step': 0.0501833747577116, 'td_error': 1.2251842568952018, 'init_value': -1.257056713104248, 'ave_value': -0.9902042202485462} step=7958
2022-04-22 01:50.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:50.37 [info     ] CQL_20220422014232: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0002937819916388892, 'time_algorithm_update': 0.05033312367566059, 'temp_loss': 3.850332515777191, 'temp': 0.762563842914008, 'alpha_loss': -41.95823550362118, 'alpha': 2.413066008187443, 'critic_loss': 163.89953630921468, 'actor_loss': -0.1869309344786526, 'time_step': 0.05071454378910836, 'td_error': 1.2271007575949204, 'init_value': -0.9705429077148438, 'ave_value': -0.7345841224722638} step=8304
2022-04-22 01:50.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:50.55 [info     ] CQL_20220422014232: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0002888620244285275, 'time_algorithm_update': 0.04943788809583366, 'temp_loss': 3.8077031601371103, 'temp': 0.753930125622391, 'alpha_loss': -43.59381462935078, 'alpha': 2.5069763384802495, 'critic_loss': 169.00470239716458, 'actor_loss': -0.4826768646178218, 'time_step': 0.04981167743660811, 'td_error': 1.2287758837283216, 'init_value': -0.7636060118675232, 'ave_value': -0.5434728755239855} step=8650
2022-04-22 01:50.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:51.13 [info     ] CQL_20220422014232: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00028697741513996456, 'time_algorithm_update': 0.048314128997008925, 'temp_loss': 3.76508025144566, 'temp': 0.7453927669911026, 'alpha_loss': -45.295804315908796, 'alpha': 2.604558574671001, 'critic_loss': 174.1830775531041, 'actor_loss': -0.7365607992245283, 'time_step': 0.04868517928040786, 'td_error': 1.2302787183038213, 'init_value': -0.549102246761322, 'ave_value': -0.3533207281413605} step=8996
2022-04-22 01:51.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:51.31 [info     ] CQL_20220422014232: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00029572103753944354, 'time_algorithm_update': 0.04990343689229447, 'temp_loss': 3.722823743875316, 'temp': 0.7369514994883124, 'alpha_loss': -47.05884806682609, 'alpha': 2.705944662149242, 'critic_loss': 179.6188660990985, 'actor_loss': -0.9544564516558124, 'time_step': 0.050287295628145254, 'td_error': 1.2315934979597911, 'init_value': -0.3681449592113495, 'ave_value': -0.19623862058701477} step=9342
2022-04-22 01:51.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:51.49 [info     ] CQL_20220422014232: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0002965796200526243, 'time_algorithm_update': 0.048989176750183105, 'temp_loss': 3.67989373689442, 'temp': 0.7286072077089651, 'alpha_loss': -48.89195124675773, 'alpha': 2.8112848246028657, 'critic_loss': 185.33609762908407, 'actor_loss': -1.1348038003968366, 'time_step': 0.049362671857624385, 'td_error': 1.232468491330915, 'init_value': -0.14130692183971405, 'ave_value': -0.00013634428812915988} step=9688
2022-04-22 01:51.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:52.06 [info     ] CQL_20220422014232: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00028561581076914173, 'time_algorithm_update': 0.04936066114833589, 'temp_loss': 3.638890994077473, 'temp': 0.7203575272780622, 'alpha_loss': -50.79335470695716, 'alpha': 2.9207283720115704, 'critic_loss': 190.63645886547994, 'actor_loss': -1.2886522853305573, 'time_step': 0.049726364240480984, 'td_error': 1.233429358461077, 'init_value': 0.004355739802122116, 'ave_value': 0.1309294327224471} step=10034
2022-04-22 01:52.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:52.24 [info     ] CQL_20220422014232: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0002883555572156961, 'time_algorithm_update': 0.049309233020495814, 'temp_loss': 3.5972312516559755, 'temp': 0.7122011396581727, 'alpha_loss': -52.76939195842412, 'alpha': 3.0344330279124265, 'critic_loss': 195.87545516173964, 'actor_loss': -1.422068687188143, 'time_step': 0.049677155610453876, 'td_error': 1.234015167144867, 'init_value': 0.09635217487812042, 'ave_value': 0.21408422186567977} step=10380
2022-04-22 01:52.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:52.42 [info     ] CQL_20220422014232: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00029497339546335913, 'time_algorithm_update': 0.049509452257542254, 'temp_loss': 3.5568328971807666, 'temp': 0.7041372599973844, 'alpha_loss': -54.825416719293315, 'alpha': 3.152559521570371, 'critic_loss': 201.47912677037235, 'actor_loss': -1.5292986028456275, 'time_step': 0.04988578289230435, 'td_error': 1.2345713203870958, 'init_value': 0.19496582448482513, 'ave_value': 0.30476528018105364} step=10726
2022-04-22 01:52.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:53.00 [info     ] CQL_20220422014232: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0002906143320778202, 'time_algorithm_update': 0.04929063774946797, 'temp_loss': 3.516058249280632, 'temp': 0.6961645173888675, 'alpha_loss': -56.951606739463145, 'alpha': 3.2752799505443244, 'critic_loss': 206.58823491796593, 'actor_loss': -1.6197954402493604, 'time_step': 0.04965943890499931, 'td_error': 1.235012668233383, 'init_value': 0.30065250396728516, 'ave_value': 0.4016859007254773} step=11072
2022-04-22 01:53.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:53.19 [info     ] CQL_20220422014232: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00029523317524463456, 'time_algorithm_update': 0.05198788573976197, 'temp_loss': 3.4773796245541875, 'temp': 0.6882830412057094, 'alpha_loss': -59.16688963167929, 'alpha': 3.4027669953473043, 'critic_loss': 211.97802130197513, 'actor_loss': -1.7019894412487229, 'time_step': 0.0523631214406449, 'td_error': 1.2353531656961187, 'init_value': 0.4068906307220459, 'ave_value': 0.4948682619955597} step=11418
2022-04-22 01:53.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:53.39 [info     ] CQL_20220422014232: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00029707437305781195, 'time_algorithm_update': 0.05410508957901442, 'temp_loss': 3.43655040636228, 'temp': 0.6804897835144418, 'alpha_loss': -61.47390108714903, 'alpha': 3.535224468032749, 'critic_loss': 217.47177728200924, 'actor_loss': -1.7765456813608291, 'time_step': 0.05448143192798416, 'td_error': 1.2355763029163458, 'init_value': 0.48391157388687134, 'ave_value': 0.5673023112694083} step=11764
2022-04-22 01:53.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:54.00 [info     ] CQL_20220422014232: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003049649255124131, 'time_algorithm_update': 0.05735841996407922, 'temp_loss': 3.3983893739005735, 'temp': 0.6727870073966208, 'alpha_loss': -63.8704436417949, 'alpha': 3.6728538081825124, 'critic_loss': 223.36805385523448, 'actor_loss': -1.8416757239082646, 'time_step': 0.05774385322725153, 'td_error': 1.2358904304496265, 'init_value': 0.5705949664115906, 'ave_value': 0.6458641307084919} step=12110
2022-04-22 01:54.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:54.21 [info     ] CQL_20220422014232: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00029946338234609264, 'time_algorithm_update': 0.05759586006230702, 'temp_loss': 3.3591829014651347, 'temp': 0.6651707489711012, 'alpha_loss': -66.35389585991126, 'alpha': 3.815837208935291, 'critic_loss': 229.5660370843259, 'actor_loss': -1.9006583149722547, 'time_step': 0.05797207975663202, 'td_error': 1.2361090550379634, 'init_value': 0.6331213116645813, 'ave_value': 0.7026380980252203} step=12456
2022-04-22 01:54.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:54.42 [info     ] CQL_20220422014232: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00031145666376014667, 'time_algorithm_update': 0.05721549698383133, 'temp_loss': 3.3214499977971776, 'temp': 0.6576408280458065, 'alpha_loss': -68.94920071287652, 'alpha': 3.9643933655898693, 'critic_loss': 235.53111346470828, 'actor_loss': -1.9621453850255537, 'time_step': 0.05760692378689099, 'td_error': 1.2363134651357863, 'init_value': 0.699672520160675, 'ave_value': 0.7675494520796482} step=12802
2022-04-22 01:54.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:55.02 [info     ] CQL_20220422014232: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0002993572654062613, 'time_algorithm_update': 0.05709470489810657, 'temp_loss': 3.2828702995542844, 'temp': 0.6501960924939613, 'alpha_loss': -71.62522041453103, 'alpha': 4.118724583201326, 'critic_loss': 242.35872610455993, 'actor_loss': -2.0168515195736307, 'time_step': 0.057473779413741446, 'td_error': 1.2364664548059416, 'init_value': 0.7801273465156555, 'ave_value': 0.8401017403110841} step=13148
2022-04-22 01:55.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:55.23 [info     ] CQL_20220422014232: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003024449927269379, 'time_algorithm_update': 0.05704105038174315, 'temp_loss': 3.2468446107269022, 'temp': 0.6428356062125609, 'alpha_loss': -74.4080819587487, 'alpha': 4.279065021889747, 'critic_loss': 248.97747079485413, 'actor_loss': -2.0630976947056765, 'time_step': 0.05742441505365978, 'td_error': 1.2367182414074513, 'init_value': 0.8200856447219849, 'ave_value': 0.8781782584486555} step=13494
2022-04-22 01:55.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:55.44 [info     ] CQL_20220422014232: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003060839768779071, 'time_algorithm_update': 0.05732355779306048, 'temp_loss': 3.210718252755314, 'temp': 0.6355573509125351, 'alpha_loss': -77.30498976514518, 'alpha': 4.445650237144073, 'critic_loss': 255.82358546615336, 'actor_loss': -2.1177459445302884, 'time_step': 0.057710376778089933, 'td_error': 1.236801418432181, 'init_value': 0.8848364949226379, 'ave_value': 0.9390995228838841} step=13840
2022-04-22 01:55.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:56.05 [info     ] CQL_20220422014232: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.000304297904747759, 'time_algorithm_update': 0.05689285600805558, 'temp_loss': 3.1735128398575534, 'temp': 0.628360996528857, 'alpha_loss': -80.32002174509743, 'alpha': 4.618719395874552, 'critic_loss': 262.73357157624525, 'actor_loss': -2.166332759609112, 'time_step': 0.05727997267177339, 'td_error': 1.2369489563945562, 'init_value': 0.9591526389122009, 'ave_value': 1.0058542148204368} step=14186
2022-04-22 01:56.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:56.25 [info     ] CQL_20220422014232: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00030448257578590703, 'time_algorithm_update': 0.057315002287054335, 'temp_loss': 3.1372648195035198, 'temp': 0.6212476657305149, 'alpha_loss': -83.45257533078937, 'alpha': 4.798529590485413, 'critic_loss': 270.26789238549384, 'actor_loss': -2.2060700751453464, 'time_step': 0.057698021734380996, 'td_error': 1.2370862641047122, 'init_value': 1.0098358392715454, 'ave_value': 1.057595811284736} step=14532
2022-04-22 01:56.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:56.46 [info     ] CQL_20220422014232: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00030064858453122175, 'time_algorithm_update': 0.05712641801448227, 'temp_loss': 3.1023813375847875, 'temp': 0.6142144268647783, 'alpha_loss': -86.69694247824609, 'alpha': 4.985344214246452, 'critic_loss': 277.59779507852016, 'actor_loss': -2.258312195022671, 'time_step': 0.05750865881153614, 'td_error': 1.2372253256708547, 'init_value': 1.0692232847213745, 'ave_value': 1.1090039304219586} step=14878
2022-04-22 01:56.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:57.07 [info     ] CQL_20220422014232: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0002988218572098396, 'time_algorithm_update': 0.05719814341881372, 'temp_loss': 3.0669464761811183, 'temp': 0.6072605924110193, 'alpha_loss': -90.0618051958911, 'alpha': 5.179415730382666, 'critic_loss': 285.4272833146112, 'actor_loss': -2.301762995692347, 'time_step': 0.05757555934046045, 'td_error': 1.2372391122989717, 'init_value': 1.1269536018371582, 'ave_value': 1.1691517536704605} step=15224
2022-04-22 01:57.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:57.28 [info     ] CQL_20220422014232: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003108227184053101, 'time_algorithm_update': 0.05703615866644534, 'temp_loss': 3.0321894553355397, 'temp': 0.6003859453118605, 'alpha_loss': -93.56855590908513, 'alpha': 5.3810423977802255, 'critic_loss': 293.7115168929789, 'actor_loss': -2.3371678246239016, 'time_step': 0.057428569462947075, 'td_error': 1.237280367427753, 'init_value': 1.1697107553482056, 'ave_value': 1.2072949106515782} step=15570
2022-04-22 01:57.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:57.49 [info     ] CQL_20220422014232: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00029619580748453303, 'time_algorithm_update': 0.057167130398612494, 'temp_loss': 2.997959798471087, 'temp': 0.5935895213846526, 'alpha_loss': -97.21946903735916, 'alpha': 5.590537366150431, 'critic_loss': 301.4277640105672, 'actor_loss': -2.3831834365866778, 'time_step': 0.057546830590749755, 'td_error': 1.2373894684495113, 'init_value': 1.2290652990341187, 'ave_value': 1.2658509544141487} step=15916
2022-04-22 01:57.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:58.10 [info     ] CQL_20220422014232: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0002980438960080891, 'time_algorithm_update': 0.05771724337098227, 'temp_loss': 2.9640060780365345, 'temp': 0.5868692198240688, 'alpha_loss': -100.99674478431658, 'alpha': 5.808180226066898, 'critic_loss': 308.983037430427, 'actor_loss': -2.4323150760176553, 'time_step': 0.05809372973579892, 'td_error': 1.2374041144266965, 'init_value': 1.2779090404510498, 'ave_value': 1.3125434107212954} step=16262
2022-04-22 01:58.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:58.30 [info     ] CQL_20220422014232: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00029929938343908056, 'time_algorithm_update': 0.05710809148115919, 'temp_loss': 2.930824910974227, 'temp': 0.5802251582889888, 'alpha_loss': -104.92818865472871, 'alpha': 6.034298477834359, 'critic_loss': 317.61673529299696, 'actor_loss': -2.467794896550261, 'time_step': 0.057487055745428006, 'td_error': 1.2375110634738102, 'init_value': 1.3309426307678223, 'ave_value': 1.3638562079852878} step=16608
2022-04-22 01:58.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:58.51 [info     ] CQL_20220422014232: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003041704266057538, 'time_algorithm_update': 0.05747230411264938, 'temp_loss': 2.8971243290542867, 'temp': 0.5736562862906153, 'alpha_loss': -109.01298240705722, 'alpha': 6.269215208946625, 'critic_loss': 325.21144615570245, 'actor_loss': -2.5055385387012725, 'time_step': 0.0578516687271912, 'td_error': 1.2376156834311458, 'init_value': 1.3914746046066284, 'ave_value': 1.421366266726091} step=16954
2022-04-22 01:58.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 01:59.11 [info     ] CQL_20220422014232: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00029839738945051426, 'time_algorithm_update': 0.055593847539383554, 'temp_loss': 2.8640075846214517, 'temp': 0.5671627457775822, 'alpha_loss': -113.26062962085525, 'alpha': 6.513274908065796, 'critic_loss': 333.69377092129923, 'actor_loss': -2.540804987008861, 'time_step': 0.055971549425511004, 'td_error': 1.2376400365060194, 'init_value': 1.4285348653793335, 'ave_value': 1.45613214631621} step=17300
2022-04-22 01:59.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422014232/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 01:59.13 [info     ] FQE_20220422015912: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001532327697937747, 'time_algorithm_update': 0.009358001042561358, 'loss': 0.007981337334803608, 'time_step': 0.009576237345316324, 'init_value': -0.22476540505886078, 'ave_value': -0.17817158901241717, 'soft_opc': nan} step=166




2022-04-22 01:59.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.15 [info     ] FQE_20220422015912: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00015118610428040287, 'time_algorithm_update': 0.009415586310696888, 'loss': 0.004712194759757763, 'time_step': 0.009632685098303369, 'init_value': -0.29993152618408203, 'ave_value': -0.22717543461670478, 'soft_opc': nan} step=332




2022-04-22 01:59.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.17 [info     ] FQE_20220422015912: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015619433069803627, 'time_algorithm_update': 0.009035641888538039, 'loss': 0.003666003715774291, 'time_step': 0.009254204221518642, 'init_value': -0.30595576763153076, 'ave_value': -0.21939586999549254, 'soft_opc': nan} step=498




2022-04-22 01:59.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.18 [info     ] FQE_20220422015912: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015048377485160367, 'time_algorithm_update': 0.008909822946571442, 'loss': 0.003241722142402397, 'time_step': 0.009118597191500377, 'init_value': -0.33419546484947205, 'ave_value': -0.22994637980318822, 'soft_opc': nan} step=664




2022-04-22 01:59.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.20 [info     ] FQE_20220422015912: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00015468051634639143, 'time_algorithm_update': 0.009359774819339615, 'loss': 0.0029055994678658984, 'time_step': 0.009576869298176593, 'init_value': -0.41809213161468506, 'ave_value': -0.3014773384777007, 'soft_opc': nan} step=830




2022-04-22 01:59.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.22 [info     ] FQE_20220422015912: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015833722539694914, 'time_algorithm_update': 0.009423795952854386, 'loss': 0.002650828165961542, 'time_step': 0.009649203484316906, 'init_value': -0.42865806818008423, 'ave_value': -0.30904499940775537, 'soft_opc': nan} step=996




2022-04-22 01:59.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.24 [info     ] FQE_20220422015912: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015438752002026662, 'time_algorithm_update': 0.00941523873662374, 'loss': 0.0023848974448909244, 'time_step': 0.00963249694870179, 'init_value': -0.4520184099674225, 'ave_value': -0.31277550949404637, 'soft_opc': nan} step=1162




2022-04-22 01:59.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.25 [info     ] FQE_20220422015912: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015343384570386037, 'time_algorithm_update': 0.008785682988454061, 'loss': 0.002022788488566988, 'time_step': 0.008998080908534038, 'init_value': -0.49578216671943665, 'ave_value': -0.345465491129807, 'soft_opc': nan} step=1328




2022-04-22 01:59.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.27 [info     ] FQE_20220422015912: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015112003648137473, 'time_algorithm_update': 0.009315696107335838, 'loss': 0.001806522360848577, 'time_step': 0.00952545275171119, 'init_value': -0.548493504524231, 'ave_value': -0.3895683343740398, 'soft_opc': nan} step=1494




2022-04-22 01:59.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.29 [info     ] FQE_20220422015912: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015647727322865682, 'time_algorithm_update': 0.009399896644684205, 'loss': 0.001718372922861971, 'time_step': 0.009618533663002842, 'init_value': -0.5867823958396912, 'ave_value': -0.4063541083646989, 'soft_opc': nan} step=1660




2022-04-22 01:59.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.30 [info     ] FQE_20220422015912: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001490532633769943, 'time_algorithm_update': 0.008881120796663216, 'loss': 0.0016336401876101427, 'time_step': 0.009093353547245622, 'init_value': -0.6761690378189087, 'ave_value': -0.4821225911991352, 'soft_opc': nan} step=1826




2022-04-22 01:59.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.32 [info     ] FQE_20220422015912: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015202344182025954, 'time_algorithm_update': 0.008888813386480492, 'loss': 0.0014961907837865312, 'time_step': 0.009111249303243247, 'init_value': -0.7499127388000488, 'ave_value': -0.5470080515629872, 'soft_opc': nan} step=1992




2022-04-22 01:59.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.34 [info     ] FQE_20220422015912: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001519703003297369, 'time_algorithm_update': 0.009224831339824631, 'loss': 0.0016211555686538074, 'time_step': 0.009434840765344092, 'init_value': -0.7930679321289062, 'ave_value': -0.5766663346221452, 'soft_opc': nan} step=2158




2022-04-22 01:59.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.35 [info     ] FQE_20220422015912: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014927588313458915, 'time_algorithm_update': 0.009263009910123894, 'loss': 0.0017382275070840806, 'time_step': 0.009477119847952601, 'init_value': -0.896384596824646, 'ave_value': -0.6505568980700798, 'soft_opc': nan} step=2324




2022-04-22 01:59.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.37 [info     ] FQE_20220422015912: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014964500105524637, 'time_algorithm_update': 0.0093268759279366, 'loss': 0.0018435141854675442, 'time_step': 0.00954211763588779, 'init_value': -0.9548354744911194, 'ave_value': -0.6941456449494974, 'soft_opc': nan} step=2490




2022-04-22 01:59.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.39 [info     ] FQE_20220422015912: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.000152465808822448, 'time_algorithm_update': 0.009235726781638271, 'loss': 0.0019751632235823647, 'time_step': 0.009458378136876118, 'init_value': -1.0296151638031006, 'ave_value': -0.750264904460123, 'soft_opc': nan} step=2656




2022-04-22 01:59.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.40 [info     ] FQE_20220422015912: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00015524065638163002, 'time_algorithm_update': 0.008971494364451212, 'loss': 0.0022489182033550823, 'time_step': 0.00920299880475883, 'init_value': -1.1041367053985596, 'ave_value': -0.7994818508859959, 'soft_opc': nan} step=2822




2022-04-22 01:59.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.42 [info     ] FQE_20220422015912: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00015098215585731598, 'time_algorithm_update': 0.009461025157606745, 'loss': 0.002359101988153577, 'time_step': 0.009676831314362675, 'init_value': -1.1306548118591309, 'ave_value': -0.8085384559557514, 'soft_opc': nan} step=2988




2022-04-22 01:59.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.44 [info     ] FQE_20220422015912: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015003853533641402, 'time_algorithm_update': 0.009041195892425904, 'loss': 0.0027143258826894104, 'time_step': 0.0092548074492489, 'init_value': -1.2527050971984863, 'ave_value': -0.8992353315054028, 'soft_opc': nan} step=3154




2022-04-22 01:59.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.45 [info     ] FQE_20220422015912: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001528679606426193, 'time_algorithm_update': 0.009330248258200037, 'loss': 0.0029418353404757195, 'time_step': 0.009548633931631065, 'init_value': -1.293452262878418, 'ave_value': -0.922923150101492, 'soft_opc': nan} step=3320




2022-04-22 01:59.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.47 [info     ] FQE_20220422015912: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00014940945498914603, 'time_algorithm_update': 0.008743373744459037, 'loss': 0.003194399825325348, 'time_step': 0.008957087275493577, 'init_value': -1.3365284204483032, 'ave_value': -0.9472107200002348, 'soft_opc': nan} step=3486




2022-04-22 01:59.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.49 [info     ] FQE_20220422015912: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001511056739163686, 'time_algorithm_update': 0.009409964802753494, 'loss': 0.003397143240428967, 'time_step': 0.00962848548429558, 'init_value': -1.4696612358093262, 'ave_value': -1.0685969487570965, 'soft_opc': nan} step=3652




2022-04-22 01:59.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.51 [info     ] FQE_20220422015912: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015288806823362787, 'time_algorithm_update': 0.009331282362880477, 'loss': 0.003869978063361136, 'time_step': 0.00955075959125197, 'init_value': -1.516469955444336, 'ave_value': -1.09518937961878, 'soft_opc': nan} step=3818




2022-04-22 01:59.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.52 [info     ] FQE_20220422015912: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015198897166424487, 'time_algorithm_update': 0.009325031774589815, 'loss': 0.004224436232435842, 'time_step': 0.009543871305075037, 'init_value': -1.553576946258545, 'ave_value': -1.1214187353178187, 'soft_opc': nan} step=3984




2022-04-22 01:59.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.54 [info     ] FQE_20220422015912: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015257065554699265, 'time_algorithm_update': 0.008515238761901855, 'loss': 0.0048801436501692596, 'time_step': 0.008732614747012955, 'init_value': -1.6241480112075806, 'ave_value': -1.1637850260815106, 'soft_opc': nan} step=4150




2022-04-22 01:59.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.55 [info     ] FQE_20220422015912: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001493189708296075, 'time_algorithm_update': 0.008536413491490376, 'loss': 0.004975833443374968, 'time_step': 0.00875180600637413, 'init_value': -1.6783918142318726, 'ave_value': -1.200678282244517, 'soft_opc': nan} step=4316




2022-04-22 01:59.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.57 [info     ] FQE_20220422015912: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001527918390480869, 'time_algorithm_update': 0.008390453924615699, 'loss': 0.005413451466775297, 'time_step': 0.008607676230281233, 'init_value': -1.6732710599899292, 'ave_value': -1.1821645521768578, 'soft_opc': nan} step=4482




2022-04-22 01:59.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 01:59.58 [info     ] FQE_20220422015912: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001522115914218397, 'time_algorithm_update': 0.008205245776348803, 'loss': 0.005881044637420249, 'time_step': 0.00842337005109672, 'init_value': -1.7708384990692139, 'ave_value': -1.2511467491889054, 'soft_opc': nan} step=4648




2022-04-22 01:59.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.00 [info     ] FQE_20220422015912: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014818576445062477, 'time_algorithm_update': 0.008458607168082732, 'loss': 0.006085543431122844, 'time_step': 0.008670783904661616, 'init_value': -1.780713438987732, 'ave_value': -1.2534345486243297, 'soft_opc': nan} step=4814




2022-04-22 02:00.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.02 [info     ] FQE_20220422015912: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00014883638864540192, 'time_algorithm_update': 0.008334212992564741, 'loss': 0.006269549869431508, 'time_step': 0.008549716099199042, 'init_value': -1.8766942024230957, 'ave_value': -1.3360739594392546, 'soft_opc': nan} step=4980




2022-04-22 02:00.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.03 [info     ] FQE_20220422015912: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00015401696584310876, 'time_algorithm_update': 0.008439536554267607, 'loss': 0.006913453654185532, 'time_step': 0.008664424160876906, 'init_value': -1.9214985370635986, 'ave_value': -1.3545025905669743, 'soft_opc': nan} step=5146




2022-04-22 02:00.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.05 [info     ] FQE_20220422015912: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014934051467711666, 'time_algorithm_update': 0.008355534220316324, 'loss': 0.00715938216395794, 'time_step': 0.008568452065249524, 'init_value': -1.9644207954406738, 'ave_value': -1.3696598937349007, 'soft_opc': nan} step=5312




2022-04-22 02:00.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.06 [info     ] FQE_20220422015912: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015124642705342857, 'time_algorithm_update': 0.00839160723858569, 'loss': 0.007686211467027687, 'time_step': 0.008609661136765078, 'init_value': -2.0434043407440186, 'ave_value': -1.4305945005108442, 'soft_opc': nan} step=5478




2022-04-22 02:00.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.08 [info     ] FQE_20220422015912: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001564744007156556, 'time_algorithm_update': 0.008603107498352787, 'loss': 0.008070134120700173, 'time_step': 0.008827515395290881, 'init_value': -2.0175938606262207, 'ave_value': -1.3993969511878384, 'soft_opc': nan} step=5644




2022-04-22 02:00.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.09 [info     ] FQE_20220422015912: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015138287142098668, 'time_algorithm_update': 0.00853815854313862, 'loss': 0.008089912049640643, 'time_step': 0.00875896861754268, 'init_value': -2.1020898818969727, 'ave_value': -1.4467470378455547, 'soft_opc': nan} step=5810




2022-04-22 02:00.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.11 [info     ] FQE_20220422015912: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00015030855155852903, 'time_algorithm_update': 0.007978911859443388, 'loss': 0.008643946145781798, 'time_step': 0.00819465051214379, 'init_value': -2.1184961795806885, 'ave_value': -1.4421609921397658, 'soft_opc': nan} step=5976




2022-04-22 02:00.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.12 [info     ] FQE_20220422015912: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00015157245727906744, 'time_algorithm_update': 0.008374696754547486, 'loss': 0.009000565590377594, 'time_step': 0.008591809904718974, 'init_value': -2.1643004417419434, 'ave_value': -1.4623278502341326, 'soft_opc': nan} step=6142




2022-04-22 02:00.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.14 [info     ] FQE_20220422015912: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.000149550208126206, 'time_algorithm_update': 0.008473782654268196, 'loss': 0.009447528323732282, 'time_step': 0.00868640606661877, 'init_value': -2.220611095428467, 'ave_value': -1.488625271500902, 'soft_opc': nan} step=6308




2022-04-22 02:00.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.16 [info     ] FQE_20220422015912: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001532557498977845, 'time_algorithm_update': 0.008521510894040027, 'loss': 0.009627740550365478, 'time_step': 0.008741299790072155, 'init_value': -2.2401695251464844, 'ave_value': -1.4947502959217573, 'soft_opc': nan} step=6474




2022-04-22 02:00.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.17 [info     ] FQE_20220422015912: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00015033727668854128, 'time_algorithm_update': 0.008431635707257742, 'loss': 0.010450208030152976, 'time_step': 0.008647187646613064, 'init_value': -2.404913902282715, 'ave_value': -1.65474827784273, 'soft_opc': nan} step=6640




2022-04-22 02:00.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.19 [info     ] FQE_20220422015912: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014890963772693313, 'time_algorithm_update': 0.008502253566879824, 'loss': 0.010618447734629286, 'time_step': 0.008720732596983393, 'init_value': -2.4202609062194824, 'ave_value': -1.6491929673195422, 'soft_opc': nan} step=6806




2022-04-22 02:00.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.20 [info     ] FQE_20220422015912: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014934195093361727, 'time_algorithm_update': 0.008293802479663527, 'loss': 0.011188655789431005, 'time_step': 0.008509503789694912, 'init_value': -2.5390517711639404, 'ave_value': -1.763106850029582, 'soft_opc': nan} step=6972




2022-04-22 02:00.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.22 [info     ] FQE_20220422015912: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001514819731195289, 'time_algorithm_update': 0.008435305342616805, 'loss': 0.011557060402554336, 'time_step': 0.008650812758020607, 'init_value': -2.4450156688690186, 'ave_value': -1.6377757800860449, 'soft_opc': nan} step=7138




2022-04-22 02:00.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.23 [info     ] FQE_20220422015912: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014981735183531978, 'time_algorithm_update': 0.0072168241064232514, 'loss': 0.012006384530244956, 'time_step': 0.007428587201129959, 'init_value': -2.538886785507202, 'ave_value': -1.7060611810065336, 'soft_opc': nan} step=7304




2022-04-22 02:00.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.25 [info     ] FQE_20220422015912: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015392648168357023, 'time_algorithm_update': 0.008512476840651179, 'loss': 0.012262946607317799, 'time_step': 0.008737871445805193, 'init_value': -2.6315512657165527, 'ave_value': -1.779946761480994, 'soft_opc': nan} step=7470




2022-04-22 02:00.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.26 [info     ] FQE_20220422015912: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015331032764480775, 'time_algorithm_update': 0.007941524666475963, 'loss': 0.012782150582152318, 'time_step': 0.008160395794604198, 'init_value': -2.6763694286346436, 'ave_value': -1.7939682440177815, 'soft_opc': nan} step=7636




2022-04-22 02:00.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.28 [info     ] FQE_20220422015912: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015042919710458043, 'time_algorithm_update': 0.008468547499323466, 'loss': 0.013532247404492158, 'time_step': 0.008681988141622889, 'init_value': -2.6539294719696045, 'ave_value': -1.7517768956385218, 'soft_opc': nan} step=7802




2022-04-22 02:00.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.29 [info     ] FQE_20220422015912: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015351140355489342, 'time_algorithm_update': 0.008466497961297092, 'loss': 0.014103142948223683, 'time_step': 0.00868938773511404, 'init_value': -2.8502774238586426, 'ave_value': -1.9022747884261177, 'soft_opc': nan} step=7968




2022-04-22 02:00.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.31 [info     ] FQE_20220422015912: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00015168017651661332, 'time_algorithm_update': 0.00841574065656547, 'loss': 0.014861089112910342, 'time_step': 0.008637995605009148, 'init_value': -2.904214382171631, 'ave_value': -1.9582150827656994, 'soft_opc': nan} step=8134




2022-04-22 02:00.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:00.32 [info     ] FQE_20220422015912: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001499537962028779, 'time_algorithm_update': 0.008420573659690029, 'loss': 0.014789687628445429, 'time_step': 0.008638903319117534, 'init_value': -2.907952308654785, 'ave_value': -1.9508149730326894, 'soft_opc': nan} step=8300




2022-04-22 02:00.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422015912/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 02:00.33 [debug    ] RoundIterator is selected.
2022-04-22 02:00.33 [info     ] Directory is created at d3rlpy_logs/FQE_20220422020033
2022-04-22 02:00.33 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 02:00.33 [debug    ] Building models...
2022-04-22 02:00.33 [debug    ] Models have been built.
2022-04-22 02:00.33 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422020033/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 02:00.36 [info     ] FQE_20220422020033: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001519578833912694, 'time_algorithm_update': 0.008261744366135708, 'loss': 0.02369184520496272, 'time_step': 0.008479834989059803, 'init_value': -1.4586408138275146, 'ave_value': -1.4402052577954154, 'soft_opc': nan} step=344




2022-04-22 02:00.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:00.39 [info     ] FQE_20220422020033: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015319017476813737, 'time_algorithm_update': 0.008402852124946063, 'loss': 0.021880471580770125, 'time_step': 0.008622827918030495, 'init_value': -2.1610851287841797, 'ave_value': -2.12034363533194, 'soft_opc': nan} step=688




2022-04-22 02:00.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:00.42 [info     ] FQE_20220422020033: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00014908785043760787, 'time_algorithm_update': 0.008439926213996356, 'loss': 0.025085458997637033, 'time_step': 0.008655453837195109, 'init_value': -3.0004560947418213, 'ave_value': -2.995511183344029, 'soft_opc': nan} step=1032




2022-04-22 02:00.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:00.45 [info     ] FQE_20220422020033: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015045945034470668, 'time_algorithm_update': 0.008496616468873135, 'loss': 0.02876550663630803, 'time_step': 0.008712404689123465, 'init_value': -3.484405040740967, 'ave_value': -3.5739148321183953, 'soft_opc': nan} step=1376




2022-04-22 02:00.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:00.48 [info     ] FQE_20220422020033: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00015044558879941008, 'time_algorithm_update': 0.008222007474233939, 'loss': 0.03539302598056925, 'time_step': 0.008439335019089455, 'init_value': -4.07090425491333, 'ave_value': -4.3314725417610225, 'soft_opc': nan} step=1720




2022-04-22 02:00.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:00.52 [info     ] FQE_20220422020033: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.000151004902152128, 'time_algorithm_update': 0.008387957201447598, 'loss': 0.04205067079178556, 'time_step': 0.00860462978828785, 'init_value': -4.505843639373779, 'ave_value': -5.020811745302903, 'soft_opc': nan} step=2064




2022-04-22 02:00.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:00.55 [info     ] FQE_20220422020033: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015392275743706283, 'time_algorithm_update': 0.008471729450447614, 'loss': 0.05432297848706502, 'time_step': 0.008694677851920905, 'init_value': -5.004016399383545, 'ave_value': -5.794021181124556, 'soft_opc': nan} step=2408




2022-04-22 02:00.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:00.58 [info     ] FQE_20220422020033: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.000157304281412169, 'time_algorithm_update': 0.008384596469790437, 'loss': 0.06496935582524815, 'time_step': 0.008609698262325553, 'init_value': -5.342626571655273, 'ave_value': -6.467318244781849, 'soft_opc': nan} step=2752




2022-04-22 02:00.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.01 [info     ] FQE_20220422020033: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00014885982801747877, 'time_algorithm_update': 0.008289459832879, 'loss': 0.07656499842549999, 'time_step': 0.008505253597747448, 'init_value': -5.822889804840088, 'ave_value': -7.24109125143905, 'soft_opc': nan} step=3096




2022-04-22 02:01.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.04 [info     ] FQE_20220422020033: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015075816664584848, 'time_algorithm_update': 0.008436634096988412, 'loss': 0.09474494259501266, 'time_step': 0.008652341920276021, 'init_value': -6.4670257568359375, 'ave_value': -8.234131853031581, 'soft_opc': nan} step=3440




2022-04-22 02:01.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.08 [info     ] FQE_20220422020033: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001511060914327932, 'time_algorithm_update': 0.008241799681685692, 'loss': 0.10663669490320392, 'time_step': 0.008459234653517257, 'init_value': -6.828391075134277, 'ave_value': -8.939916927588113, 'soft_opc': nan} step=3784




2022-04-22 02:01.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.11 [info     ] FQE_20220422020033: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015435107918672784, 'time_algorithm_update': 0.008570741775423982, 'loss': 0.1194301528780353, 'time_step': 0.00879173431285592, 'init_value': -7.165522575378418, 'ave_value': -9.581813233216746, 'soft_opc': nan} step=4128




2022-04-22 02:01.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.14 [info     ] FQE_20220422020033: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00014968320380809696, 'time_algorithm_update': 0.00832346289656883, 'loss': 0.12749512385858527, 'time_step': 0.008540000333342441, 'init_value': -7.559074401855469, 'ave_value': -10.27825532352168, 'soft_opc': nan} step=4472




2022-04-22 02:01.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.17 [info     ] FQE_20220422020033: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001525282859802246, 'time_algorithm_update': 0.008431652950686078, 'loss': 0.13613375730076155, 'time_step': 0.008652495090351549, 'init_value': -8.064239501953125, 'ave_value': -11.050209418996364, 'soft_opc': nan} step=4816




2022-04-22 02:01.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.20 [info     ] FQE_20220422020033: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00014977191769799522, 'time_algorithm_update': 0.008296078027680863, 'loss': 0.14709769199104156, 'time_step': 0.008512453977451769, 'init_value': -8.331101417541504, 'ave_value': -11.585151344966059, 'soft_opc': nan} step=5160




2022-04-22 02:01.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.23 [info     ] FQE_20220422020033: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015060361041579137, 'time_algorithm_update': 0.008465908294500307, 'loss': 0.15512106323961256, 'time_step': 0.008686423301696777, 'init_value': -8.558673858642578, 'ave_value': -12.03050706246742, 'soft_opc': nan} step=5504




2022-04-22 02:01.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.27 [info     ] FQE_20220422020033: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00014963884686314784, 'time_algorithm_update': 0.008391957643420198, 'loss': 0.1667047258693898, 'time_step': 0.008604430875112845, 'init_value': -8.802295684814453, 'ave_value': -12.497043402862593, 'soft_opc': nan} step=5848




2022-04-22 02:01.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.30 [info     ] FQE_20220422020033: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015485356020372967, 'time_algorithm_update': 0.008443339619525644, 'loss': 0.17576921548759347, 'time_step': 0.008665563755257184, 'init_value': -8.888223648071289, 'ave_value': -12.72886811709, 'soft_opc': nan} step=6192




2022-04-22 02:01.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.33 [info     ] FQE_20220422020033: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00014902339425197867, 'time_algorithm_update': 0.008404620858125909, 'loss': 0.1875868521760716, 'time_step': 0.008619586395662885, 'init_value': -9.467823028564453, 'ave_value': -13.428286120168831, 'soft_opc': nan} step=6536




2022-04-22 02:01.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.36 [info     ] FQE_20220422020033: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00014966795610827068, 'time_algorithm_update': 0.008448239675787993, 'loss': 0.19507109306666048, 'time_step': 0.008665377317472946, 'init_value': -9.91010570526123, 'ave_value': -13.883119764180984, 'soft_opc': nan} step=6880




2022-04-22 02:01.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.39 [info     ] FQE_20220422020033: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00014980102694311806, 'time_algorithm_update': 0.008350831824679708, 'loss': 0.20585478555822614, 'time_step': 0.008563763873521672, 'init_value': -10.408900260925293, 'ave_value': -14.507144339933996, 'soft_opc': nan} step=7224




2022-04-22 02:01.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.42 [info     ] FQE_20220422020033: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00014787981676500896, 'time_algorithm_update': 0.008339639319929966, 'loss': 0.21336996082628015, 'time_step': 0.008552910976631696, 'init_value': -10.784236907958984, 'ave_value': -14.904836372171449, 'soft_opc': nan} step=7568




2022-04-22 02:01.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.46 [info     ] FQE_20220422020033: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00014879537183184956, 'time_algorithm_update': 0.00848804379618445, 'loss': 0.2223116927942651, 'time_step': 0.008705624314241631, 'init_value': -11.083104133605957, 'ave_value': -15.041378269341632, 'soft_opc': nan} step=7912




2022-04-22 02:01.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.49 [info     ] FQE_20220422020033: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015072697816893111, 'time_algorithm_update': 0.008491606213325677, 'loss': 0.2368645870159272, 'time_step': 0.00870919435523277, 'init_value': -11.692002296447754, 'ave_value': -15.641460003310224, 'soft_opc': nan} step=8256




2022-04-22 02:01.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.52 [info     ] FQE_20220422020033: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.000150066475535548, 'time_algorithm_update': 0.008479855781377749, 'loss': 0.24404393012505457, 'time_step': 0.008698452350705169, 'init_value': -11.83487319946289, 'ave_value': -15.74935527280317, 'soft_opc': nan} step=8600




2022-04-22 02:01.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.55 [info     ] FQE_20220422020033: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00015482445095860682, 'time_algorithm_update': 0.008341224387634632, 'loss': 0.2605753652101686, 'time_step': 0.00855889361958171, 'init_value': -12.54949951171875, 'ave_value': -16.286784837133595, 'soft_opc': nan} step=8944




2022-04-22 02:01.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:01.58 [info     ] FQE_20220422020033: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001498245915701223, 'time_algorithm_update': 0.00845125248265821, 'loss': 0.27376995635825363, 'time_step': 0.008664262156153834, 'init_value': -13.002082824707031, 'ave_value': -16.657587813985586, 'soft_opc': nan} step=9288




2022-04-22 02:01.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.02 [info     ] FQE_20220422020033: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015259551447491314, 'time_algorithm_update': 0.0085229707318683, 'loss': 0.28557612040340036, 'time_step': 0.008743829505388127, 'init_value': -13.506562232971191, 'ave_value': -17.031003662663604, 'soft_opc': nan} step=9632




2022-04-22 02:02.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.05 [info     ] FQE_20220422020033: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015256640522979026, 'time_algorithm_update': 0.00844131306160328, 'loss': 0.3007166547549152, 'time_step': 0.008660787066747977, 'init_value': -14.033075332641602, 'ave_value': -17.552347270275, 'soft_opc': nan} step=9976




2022-04-22 02:02.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.08 [info     ] FQE_20220422020033: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001537654288979464, 'time_algorithm_update': 0.008335681155670521, 'loss': 0.31779137119477574, 'time_step': 0.008556971023249071, 'init_value': -14.657146453857422, 'ave_value': -18.21146483342963, 'soft_opc': nan} step=10320




2022-04-22 02:02.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.11 [info     ] FQE_20220422020033: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015154203703237134, 'time_algorithm_update': 0.008464614319246869, 'loss': 0.3227487706557609, 'time_step': 0.008682749992193178, 'init_value': -14.625389099121094, 'ave_value': -18.171812892071, 'soft_opc': nan} step=10664




2022-04-22 02:02.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.14 [info     ] FQE_20220422020033: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001514145108156426, 'time_algorithm_update': 0.008447409369224725, 'loss': 0.33751325343930444, 'time_step': 0.008669786675031795, 'init_value': -14.979244232177734, 'ave_value': -18.60884703887938, 'soft_opc': nan} step=11008




2022-04-22 02:02.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.18 [info     ] FQE_20220422020033: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00015031598335088684, 'time_algorithm_update': 0.008395693329877631, 'loss': 0.35039493667914773, 'time_step': 0.008613596821940222, 'init_value': -15.33846664428711, 'ave_value': -18.868777348534024, 'soft_opc': nan} step=11352




2022-04-22 02:02.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.21 [info     ] FQE_20220422020033: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015291779540305915, 'time_algorithm_update': 0.008300792339236238, 'loss': 0.3640607675472491, 'time_step': 0.008522718451743903, 'init_value': -15.636688232421875, 'ave_value': -19.391282546686792, 'soft_opc': nan} step=11696




2022-04-22 02:02.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.24 [info     ] FQE_20220422020033: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001515226308689561, 'time_algorithm_update': 0.008490549963574076, 'loss': 0.37556930601077027, 'time_step': 0.00871003505795501, 'init_value': -15.844614028930664, 'ave_value': -19.66178045725629, 'soft_opc': nan} step=12040




2022-04-22 02:02.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.27 [info     ] FQE_20220422020033: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015239105668178824, 'time_algorithm_update': 0.008506510146828584, 'loss': 0.38831723731032813, 'time_step': 0.008724516907403635, 'init_value': -16.142858505249023, 'ave_value': -20.049896041766917, 'soft_opc': nan} step=12384




2022-04-22 02:02.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.30 [info     ] FQE_20220422020033: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015282769535863124, 'time_algorithm_update': 0.00850850690242856, 'loss': 0.4099723219676593, 'time_step': 0.0087297004322673, 'init_value': -16.546640396118164, 'ave_value': -20.474770139011152, 'soft_opc': nan} step=12728




2022-04-22 02:02.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.34 [info     ] FQE_20220422020033: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00015146441237871036, 'time_algorithm_update': 0.008208602666854858, 'loss': 0.43010422780698293, 'time_step': 0.008424859407336213, 'init_value': -16.947952270507812, 'ave_value': -21.055340597593762, 'soft_opc': nan} step=13072




2022-04-22 02:02.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.37 [info     ] FQE_20220422020033: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015170352403507677, 'time_algorithm_update': 0.00847452671028847, 'loss': 0.432095666957456, 'time_step': 0.008692097525263941, 'init_value': -16.786481857299805, 'ave_value': -21.03726071058825, 'soft_opc': nan} step=13416




2022-04-22 02:02.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.40 [info     ] FQE_20220422020033: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001531562139821607, 'time_algorithm_update': 0.008472371933072112, 'loss': 0.43640691970020185, 'time_step': 0.008691071770911994, 'init_value': -17.25143814086914, 'ave_value': -21.555866316224364, 'soft_opc': nan} step=13760




2022-04-22 02:02.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.43 [info     ] FQE_20220422020033: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015520495037699855, 'time_algorithm_update': 0.008481842833896016, 'loss': 0.43457073579152483, 'time_step': 0.008703522210897402, 'init_value': -17.311246871948242, 'ave_value': -21.651599144190445, 'soft_opc': nan} step=14104




2022-04-22 02:02.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.46 [info     ] FQE_20220422020033: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00015276115994120753, 'time_algorithm_update': 0.008188868677893351, 'loss': 0.4399338318738913, 'time_step': 0.008412111637204192, 'init_value': -17.08310317993164, 'ave_value': -21.587632049472404, 'soft_opc': nan} step=14448




2022-04-22 02:02.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.50 [info     ] FQE_20220422020033: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015308066856029422, 'time_algorithm_update': 0.008481372927510462, 'loss': 0.4508565523564209, 'time_step': 0.008704044791155083, 'init_value': -17.833480834960938, 'ave_value': -22.415240492351245, 'soft_opc': nan} step=14792




2022-04-22 02:02.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.53 [info     ] FQE_20220422020033: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015174510867096657, 'time_algorithm_update': 0.008480469847834387, 'loss': 0.45995081317351133, 'time_step': 0.008699872466020806, 'init_value': -18.009531021118164, 'ave_value': -22.575689678667175, 'soft_opc': nan} step=15136




2022-04-22 02:02.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.56 [info     ] FQE_20220422020033: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015274937762770543, 'time_algorithm_update': 0.008504575768182443, 'loss': 0.469196853671916, 'time_step': 0.00872406571410423, 'init_value': -17.833641052246094, 'ave_value': -22.468877939149493, 'soft_opc': nan} step=15480




2022-04-22 02:02.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:02.59 [info     ] FQE_20220422020033: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001550337602925855, 'time_algorithm_update': 0.008239169453465662, 'loss': 0.44460836455204283, 'time_step': 0.008461392896119939, 'init_value': -18.148616790771484, 'ave_value': -22.977562736984297, 'soft_opc': nan} step=15824




2022-04-22 02:02.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:03.02 [info     ] FQE_20220422020033: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00013340905655262082, 'time_algorithm_update': 0.008263133292974428, 'loss': 0.4911909267403792, 'time_step': 0.008455330549284469, 'init_value': -18.620685577392578, 'ave_value': -23.473273643369154, 'soft_opc': nan} step=16168




2022-04-22 02:03.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:03.05 [info     ] FQE_20220422020033: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00013564561688622764, 'time_algorithm_update': 0.008337361868037733, 'loss': 0.4951547833462787, 'time_step': 0.008531288352123526, 'init_value': -18.655973434448242, 'ave_value': -23.518105160647337, 'soft_opc': nan} step=16512




2022-04-22 02:03.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:03.09 [info     ] FQE_20220422020033: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00014690396397612815, 'time_algorithm_update': 0.008474573839542478, 'loss': 0.4997678139669344, 'time_step': 0.008686279141625692, 'init_value': -18.505516052246094, 'ave_value': -23.545782844582074, 'soft_opc': nan} step=16856




2022-04-22 02:03.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:03.12 [info     ] FQE_20220422020033: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015331423559854197, 'time_algorithm_update': 0.008426795865214148, 'loss': 0.5118811552651054, 'time_step': 0.008648726136185402, 'init_value': -19.084030151367188, 'ave_value': -24.14520381543458, 'soft_opc': nan} step=17200




2022-04-22 02:03.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422020033/model_17200.pt
search iteration:  12
using hyper params:  [0.003928388570802373, 0.005399635171508037, 2.419043767130238e-05, 1]
2022-04-22 02:03.12 [debug    ] RoundIterator is selected.
2022-04-22 02:03.12 [info     ] Directory is created at d3rlpy_logs/CQL_20220422020312
2022-04-22 02:03.12 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 02:03.12 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 02:03.12 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422020312/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.003928388570802373, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:03.31 [info     ] CQL_20220422020312: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003046396839825404, 'time_algorithm_update': 0.05368824156722581, 'temp_loss': 4.906567428153374, 'temp': 0.9956049061234976, 'alpha_loss': -17.660906306581, 'alpha': 1.0177345472264152, 'critic_loss': 25.878581350249362, 'actor_loss': -1.9616018890731597, 'time_step': 0.05407696238831978, 'td_error': 1.214112277695717, 'init_value': 0.3102830946445465, 'ave_value': 0.5015252577419131} step=346
2022-04-22 02:03.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:03.51 [info     ] CQL_20220422020312: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0002922412287982213, 'time_algorithm_update': 0.05341986355753992, 'temp_loss': 4.983008631392021, 'temp': 0.9871415141000913, 'alpha_loss': -18.342659895130666, 'alpha': 1.0542273114871428, 'critic_loss': 30.808813712500424, 'actor_loss': -2.02808269496598, 'time_step': 0.0537958207157995, 'td_error': 1.205262680157056, 'init_value': 0.07555823773145676, 'ave_value': 0.39240038789854675} step=692
2022-04-22 02:03.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:04.10 [info     ] CQL_20220422020312: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0002924203872680664, 'time_algorithm_update': 0.053816601031088415, 'temp_loss': 4.940733624331524, 'temp': 0.9789045330761486, 'alpha_loss': -19.006956491856215, 'alpha': 1.0925245901752758, 'critic_loss': 40.72624483273898, 'actor_loss': -1.7247535055772418, 'time_step': 0.05419413547295367, 'td_error': 1.2004645896724246, 'init_value': -0.014532879926264286, 'ave_value': 0.3918840426497688} step=1038
2022-04-22 02:04.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:04.30 [info     ] CQL_20220422020312: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003054658801569415, 'time_algorithm_update': 0.0549791803249734, 'temp_loss': 4.900569455472031, 'temp': 0.9707922845906605, 'alpha_loss': -19.70882556893233, 'alpha': 1.132707661286944, 'critic_loss': 53.27252822390871, 'actor_loss': -1.3286293193784062, 'time_step': 0.05537076630344281, 'td_error': 1.204936665280356, 'init_value': -0.5077903866767883, 'ave_value': 0.004742139527700477} step=1384
2022-04-22 02:04.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:04.51 [info     ] CQL_20220422020312: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00029927526595275526, 'time_algorithm_update': 0.056751930644746464, 'temp_loss': 4.860051815220387, 'temp': 0.9627812399685038, 'alpha_loss': -20.43941642232024, 'alpha': 1.1748227955978041, 'critic_loss': 67.65488685211005, 'actor_loss': -0.8839964706859836, 'time_step': 0.05713708001065117, 'td_error': 1.2064047140934344, 'init_value': -0.9883252382278442, 'ave_value': -0.39538854070894525} step=1730
2022-04-22 02:04.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:05.12 [info     ] CQL_20220422020312: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00029043586267901294, 'time_algorithm_update': 0.05676914570648546, 'temp_loss': 4.819537461837593, 'temp': 0.9548618697017603, 'alpha_loss': -21.20755858228386, 'alpha': 1.2189037090092036, 'critic_loss': 83.73340536128579, 'actor_loss': -0.46667819434636937, 'time_step': 0.05714528477949903, 'td_error': 1.2094029229619256, 'init_value': -1.2128394842147827, 'ave_value': -0.6224380376385309} step=2076
2022-04-22 02:05.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:05.32 [info     ] CQL_20220422020312: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0002966926277028343, 'time_algorithm_update': 0.056606698587450675, 'temp_loss': 4.781519047786735, 'temp': 0.9470242198147526, 'alpha_loss': -22.014785700450744, 'alpha': 1.2649988252992574, 'critic_loss': 101.85198196234731, 'actor_loss': -0.11125830194853634, 'time_step': 0.05699005016701759, 'td_error': 1.2070963565410067, 'init_value': -1.2914732694625854, 'ave_value': -0.7190725291888823} step=2422
2022-04-22 02:05.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:05.53 [info     ] CQL_20220422020312: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00029993195065184134, 'time_algorithm_update': 0.057142817905183475, 'temp_loss': 4.74279151762152, 'temp': 0.9392652279035204, 'alpha_loss': -22.84451759206077, 'alpha': 1.3131339966906288, 'critic_loss': 123.92848831794166, 'actor_loss': 0.005517443189817357, 'time_step': 0.05752860672901131, 'td_error': 1.2114441341625795, 'init_value': -1.3160220384597778, 'ave_value': -0.7775542027538226} step=2768
2022-04-22 02:05.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:06.13 [info     ] CQL_20220422020312: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003048415817966351, 'time_algorithm_update': 0.054228539411732224, 'temp_loss': 4.704557919088816, 'temp': 0.9315797057799521, 'alpha_loss': -23.71824512591941, 'alpha': 1.3633463692802914, 'critic_loss': 155.4634770299658, 'actor_loss': -0.15830439667222818, 'time_step': 0.054622575726812284, 'td_error': 1.207684450976145, 'init_value': -1.1582735776901245, 'ave_value': -0.6948472547327641} step=3114
2022-04-22 02:06.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:06.32 [info     ] CQL_20220422020312: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00029278972934436247, 'time_algorithm_update': 0.05288210631795012, 'temp_loss': 4.666286658689466, 'temp': 0.9239672536105779, 'alpha_loss': -24.625674065826946, 'alpha': 1.4156903362687612, 'critic_loss': 200.254821071735, 'actor_loss': -0.7079635406332898, 'time_step': 0.05326153983959573, 'td_error': 1.2121018117666733, 'init_value': -0.45307233929634094, 'ave_value': -0.15681815849711625} step=3460
2022-04-22 02:06.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:06.51 [info     ] CQL_20220422020312: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.000297818569778707, 'time_algorithm_update': 0.052663743151405644, 'temp_loss': 4.627861293065066, 'temp': 0.9164227796772312, 'alpha_loss': -25.569651239869224, 'alpha': 1.4702107899450843, 'critic_loss': 259.4999566050623, 'actor_loss': -1.4871600189305454, 'time_step': 0.05304589299108252, 'td_error': 1.2213223704190894, 'init_value': 0.016078487038612366, 'ave_value': 0.19909752249830762} step=3806
2022-04-22 02:06.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:07.11 [info     ] CQL_20220422020312: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0002959808173207189, 'time_algorithm_update': 0.05294161931627748, 'temp_loss': 4.5907720857962016, 'temp': 0.9089467577162506, 'alpha_loss': -26.55513130584893, 'alpha': 1.526976774193648, 'critic_loss': 323.3897805627371, 'actor_loss': -2.141546471615058, 'time_step': 0.05331981319912596, 'td_error': 1.2275518792479738, 'init_value': 0.6540365815162659, 'ave_value': 0.7637618677940567} step=4152
2022-04-22 02:07.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:07.30 [info     ] CQL_20220422020312: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003022796156778501, 'time_algorithm_update': 0.052979807633196, 'temp_loss': 4.553159207966975, 'temp': 0.9015352505479934, 'alpha_loss': -27.57976583249307, 'alpha': 1.586046997522343, 'critic_loss': 384.0446294001761, 'actor_loss': -2.745001230625748, 'time_step': 0.0533690369887159, 'td_error': 1.2309118429903974, 'init_value': 1.205632209777832, 'ave_value': 1.261999426898712} step=4498
2022-04-22 02:07.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:07.49 [info     ] CQL_20220422020312: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003002737298866228, 'time_algorithm_update': 0.052817489370445295, 'temp_loss': 4.515629032443713, 'temp': 0.8941878821119408, 'alpha_loss': -28.64821771941433, 'alpha': 1.6474961917524393, 'critic_loss': 444.82769616628656, 'actor_loss': -3.297483674363594, 'time_step': 0.053202594635803575, 'td_error': 1.233129716883595, 'init_value': 1.7962318658828735, 'ave_value': 1.8272559853230907} step=4844
2022-04-22 02:07.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:08.08 [info     ] CQL_20220422020312: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0002933285828959735, 'time_algorithm_update': 0.05264147099731974, 'temp_loss': 4.480445186526789, 'temp': 0.8869023522889683, 'alpha_loss': -29.760500533043306, 'alpha': 1.7114039666390832, 'critic_loss': 512.662461826567, 'actor_loss': -3.8393767803390593, 'time_step': 0.05302157980858246, 'td_error': 1.2334769388911042, 'init_value': 2.30257248878479, 'ave_value': 2.32407748684492} step=5190
2022-04-22 02:08.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:08.27 [info     ] CQL_20220422020312: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00031258053862290574, 'time_algorithm_update': 0.05280605423657191, 'temp_loss': 4.442488369914148, 'temp': 0.8796777583960164, 'alpha_loss': -30.916501651609565, 'alpha': 1.777849144673761, 'critic_loss': 590.7671729291794, 'actor_loss': -4.297620607938381, 'time_step': 0.053202740718863606, 'td_error': 1.2344512209451677, 'init_value': 2.778768539428711, 'ave_value': 2.7939088925302396} step=5536
2022-04-22 02:08.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:08.46 [info     ] CQL_20220422020312: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003055086025612892, 'time_algorithm_update': 0.05204626245994788, 'temp_loss': 4.407285013639858, 'temp': 0.872514322486227, 'alpha_loss': -32.11697173807662, 'alpha': 1.8469262519323757, 'critic_loss': 683.4145673630554, 'actor_loss': -4.745435986215669, 'time_step': 0.05243860779470102, 'td_error': 1.236204473264029, 'init_value': 3.2645514011383057, 'ave_value': 3.274957881340092} step=5882
2022-04-22 02:08.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:09.05 [info     ] CQL_20220422020312: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0002907569697826584, 'time_algorithm_update': 0.05151957652472347, 'temp_loss': 4.370767184075593, 'temp': 0.8654099718339181, 'alpha_loss': -33.364892220910576, 'alpha': 1.9187116981241745, 'critic_loss': 784.9689847913091, 'actor_loss': -5.172225431210733, 'time_step': 0.051893396873694625, 'td_error': 1.235848343967024, 'init_value': 3.6235692501068115, 'ave_value': 3.640131409883838} step=6228
2022-04-22 02:09.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:09.24 [info     ] CQL_20220422020312: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.000298070080707528, 'time_algorithm_update': 0.05139210458435764, 'temp_loss': 4.334522838537404, 'temp': 0.8583652904267945, 'alpha_loss': -34.66330964854687, 'alpha': 1.993322289058928, 'critic_loss': 901.0832523059294, 'actor_loss': -5.579928472551996, 'time_step': 0.051776998993978335, 'td_error': 1.2378871511179634, 'init_value': 4.058877468109131, 'ave_value': 4.0672892005706975} step=6574
2022-04-22 02:09.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:09.42 [info     ] CQL_20220422020312: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00029699306267534376, 'time_algorithm_update': 0.05090068254856705, 'temp_loss': 4.2995341402946865, 'temp': 0.8513785107976439, 'alpha_loss': -36.01146148946244, 'alpha': 2.0708517222046163, 'critic_loss': 1038.5220892580946, 'actor_loss': -5.871635846319915, 'time_step': 0.051278841288792607, 'td_error': 1.2400503907761926, 'init_value': 4.327393054962158, 'ave_value': 4.331046604352745} step=6920
2022-04-22 02:09.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:10.01 [info     ] CQL_20220422020312: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00029219368289660856, 'time_algorithm_update': 0.05123950222324085, 'temp_loss': 4.265585279189093, 'temp': 0.8444472978569869, 'alpha_loss': -37.41260991620191, 'alpha': 2.1514168750343985, 'critic_loss': 1187.8803608624232, 'actor_loss': -6.088098663815184, 'time_step': 0.05161072821975443, 'td_error': 1.238820169984835, 'init_value': 4.469081878662109, 'ave_value': 4.478743189892649} step=7266
2022-04-22 02:10.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:10.19 [info     ] CQL_20220422020312: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0002908334566678615, 'time_algorithm_update': 0.051100097639712294, 'temp_loss': 4.229635208328335, 'temp': 0.8375744049604228, 'alpha_loss': -38.86204381071763, 'alpha': 2.235121728367888, 'critic_loss': 1356.194581136538, 'actor_loss': -6.2157416674443065, 'time_step': 0.05146514611437142, 'td_error': 1.2401688494539942, 'init_value': 4.580874443054199, 'ave_value': 4.586448487810412} step=7612
2022-04-22 02:10.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:10.38 [info     ] CQL_20220422020312: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.000295744465954731, 'time_algorithm_update': 0.05184352466825805, 'temp_loss': 4.195743023315606, 'temp': 0.830757263078855, 'alpha_loss': -40.38095236100214, 'alpha': 2.3220941758569267, 'critic_loss': 1541.5256425273212, 'actor_loss': -6.26221008521284, 'time_step': 0.052217037691546314, 'td_error': 1.2416505177131631, 'init_value': 4.668823719024658, 'ave_value': 4.672732351356459} step=7958
2022-04-22 02:10.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:10.58 [info     ] CQL_20220422020312: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0002990588976468654, 'time_algorithm_update': 0.05336035400456776, 'temp_loss': 4.1619482412503634, 'temp': 0.8239951840025841, 'alpha_loss': -41.947055276418695, 'alpha': 2.4124566640468004, 'critic_loss': 1749.8613288306087, 'actor_loss': -6.24084608127616, 'time_step': 0.053738418342061126, 'td_error': 1.2400981833534575, 'init_value': 4.5426740646362305, 'ave_value': 4.549884958185817} step=8304
2022-04-22 02:10.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:11.17 [info     ] CQL_20220422020312: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00029760978125423367, 'time_algorithm_update': 0.051963409247425936, 'temp_loss': 4.127030488383563, 'temp': 0.8172892204598884, 'alpha_loss': -43.58254535211993, 'alpha': 2.5063477795937157, 'critic_loss': 1981.9674467406521, 'actor_loss': -6.131995515327233, 'time_step': 0.05234078727016559, 'td_error': 1.2398352913493842, 'init_value': 4.417873382568359, 'ave_value': 4.423051424985138} step=8650
2022-04-22 02:11.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:11.35 [info     ] CQL_20220422020312: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0002974995298881751, 'time_algorithm_update': 0.05212382774132525, 'temp_loss': 4.094346932593108, 'temp': 0.8106379138596486, 'alpha_loss': -45.276093025428025, 'alpha': 2.603893752732029, 'critic_loss': 2228.810191248194, 'actor_loss': -5.9983250102555825, 'time_step': 0.05250128431816321, 'td_error': 1.2389913814535043, 'init_value': 4.284579277038574, 'ave_value': 4.293947853641863} step=8996
2022-04-22 02:11.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:11.54 [info     ] CQL_20220422020312: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.000295020252293934, 'time_algorithm_update': 0.05221545627351441, 'temp_loss': 4.061243717381031, 'temp': 0.8040400981214005, 'alpha_loss': -47.046124937906434, 'alpha': 2.7052478859190305, 'critic_loss': 2500.0083579355583, 'actor_loss': -5.854162254774502, 'time_step': 0.05259249182794824, 'td_error': 1.2394623427586338, 'init_value': 4.1366047859191895, 'ave_value': 4.141816228303145} step=9342
2022-04-22 02:11.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:12.14 [info     ] CQL_20220422020312: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00031429908179134305, 'time_algorithm_update': 0.052255924037426196, 'temp_loss': 4.027133865852576, 'temp': 0.7974963174389966, 'alpha_loss': -48.86997902048805, 'alpha': 2.810557891178682, 'critic_loss': 2795.607788791546, 'actor_loss': -5.665426919915084, 'time_step': 0.05264941460824426, 'td_error': 1.238624107049393, 'init_value': 3.951730489730835, 'ave_value': 3.958132513126755} step=9688
2022-04-22 02:12.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:12.32 [info     ] CQL_20220422020312: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0002939894020212868, 'time_algorithm_update': 0.05215184468065383, 'temp_loss': 3.9949257904394515, 'temp': 0.791006103244131, 'alpha_loss': -50.77738969174424, 'alpha': 2.9199642838770257, 'critic_loss': 3110.6809702966943, 'actor_loss': -5.474280565460293, 'time_step': 0.05252664351049875, 'td_error': 1.2388940356858225, 'init_value': 3.7683441638946533, 'ave_value': 3.773136033871661} step=10034
2022-04-22 02:12.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:12.51 [info     ] CQL_20220422020312: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0002952386878129375, 'time_algorithm_update': 0.05213849737465037, 'temp_loss': 3.961967258784123, 'temp': 0.7845687275332522, 'alpha_loss': -52.750573373254326, 'alpha': 3.0336384580314504, 'critic_loss': 3459.2697521055366, 'actor_loss': -5.204301922307538, 'time_step': 0.05251574723017698, 'td_error': 1.2390788202280572, 'init_value': 3.5735044479370117, 'ave_value': 3.5749510281229537} step=10380
2022-04-22 02:12.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:13.10 [info     ] CQL_20220422020312: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00029676566923284806, 'time_algorithm_update': 0.052182881129270345, 'temp_loss': 3.929626504120799, 'temp': 0.7781842002978904, 'alpha_loss': -54.8020887540255, 'alpha': 3.15172978012548, 'critic_loss': 3821.4934780583903, 'actor_loss': -4.965991134588429, 'time_step': 0.05256517705200724, 'td_error': 1.23809550833217, 'init_value': 3.2685606479644775, 'ave_value': 3.2732887526268413} step=10726
2022-04-22 02:13.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:13.29 [info     ] CQL_20220422020312: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00030037433425815117, 'time_algorithm_update': 0.052236290336344283, 'temp_loss': 3.896364945207717, 'temp': 0.771852219552663, 'alpha_loss': -56.94287317750082, 'alpha': 3.274419813486882, 'critic_loss': 4203.693834955293, 'actor_loss': -4.682451365311022, 'time_step': 0.05262638171973256, 'td_error': 1.2381460209599129, 'init_value': 2.9815943241119385, 'ave_value': 2.9841462547262454} step=11072
2022-04-22 02:13.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:13.49 [info     ] CQL_20220422020312: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003071451462762204, 'time_algorithm_update': 0.052998764666518726, 'temp_loss': 3.865717814147817, 'temp': 0.7655719837356854, 'alpha_loss': -59.15157952060589, 'alpha': 3.401888839082222, 'critic_loss': 4620.109424392612, 'actor_loss': -4.362294117150279, 'time_step': 0.05338892082258456, 'td_error': 1.2384533862644698, 'init_value': 2.7003731727600098, 'ave_value': 2.700922598009033} step=11418
2022-04-22 02:13.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:14.08 [info     ] CQL_20220422020312: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003042710309772822, 'time_algorithm_update': 0.053273526230299406, 'temp_loss': 3.836096353613572, 'temp': 0.7593398469720962, 'alpha_loss': -61.45746948815495, 'alpha': 3.5343165638818905, 'critic_loss': 4962.515269373193, 'actor_loss': -4.079502217342399, 'time_step': 0.05365760684702438, 'td_error': 1.2405933700445613, 'init_value': 2.671804189682007, 'ave_value': 2.66456324820159} step=11764
2022-04-22 02:14.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:14.28 [info     ] CQL_20220422020312: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.000299790002018041, 'time_algorithm_update': 0.053493357807225576, 'temp_loss': 3.804491850682077, 'temp': 0.7531583062830688, 'alpha_loss': -63.84866038636665, 'alpha': 3.6718980225524462, 'critic_loss': 4617.359617729408, 'actor_loss': -4.058636985762271, 'time_step': 0.053874208747996075, 'td_error': 1.2391433239500005, 'init_value': 2.5084335803985596, 'ave_value': 2.507244791742635} step=12110
2022-04-22 02:14.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:14.47 [info     ] CQL_20220422020312: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003024642867159981, 'time_algorithm_update': 0.053254051015556206, 'temp_loss': 3.7724264281333526, 'temp': 0.7470285136231108, 'alpha_loss': -66.33628042584898, 'alpha': 3.8148326515462356, 'critic_loss': 4042.519347791727, 'actor_loss': -4.1648554712361685, 'time_step': 0.05364159214703334, 'td_error': 1.2400571489140682, 'init_value': 2.7811131477355957, 'ave_value': 2.777031511828593} step=12456
2022-04-22 02:14.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:15.06 [info     ] CQL_20220422020312: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00029827680201888776, 'time_algorithm_update': 0.05362381135797225, 'temp_loss': 3.742290742135461, 'temp': 0.7409494036883977, 'alpha_loss': -68.9237640028055, 'alpha': 3.9633497826625845, 'critic_loss': 3392.7183308684066, 'actor_loss': -4.3187063002172925, 'time_step': 0.05400533069764948, 'td_error': 1.239227900278883, 'init_value': 2.8522510528564453, 'ave_value': 2.8514167130417367} step=12802
2022-04-22 02:15.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:15.26 [info     ] CQL_20220422020312: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00029254028562865505, 'time_algorithm_update': 0.05234535718928872, 'temp_loss': 3.7113599336216216, 'temp': 0.734918685662264, 'alpha_loss': -71.59212394670254, 'alpha': 4.117633559111225, 'critic_loss': 2979.529823259122, 'actor_loss': -4.40496040079635, 'time_step': 0.052717131686348444, 'td_error': 1.2405548241857798, 'init_value': 3.0537331104278564, 'ave_value': 3.048168122321862} step=13148
2022-04-22 02:15.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:15.45 [info     ] CQL_20220422020312: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0002978364856256915, 'time_algorithm_update': 0.05343635440561813, 'temp_loss': 3.6819538877189504, 'temp': 0.728937561973671, 'alpha_loss': -74.39333932248154, 'alpha': 4.277933699547211, 'critic_loss': 2595.354941660269, 'actor_loss': -4.552942211228299, 'time_step': 0.05381635434365686, 'td_error': 1.2393435704291087, 'init_value': 3.1256911754608154, 'ave_value': 3.126219358258815} step=13494
2022-04-22 02:15.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:16.05 [info     ] CQL_20220422020312: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00030326912168822536, 'time_algorithm_update': 0.055638815626243635, 'temp_loss': 3.6514272593349393, 'temp': 0.7230053871353238, 'alpha_loss': -77.29746951946633, 'alpha': 4.4444792091501935, 'critic_loss': 2512.972697175307, 'actor_loss': -4.524241583884796, 'time_step': 0.05602426335990773, 'td_error': 1.2383656665049603, 'init_value': 3.0678999423980713, 'ave_value': 3.0693053837916815} step=13840
2022-04-22 02:16.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:16.26 [info     ] CQL_20220422020312: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0002996707927284902, 'time_algorithm_update': 0.05658235853118015, 'temp_loss': 3.6209852626557986, 'temp': 0.7171218007630695, 'alpha_loss': -80.29522740358563, 'alpha': 4.617506301471953, 'critic_loss': 2383.1332196362446, 'actor_loss': -4.613105795976055, 'time_step': 0.056967392822221526, 'td_error': 1.2394619211427014, 'init_value': 3.234393358230591, 'ave_value': 3.2331456429475627} step=14186
2022-04-22 02:16.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:16.47 [info     ] CQL_20220422020312: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003033042643111565, 'time_algorithm_update': 0.05707114900467713, 'temp_loss': 3.591969313649084, 'temp': 0.7112861913063623, 'alpha_loss': -83.41626179425013, 'alpha': 4.797254492092684, 'critic_loss': 2401.1678170441205, 'actor_loss': -4.579070419245372, 'time_step': 0.05746007585801141, 'td_error': 1.2384877217121346, 'init_value': 3.1261420249938965, 'ave_value': 3.128491409065713} step=14532
2022-04-22 02:16.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:17.07 [info     ] CQL_20220422020312: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0002985276238766709, 'time_algorithm_update': 0.05679259892833026, 'temp_loss': 3.5630790477543206, 'temp': 0.7054977956190275, 'alpha_loss': -86.66679263252743, 'alpha': 4.9839998766176965, 'critic_loss': 2469.8496100806087, 'actor_loss': -4.609576733815188, 'time_step': 0.05717453446691436, 'td_error': 1.2384918134235476, 'init_value': 3.2393789291381836, 'ave_value': 3.2414319253408723} step=14878
2022-04-22 02:17.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:17.28 [info     ] CQL_20220422020312: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0002986482113082974, 'time_algorithm_update': 0.056731252311971146, 'temp_loss': 3.5338656943657494, 'temp': 0.6997560441838524, 'alpha_loss': -90.0387527598122, 'alpha': 5.178014301840281, 'critic_loss': 2367.2085137339686, 'actor_loss': -4.682608532767764, 'time_step': 0.057112684139626564, 'td_error': 1.2394704474812832, 'init_value': 3.3376712799072266, 'ave_value': 3.3357647638515364} step=15224
2022-04-22 02:17.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:17.49 [info     ] CQL_20220422020312: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003041187462779139, 'time_algorithm_update': 0.056849831790593316, 'temp_loss': 3.5060312782408873, 'temp': 0.6940608317452359, 'alpha_loss': -93.54813356344411, 'alpha': 5.379593685183222, 'critic_loss': 2312.5569440367594, 'actor_loss': -4.778352926232222, 'time_step': 0.057235691588738064, 'td_error': 1.2385780882081137, 'init_value': 3.4149398803710938, 'ave_value': 3.418164191668833} step=15570
2022-04-22 02:17.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:18.09 [info     ] CQL_20220422020312: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00030786522551079016, 'time_algorithm_update': 0.056906653966517805, 'temp_loss': 3.476510053425166, 'temp': 0.6884119309097356, 'alpha_loss': -97.18208507030685, 'alpha': 5.5890132471316125, 'critic_loss': 2175.3996444437544, 'actor_loss': -4.928051320114577, 'time_step': 0.05729894279744584, 'td_error': 1.239180275668739, 'init_value': 3.5967183113098145, 'ave_value': 3.5990410817183833} step=15916
2022-04-22 02:18.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:18.30 [info     ] CQL_20220422020312: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003062452195007677, 'time_algorithm_update': 0.05664272184316822, 'temp_loss': 3.448178389168888, 'temp': 0.6828100083535806, 'alpha_loss': -100.96158641749035, 'alpha': 5.806590162949755, 'critic_loss': 2017.1946157113666, 'actor_loss': -5.085466977488788, 'time_step': 0.05703065919049213, 'td_error': 1.2393836455902039, 'init_value': 3.771693229675293, 'ave_value': 3.7735757399306245} step=16262
2022-04-22 02:18.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:18.50 [info     ] CQL_20220422020312: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00029871091677274316, 'time_algorithm_update': 0.05678096258571382, 'temp_loss': 3.4212642908096313, 'temp': 0.677252480060379, 'alpha_loss': -104.90292135690677, 'alpha': 6.032635309792667, 'critic_loss': 2045.8126277151825, 'actor_loss': -5.127259718889446, 'time_step': 0.05715892080626736, 'td_error': 1.2403603620334849, 'init_value': 3.8196728229522705, 'ave_value': 3.816327256351469} step=16608
2022-04-22 02:18.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:19.11 [info     ] CQL_20220422020312: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00030539214955588984, 'time_algorithm_update': 0.0565281575814837, 'temp_loss': 3.39238768299191, 'temp': 0.6717406399332719, 'alpha_loss': -108.98155205787262, 'alpha': 6.267491904297316, 'critic_loss': 2116.474166605514, 'actor_loss': -5.2094365186084906, 'time_step': 0.05691910272388789, 'td_error': 1.2404543157535781, 'init_value': 3.9971225261688232, 'ave_value': 3.9966062988246884} step=16954
2022-04-22 02:19.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:19.31 [info     ] CQL_20220422020312: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00029778687251096514, 'time_algorithm_update': 0.05537682117065253, 'temp_loss': 3.364924009824764, 'temp': 0.6662739100139266, 'alpha_loss': -113.22065254167325, 'alpha': 6.511482278735651, 'critic_loss': 2026.7882344681404, 'actor_loss': -5.386559573212111, 'time_step': 0.0557551893884736, 'td_error': 1.239918078211816, 'init_value': 4.1179518699646, 'ave_value': 4.118692420319168} step=17300
2022-04-22 02:19.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422020312/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910049e

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 02:19.33 [info     ] FQE_20220422021932: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014626692576580737, 'time_algorithm_update': 0.009173228080014148, 'loss': 0.003494078649000366, 'time_step': 0.009383566408272249, 'init_value': -0.40494489669799805, 'ave_value': -0.3243353928632296, 'soft_opc': nan} step=166




2022-04-22 02:19.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.35 [info     ] FQE_20220422021932: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00014860084257930158, 'time_algorithm_update': 0.009014243102935424, 'loss': 0.002398460852374962, 'time_step': 0.00921957176851939, 'init_value': -0.5085505247116089, 'ave_value': -0.3822228496407603, 'soft_opc': nan} step=332




2022-04-22 02:19.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.37 [info     ] FQE_20220422021932: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014484359557370106, 'time_algorithm_update': 0.009356087948902544, 'loss': 0.0021420659669899346, 'time_step': 0.009564593613865864, 'init_value': -0.5201677083969116, 'ave_value': -0.3761074152615693, 'soft_opc': nan} step=498




2022-04-22 02:19.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.38 [info     ] FQE_20220422021932: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00014475742018366433, 'time_algorithm_update': 0.009373008486736252, 'loss': 0.0020871035563927815, 'time_step': 0.009579578077936747, 'init_value': -0.5812664031982422, 'ave_value': -0.4154642026099536, 'soft_opc': nan} step=664




2022-04-22 02:19.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.40 [info     ] FQE_20220422021932: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014733406434576195, 'time_algorithm_update': 0.009409535362059811, 'loss': 0.0019554200624985374, 'time_step': 0.00961580764816468, 'init_value': -0.655123233795166, 'ave_value': -0.45850383873257017, 'soft_opc': nan} step=830




2022-04-22 02:19.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.42 [info     ] FQE_20220422021932: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00014629995966532143, 'time_algorithm_update': 0.008769660110933235, 'loss': 0.0018561755160447658, 'time_step': 0.008978233279952084, 'init_value': -0.6811200976371765, 'ave_value': -0.46765128346243956, 'soft_opc': nan} step=996




2022-04-22 02:19.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.43 [info     ] FQE_20220422021932: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014631432223032755, 'time_algorithm_update': 0.0093174885554486, 'loss': 0.001804439395495006, 'time_step': 0.009528483252927482, 'init_value': -0.7211140394210815, 'ave_value': -0.4919313599934449, 'soft_opc': nan} step=1162




2022-04-22 02:19.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.45 [info     ] FQE_20220422021932: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001479459096150226, 'time_algorithm_update': 0.008787650659859899, 'loss': 0.0018533165331688676, 'time_step': 0.009000885917479733, 'init_value': -0.7955150008201599, 'ave_value': -0.5392531756613705, 'soft_opc': nan} step=1328




2022-04-22 02:19.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.47 [info     ] FQE_20220422021932: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00014625399945730186, 'time_algorithm_update': 0.009442250412630748, 'loss': 0.0016892231490283486, 'time_step': 0.009654552103525185, 'init_value': -0.8290038704872131, 'ave_value': -0.5600107392413659, 'soft_opc': nan} step=1494




2022-04-22 02:19.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.48 [info     ] FQE_20220422021932: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014601845339120152, 'time_algorithm_update': 0.00876554092728948, 'loss': 0.0017907642745356783, 'time_step': 0.0089731058442449, 'init_value': -0.8975156545639038, 'ave_value': -0.6087753549486667, 'soft_opc': nan} step=1660




2022-04-22 02:19.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.50 [info     ] FQE_20220422021932: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001501462545739599, 'time_algorithm_update': 0.009387710008276514, 'loss': 0.0017442016309009393, 'time_step': 0.009602723351444107, 'init_value': -0.935916006565094, 'ave_value': -0.6187117757359604, 'soft_opc': nan} step=1826




2022-04-22 02:19.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.52 [info     ] FQE_20220422021932: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00014939940119364174, 'time_algorithm_update': 0.009344610823206154, 'loss': 0.001728424626046298, 'time_step': 0.009557808738156974, 'init_value': -0.9888560771942139, 'ave_value': -0.6571733203557161, 'soft_opc': nan} step=1992




2022-04-22 02:19.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.54 [info     ] FQE_20220422021932: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00014497573117175735, 'time_algorithm_update': 0.00936997654926346, 'loss': 0.0017731997672908839, 'time_step': 0.00958235436175243, 'init_value': -1.0273919105529785, 'ave_value': -0.6782293880274436, 'soft_opc': nan} step=2158




2022-04-22 02:19.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.55 [info     ] FQE_20220422021932: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014467124479362764, 'time_algorithm_update': 0.009019439478954637, 'loss': 0.0017764877241472322, 'time_step': 0.009224716439304581, 'init_value': -1.1020792722702026, 'ave_value': -0.7348811449432695, 'soft_opc': nan} step=2324




2022-04-22 02:19.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.57 [info     ] FQE_20220422021932: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014940083745014235, 'time_algorithm_update': 0.009209917252322277, 'loss': 0.0018235805392874047, 'time_step': 0.009424972246928388, 'init_value': -1.126720905303955, 'ave_value': -0.7515954772262154, 'soft_opc': nan} step=2490




2022-04-22 02:19.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:19.59 [info     ] FQE_20220422021932: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00014738720583628458, 'time_algorithm_update': 0.008916006030806577, 'loss': 0.001986489474846342, 'time_step': 0.009128412568425557, 'init_value': -1.2061264514923096, 'ave_value': -0.8140143160414588, 'soft_opc': nan} step=2656




2022-04-22 02:19.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.00 [info     ] FQE_20220422021932: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001484442906207349, 'time_algorithm_update': 0.009419041943837362, 'loss': 0.00203554687375646, 'time_step': 0.009636468197925982, 'init_value': -1.2684407234191895, 'ave_value': -0.8481506674407838, 'soft_opc': nan} step=2822




2022-04-22 02:20.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.02 [info     ] FQE_20220422021932: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014697069145110716, 'time_algorithm_update': 0.009313920894301081, 'loss': 0.0020951436468223334, 'time_step': 0.009526001401694423, 'init_value': -1.2894103527069092, 'ave_value': -0.8742416986507607, 'soft_opc': nan} step=2988




2022-04-22 02:20.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.04 [info     ] FQE_20220422021932: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015894045312720608, 'time_algorithm_update': 0.00898639552564506, 'loss': 0.002229171076786693, 'time_step': 0.00921064974313759, 'init_value': -1.3832741975784302, 'ave_value': -0.9349644722131727, 'soft_opc': nan} step=3154




2022-04-22 02:20.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.05 [info     ] FQE_20220422021932: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00014787266053349138, 'time_algorithm_update': 0.009412945034992263, 'loss': 0.0024015545748252735, 'time_step': 0.009634464620107627, 'init_value': -1.4165430068969727, 'ave_value': -0.9524273114127887, 'soft_opc': nan} step=3320




2022-04-22 02:20.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.07 [info     ] FQE_20220422021932: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001466216811214585, 'time_algorithm_update': 0.009392992559685764, 'loss': 0.0024871977255819925, 'time_step': 0.009609746645732099, 'init_value': -1.463768482208252, 'ave_value': -0.9794492988201143, 'soft_opc': nan} step=3486




2022-04-22 02:20.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.09 [info     ] FQE_20220422021932: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00014656710337443524, 'time_algorithm_update': 0.009453469012157026, 'loss': 0.0025317230668965257, 'time_step': 0.00966533982610128, 'init_value': -1.528541088104248, 'ave_value': -1.0293748415167536, 'soft_opc': nan} step=3652




2022-04-22 02:20.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.10 [info     ] FQE_20220422021932: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014843280056873, 'time_algorithm_update': 0.00875936646059335, 'loss': 0.0026307565327355056, 'time_step': 0.00897209615592497, 'init_value': -1.5663340091705322, 'ave_value': -1.0682811109500157, 'soft_opc': nan} step=3818




2022-04-22 02:20.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.12 [info     ] FQE_20220422021932: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014531612396240234, 'time_algorithm_update': 0.008891346942947572, 'loss': 0.002871032800568907, 'time_step': 0.009100638240216726, 'init_value': -1.5734537839889526, 'ave_value': -1.058880441286811, 'soft_opc': nan} step=3984




2022-04-22 02:20.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.14 [info     ] FQE_20220422021932: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015151069824954113, 'time_algorithm_update': 0.009373674909752536, 'loss': 0.003121984010830196, 'time_step': 0.009590131690703243, 'init_value': -1.6760761737823486, 'ave_value': -1.1329059171347737, 'soft_opc': nan} step=4150




2022-04-22 02:20.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.16 [info     ] FQE_20220422021932: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014863674899181687, 'time_algorithm_update': 0.009363072464265019, 'loss': 0.0030646268089010143, 'time_step': 0.00957784595259701, 'init_value': -1.7611112594604492, 'ave_value': -1.180740260678503, 'soft_opc': nan} step=4316




2022-04-22 02:20.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.17 [info     ] FQE_20220422021932: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001449484422982457, 'time_algorithm_update': 0.00864877758255924, 'loss': 0.003379771110730762, 'time_step': 0.008859700467213091, 'init_value': -1.8211793899536133, 'ave_value': -1.2234514671067396, 'soft_opc': nan} step=4482




2022-04-22 02:20.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.19 [info     ] FQE_20220422021932: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001492457217480763, 'time_algorithm_update': 0.009420960782522178, 'loss': 0.003635063401882043, 'time_step': 0.00963524594364396, 'init_value': -1.871221661567688, 'ave_value': -1.2416984791607335, 'soft_opc': nan} step=4648




2022-04-22 02:20.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.21 [info     ] FQE_20220422021932: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014807373644357705, 'time_algorithm_update': 0.009326098913169769, 'loss': 0.003869398744602923, 'time_step': 0.009539619985833225, 'init_value': -1.9715359210968018, 'ave_value': -1.3244144739935526, 'soft_opc': nan} step=4814




2022-04-22 02:20.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.22 [info     ] FQE_20220422021932: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001508141138467444, 'time_algorithm_update': 0.009428565760692918, 'loss': 0.004133154755403535, 'time_step': 0.009644028652145201, 'init_value': -2.0213401317596436, 'ave_value': -1.3452004655692222, 'soft_opc': nan} step=4980




2022-04-22 02:20.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.24 [info     ] FQE_20220422021932: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00014744321983980844, 'time_algorithm_update': 0.00898754596710205, 'loss': 0.004427747361934239, 'time_step': 0.009208661964140743, 'init_value': -2.089995861053467, 'ave_value': -1.3856226877384894, 'soft_opc': nan} step=5146




2022-04-22 02:20.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.26 [info     ] FQE_20220422021932: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001445075115525579, 'time_algorithm_update': 0.00872847114700869, 'loss': 0.0043639292695394055, 'time_step': 0.008941524000052947, 'init_value': -2.15028715133667, 'ave_value': -1.4316852228934163, 'soft_opc': nan} step=5312




2022-04-22 02:20.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.27 [info     ] FQE_20220422021932: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014500014753226773, 'time_algorithm_update': 0.009336327931967127, 'loss': 0.00489427368749208, 'time_step': 0.009547578283103115, 'init_value': -2.1318984031677246, 'ave_value': -1.422449643792467, 'soft_opc': nan} step=5478




2022-04-22 02:20.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.29 [info     ] FQE_20220422021932: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001495875507952219, 'time_algorithm_update': 0.009395178542079696, 'loss': 0.005228441932339625, 'time_step': 0.009616808718945607, 'init_value': -2.244837522506714, 'ave_value': -1.5138707419941286, 'soft_opc': nan} step=5644




2022-04-22 02:20.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.31 [info     ] FQE_20220422021932: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00014541809817394578, 'time_algorithm_update': 0.009209201996584973, 'loss': 0.00544587747212808, 'time_step': 0.00941925020102995, 'init_value': -2.346776008605957, 'ave_value': -1.5668425806705748, 'soft_opc': nan} step=5810




2022-04-22 02:20.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.32 [info     ] FQE_20220422021932: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014890963772693313, 'time_algorithm_update': 0.008911780564181775, 'loss': 0.005703187303397389, 'time_step': 0.009125168064990676, 'init_value': -2.3734383583068848, 'ave_value': -1.59556840021868, 'soft_opc': nan} step=5976




2022-04-22 02:20.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.34 [info     ] FQE_20220422021932: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014831502753567984, 'time_algorithm_update': 0.009418896881930799, 'loss': 0.005859074269243269, 'time_step': 0.009635353662881506, 'init_value': -2.414731740951538, 'ave_value': -1.6307030302581487, 'soft_opc': nan} step=6142




2022-04-22 02:20.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.36 [info     ] FQE_20220422021932: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015289381325963032, 'time_algorithm_update': 0.00941962650023311, 'loss': 0.006036143670705742, 'time_step': 0.009637161909815777, 'init_value': -2.531050205230713, 'ave_value': -1.7075060934499577, 'soft_opc': nan} step=6308




2022-04-22 02:20.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.37 [info     ] FQE_20220422021932: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015244713748794003, 'time_algorithm_update': 0.009203850504863694, 'loss': 0.006455581615249772, 'time_step': 0.009421309792851827, 'init_value': -2.58253812789917, 'ave_value': -1.7581778736197733, 'soft_opc': nan} step=6474




2022-04-22 02:20.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.39 [info     ] FQE_20220422021932: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001448378505476986, 'time_algorithm_update': 0.008524244090160692, 'loss': 0.007263282422293314, 'time_step': 0.008733414741883794, 'init_value': -2.661884307861328, 'ave_value': -1.808849660364223, 'soft_opc': nan} step=6640




2022-04-22 02:20.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.41 [info     ] FQE_20220422021932: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014594807682267153, 'time_algorithm_update': 0.009286320353128824, 'loss': 0.006954293332696634, 'time_step': 0.009493849363671729, 'init_value': -2.6693737506866455, 'ave_value': -1.8143583080521575, 'soft_opc': nan} step=6806




2022-04-22 02:20.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.43 [info     ] FQE_20220422021932: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00014913512999752918, 'time_algorithm_update': 0.009431372205895114, 'loss': 0.007188502794162494, 'time_step': 0.009645852697900978, 'init_value': -2.739819049835205, 'ave_value': -1.860236843366612, 'soft_opc': nan} step=6972




2022-04-22 02:20.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.44 [info     ] FQE_20220422021932: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001528794506946242, 'time_algorithm_update': 0.009406536458486534, 'loss': 0.007638582008461614, 'time_step': 0.009631819035633501, 'init_value': -2.770650863647461, 'ave_value': -1.8858066882233362, 'soft_opc': nan} step=7138




2022-04-22 02:20.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.46 [info     ] FQE_20220422021932: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001467811055930264, 'time_algorithm_update': 0.008756321596812052, 'loss': 0.007670550984968656, 'time_step': 0.008968114852905273, 'init_value': -2.7564282417297363, 'ave_value': -1.8988155901901893, 'soft_opc': nan} step=7304




2022-04-22 02:20.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.48 [info     ] FQE_20220422021932: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001488464424409062, 'time_algorithm_update': 0.009427000241107252, 'loss': 0.0077593046535404, 'time_step': 0.009640475353562689, 'init_value': -2.789982795715332, 'ave_value': -1.8901891965821789, 'soft_opc': nan} step=7470




2022-04-22 02:20.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.49 [info     ] FQE_20220422021932: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014644502157188323, 'time_algorithm_update': 0.009322055851120547, 'loss': 0.007382740879204147, 'time_step': 0.009534455207457026, 'init_value': -2.8163907527923584, 'ave_value': -1.8968714463415446, 'soft_opc': nan} step=7636




2022-04-22 02:20.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.51 [info     ] FQE_20220422021932: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001447545476706631, 'time_algorithm_update': 0.009421881422939071, 'loss': 0.008126181830667486, 'time_step': 0.009636413620178958, 'init_value': -2.852597236633301, 'ave_value': -1.9552952465374727, 'soft_opc': nan} step=7802




2022-04-22 02:20.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.53 [info     ] FQE_20220422021932: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014335276132606598, 'time_algorithm_update': 0.008974335279809424, 'loss': 0.008475819538615318, 'time_step': 0.00918961720294263, 'init_value': -2.8355493545532227, 'ave_value': -1.9713773172382298, 'soft_opc': nan} step=7968




2022-04-22 02:20.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.54 [info     ] FQE_20220422021932: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00014917821769254753, 'time_algorithm_update': 0.008843400392187646, 'loss': 0.008745933654795243, 'time_step': 0.009057633848075407, 'init_value': -2.937021255493164, 'ave_value': -2.0141610342505816, 'soft_opc': nan} step=8134




2022-04-22 02:20.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:20.56 [info     ] FQE_20220422021932: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001489326178309429, 'time_algorithm_update': 0.009397397558373141, 'loss': 0.00879429747716591, 'time_step': 0.009611514677484351, 'init_value': -2.905454158782959, 'ave_value': -1.9960621455969574, 'soft_opc': nan} step=8300




2022-04-22 02:20.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422021932/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 02:20.56 [info     ] Directory is created at d3rlpy_logs/FQE_20220422022056
2022-04-22 02:20.56 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 02:20.56 [debug    ] Building models...
2022-04-22 02:20.56 [debug    ] Models have been built.
2022-04-22 02:20.56 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422022056/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 02:21.00 [info     ] FQE_20220422022056: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015332463175751442, 'time_algorithm_update': 0.009239960548489592, 'loss': 0.02523661500416956, 'time_step': 0.009458094835281372, 'init_value': -0.8171850442886353, 'ave_value': -0.8346240809304757, 'soft_opc': nan} step=344




2022-04-22 02:21.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.03 [info     ] FQE_20220422022056: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015481197556783987, 'time_algorithm_update': 0.009291783321735471, 'loss': 0.021021932588218776, 'time_step': 0.009516110946965773, 'init_value': -1.5368473529815674, 'ave_value': -1.606835845468549, 'soft_opc': nan} step=688




2022-04-22 02:21.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.07 [info     ] FQE_20220422022056: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015487781790799872, 'time_algorithm_update': 0.00917161134786384, 'loss': 0.02308776862052984, 'time_step': 0.00939312785170799, 'init_value': -2.4153590202331543, 'ave_value': -2.5666896034468403, 'soft_opc': nan} step=1032




2022-04-22 02:21.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.10 [info     ] FQE_20220422022056: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001663766628087953, 'time_algorithm_update': 0.009272034778151401, 'loss': 0.02699986412352341, 'time_step': 0.009505913701168326, 'init_value': -3.0193428993225098, 'ave_value': -3.2554491234322387, 'soft_opc': nan} step=1376




2022-04-22 02:21.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.14 [info     ] FQE_20220422022056: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001556873321533203, 'time_algorithm_update': 0.00939773958782817, 'loss': 0.035921263489547335, 'time_step': 0.009618062612622283, 'init_value': -3.7449498176574707, 'ave_value': -4.120587952690082, 'soft_opc': nan} step=1720




2022-04-22 02:21.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.17 [info     ] FQE_20220422022056: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015754963076391884, 'time_algorithm_update': 0.00919364843257638, 'loss': 0.047470483888308844, 'time_step': 0.009417549122211545, 'init_value': -4.4116950035095215, 'ave_value': -4.89384026434776, 'soft_opc': nan} step=2064




2022-04-22 02:21.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.21 [info     ] FQE_20220422022056: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001560207023177036, 'time_algorithm_update': 0.00921491550844769, 'loss': 0.061898689308247073, 'time_step': 0.009438781544219616, 'init_value': -5.286361217498779, 'ave_value': -5.861212861564782, 'soft_opc': nan} step=2408




2022-04-22 02:21.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.24 [info     ] FQE_20220422022056: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001532622548036797, 'time_algorithm_update': 0.00911466257516728, 'loss': 0.07964537501725, 'time_step': 0.009336084127426147, 'init_value': -6.013801574707031, 'ave_value': -6.634179909638893, 'soft_opc': nan} step=2752




2022-04-22 02:21.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.28 [info     ] FQE_20220422022056: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00015813805336175964, 'time_algorithm_update': 0.00945196318071942, 'loss': 0.09671155829524059, 'time_step': 0.009678221719209538, 'init_value': -6.7044901847839355, 'ave_value': -7.286676901383829, 'soft_opc': nan} step=3096




2022-04-22 02:21.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.31 [info     ] FQE_20220422022056: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00015126619228096895, 'time_algorithm_update': 0.009051020062246989, 'loss': 0.1156960908637577, 'time_step': 0.009268385726352071, 'init_value': -7.771141052246094, 'ave_value': -8.278241551749677, 'soft_opc': nan} step=3440




2022-04-22 02:21.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.35 [info     ] FQE_20220422022056: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001545132592666981, 'time_algorithm_update': 0.00917516683423242, 'loss': 0.13125041504065657, 'time_step': 0.009394066278324571, 'init_value': -8.491832733154297, 'ave_value': -8.979270737903478, 'soft_opc': nan} step=3784




2022-04-22 02:21.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.38 [info     ] FQE_20220422022056: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015404612519020257, 'time_algorithm_update': 0.00904913073362306, 'loss': 0.15184188930339418, 'time_step': 0.009269218112147131, 'init_value': -9.419251441955566, 'ave_value': -9.8455134535158, 'soft_opc': nan} step=4128




2022-04-22 02:21.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.42 [info     ] FQE_20220422022056: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015638734019079873, 'time_algorithm_update': 0.009406688601471657, 'loss': 0.17461269305000993, 'time_step': 0.009630083344703498, 'init_value': -10.204818725585938, 'ave_value': -10.606103110091073, 'soft_opc': nan} step=4472




2022-04-22 02:21.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.45 [info     ] FQE_20220422022056: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00015818448953850326, 'time_algorithm_update': 0.00907822611720063, 'loss': 0.19578169781151553, 'time_step': 0.009307840535807054, 'init_value': -11.212879180908203, 'ave_value': -11.565236476708941, 'soft_opc': nan} step=4816




2022-04-22 02:21.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.49 [info     ] FQE_20220422022056: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001529330431028854, 'time_algorithm_update': 0.009214688179104827, 'loss': 0.21374720039365944, 'time_step': 0.009433357521545055, 'init_value': -11.958702087402344, 'ave_value': -12.286745006215197, 'soft_opc': nan} step=5160




2022-04-22 02:21.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.52 [info     ] FQE_20220422022056: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015124262765396472, 'time_algorithm_update': 0.009029992098032041, 'loss': 0.23635975694348818, 'time_step': 0.009245335362678351, 'init_value': -12.759004592895508, 'ave_value': -13.127859257331101, 'soft_opc': nan} step=5504




2022-04-22 02:21.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.56 [info     ] FQE_20220422022056: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00015717121057732162, 'time_algorithm_update': 0.00947724040164504, 'loss': 0.2693651681928354, 'time_step': 0.00970020474389542, 'init_value': -13.22302532196045, 'ave_value': -13.565512126096868, 'soft_opc': nan} step=5848




2022-04-22 02:21.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:21.59 [info     ] FQE_20220422022056: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015569703523502795, 'time_algorithm_update': 0.00909862753956817, 'loss': 0.2871833309736969, 'time_step': 0.009320085824922074, 'init_value': -13.540870666503906, 'ave_value': -14.060687839757936, 'soft_opc': nan} step=6192




2022-04-22 02:21.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.03 [info     ] FQE_20220422022056: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00015980906264726505, 'time_algorithm_update': 0.009212099535520686, 'loss': 0.3123572587447111, 'time_step': 0.009441517120183901, 'init_value': -14.253843307495117, 'ave_value': -14.928968311520713, 'soft_opc': nan} step=6536




2022-04-22 02:22.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.06 [info     ] FQE_20220422022056: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00015406691750814749, 'time_algorithm_update': 0.009063600800758185, 'loss': 0.3320127047901583, 'time_step': 0.009282334599384042, 'init_value': -14.694137573242188, 'ave_value': -15.716668341915451, 'soft_opc': nan} step=6880




2022-04-22 02:22.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.10 [info     ] FQE_20220422022056: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015546485435130985, 'time_algorithm_update': 0.00941336570784103, 'loss': 0.35723032983193215, 'time_step': 0.009639316520025564, 'init_value': -15.291510581970215, 'ave_value': -16.404456552903813, 'soft_opc': nan} step=7224




2022-04-22 02:22.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.13 [info     ] FQE_20220422022056: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00015875489212745843, 'time_algorithm_update': 0.009122905343077903, 'loss': 0.3875270639979389, 'time_step': 0.009350528550702472, 'init_value': -15.351633071899414, 'ave_value': -16.92849378681129, 'soft_opc': nan} step=7568




2022-04-22 02:22.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.16 [info     ] FQE_20220422022056: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00015337384024331736, 'time_algorithm_update': 0.009119385896727096, 'loss': 0.40113563983441264, 'time_step': 0.009340108827103016, 'init_value': -15.963510513305664, 'ave_value': -17.708904179881184, 'soft_opc': nan} step=7912




2022-04-22 02:22.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.20 [info     ] FQE_20220422022056: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015296631081159726, 'time_algorithm_update': 0.009059749370397524, 'loss': 0.4304568697373534, 'time_step': 0.009279784075049468, 'init_value': -16.449054718017578, 'ave_value': -18.41329475855203, 'soft_opc': nan} step=8256




2022-04-22 02:22.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.23 [info     ] FQE_20220422022056: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001521048157714134, 'time_algorithm_update': 0.008485146040140196, 'loss': 0.4456690296714822, 'time_step': 0.008707273145054662, 'init_value': -16.723346710205078, 'ave_value': -18.9185991920019, 'soft_opc': nan} step=8600




2022-04-22 02:22.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.26 [info     ] FQE_20220422022056: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001554454481878946, 'time_algorithm_update': 0.008421640756518342, 'loss': 0.47776675263831264, 'time_step': 0.008646743935207988, 'init_value': -17.287118911743164, 'ave_value': -19.60135541917832, 'soft_opc': nan} step=8944




2022-04-22 02:22.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.29 [info     ] FQE_20220422022056: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00015182065409283306, 'time_algorithm_update': 0.008276575526525809, 'loss': 0.48669847031665403, 'time_step': 0.008493891982145087, 'init_value': -17.72585678100586, 'ave_value': -20.240280969800043, 'soft_opc': nan} step=9288




2022-04-22 02:22.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.33 [info     ] FQE_20220422022056: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015422632527905842, 'time_algorithm_update': 0.008446803619695265, 'loss': 0.4866939392590592, 'time_step': 0.00867062391236771, 'init_value': -18.08115577697754, 'ave_value': -20.767925522668154, 'soft_opc': nan} step=9632




2022-04-22 02:22.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.36 [info     ] FQE_20220422022056: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001550011856611385, 'time_algorithm_update': 0.008525631455487983, 'loss': 0.5009379002893734, 'time_step': 0.008746396663577058, 'init_value': -18.351537704467773, 'ave_value': -21.15639351692705, 'soft_opc': nan} step=9976




2022-04-22 02:22.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.39 [info     ] FQE_20220422022056: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00015747962996017102, 'time_algorithm_update': 0.008487380521242009, 'loss': 0.5083888649269072, 'time_step': 0.00871755841166474, 'init_value': -18.755046844482422, 'ave_value': -21.531470804393024, 'soft_opc': nan} step=10320




2022-04-22 02:22.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.42 [info     ] FQE_20220422022056: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015681704809499341, 'time_algorithm_update': 0.008257077876911607, 'loss': 0.5148054622005411, 'time_step': 0.00848244165265283, 'init_value': -18.75861358642578, 'ave_value': -21.54466912399365, 'soft_opc': nan} step=10664




2022-04-22 02:22.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.45 [info     ] FQE_20220422022056: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00015198491340459778, 'time_algorithm_update': 0.008492279191349828, 'loss': 0.5309516318673051, 'time_step': 0.008713169846423837, 'init_value': -19.1857967376709, 'ave_value': -21.922344790495142, 'soft_opc': nan} step=11008




2022-04-22 02:22.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.49 [info     ] FQE_20220422022056: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001526433368061864, 'time_algorithm_update': 0.008453605479972308, 'loss': 0.5341503248647462, 'time_step': 0.008671999670738397, 'init_value': -19.63384437561035, 'ave_value': -22.324398210074182, 'soft_opc': nan} step=11352




2022-04-22 02:22.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.52 [info     ] FQE_20220422022056: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015212144962576933, 'time_algorithm_update': 0.00847568761470706, 'loss': 0.5515520697362114, 'time_step': 0.008696811836819316, 'init_value': -19.54360008239746, 'ave_value': -22.223486150737415, 'soft_opc': nan} step=11696




2022-04-22 02:22.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.55 [info     ] FQE_20220422022056: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015247422595356786, 'time_algorithm_update': 0.00803565493849821, 'loss': 0.5634016271648106, 'time_step': 0.008256354997324388, 'init_value': -20.05177116394043, 'ave_value': -22.638322924807884, 'soft_opc': nan} step=12040




2022-04-22 02:22.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:22.58 [info     ] FQE_20220422022056: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00015509267186009608, 'time_algorithm_update': 0.007453705682310947, 'loss': 0.578358891457969, 'time_step': 0.007675060006075127, 'init_value': -20.11199188232422, 'ave_value': -22.669986288732773, 'soft_opc': nan} step=12384




2022-04-22 02:22.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.01 [info     ] FQE_20220422022056: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015324423479479412, 'time_algorithm_update': 0.007548277461251547, 'loss': 0.5998740333110787, 'time_step': 0.007771931415380433, 'init_value': -20.20879364013672, 'ave_value': -22.794789920664215, 'soft_opc': nan} step=12728




2022-04-22 02:23.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.03 [info     ] FQE_20220422022056: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.000150645888128946, 'time_algorithm_update': 0.00741701417191084, 'loss': 0.6135940248969682, 'time_step': 0.00763167752776035, 'init_value': -20.59557342529297, 'ave_value': -23.09033838656001, 'soft_opc': nan} step=13072




2022-04-22 02:23.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.06 [info     ] FQE_20220422022056: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015346532644227494, 'time_algorithm_update': 0.007460591404937034, 'loss': 0.6258506169488524, 'time_step': 0.007683031780775203, 'init_value': -20.98129653930664, 'ave_value': -23.392730708413563, 'soft_opc': nan} step=13416




2022-04-22 02:23.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.09 [info     ] FQE_20220422022056: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015377097351606503, 'time_algorithm_update': 0.007474253343981366, 'loss': 0.635282768201906, 'time_step': 0.007697917694269225, 'init_value': -20.773353576660156, 'ave_value': -23.287748116350457, 'soft_opc': nan} step=13760




2022-04-22 02:23.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.12 [info     ] FQE_20220422022056: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00014924102051313534, 'time_algorithm_update': 0.007380841776382091, 'loss': 0.6386018896659533, 'time_step': 0.007595681173856868, 'init_value': -21.056983947753906, 'ave_value': -23.53568609469646, 'soft_opc': nan} step=14104




2022-04-22 02:23.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.15 [info     ] FQE_20220422022056: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00014979132386141046, 'time_algorithm_update': 0.0074266368566557415, 'loss': 0.6612870179679852, 'time_step': 0.007642721020898154, 'init_value': -21.20296859741211, 'ave_value': -23.52159063424999, 'soft_opc': nan} step=14448




2022-04-22 02:23.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.18 [info     ] FQE_20220422022056: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001536836457806964, 'time_algorithm_update': 0.0075152488641960676, 'loss': 0.6818636686833532, 'time_step': 0.007736066746157269, 'init_value': -21.42766761779785, 'ave_value': -23.78411681068524, 'soft_opc': nan} step=14792




2022-04-22 02:23.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.20 [info     ] FQE_20220422022056: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015016627866168354, 'time_algorithm_update': 0.007459982883098514, 'loss': 0.6925486427688494, 'time_step': 0.007680920667426531, 'init_value': -21.660259246826172, 'ave_value': -23.97946208214874, 'soft_opc': nan} step=15136




2022-04-22 02:23.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.23 [info     ] FQE_20220422022056: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001503651918366898, 'time_algorithm_update': 0.007437225691107816, 'loss': 0.7236351136789593, 'time_step': 0.0076525495495907096, 'init_value': -22.290620803833008, 'ave_value': -24.562256945741808, 'soft_opc': nan} step=15480




2022-04-22 02:23.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.26 [info     ] FQE_20220422022056: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015172223712122716, 'time_algorithm_update': 0.007432423358739808, 'loss': 0.7384899441141982, 'time_step': 0.007651725480722827, 'init_value': -22.48402976989746, 'ave_value': -24.6614407350821, 'soft_opc': nan} step=15824




2022-04-22 02:23.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.29 [info     ] FQE_20220422022056: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015167857325354287, 'time_algorithm_update': 0.007195986287538396, 'loss': 0.7373221091774487, 'time_step': 0.007417327442834544, 'init_value': -21.959264755249023, 'ave_value': -24.370035567586015, 'soft_opc': nan} step=16168




2022-04-22 02:23.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.32 [info     ] FQE_20220422022056: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00015463662701983784, 'time_algorithm_update': 0.007894732231317564, 'loss': 0.7366160320789489, 'time_step': 0.008116657650747965, 'init_value': -22.49634552001953, 'ave_value': -24.870013181059747, 'soft_opc': nan} step=16512




2022-04-22 02:23.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.35 [info     ] FQE_20220422022056: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015265373296515885, 'time_algorithm_update': 0.0077141523361206055, 'loss': 0.7436525114097221, 'time_step': 0.007934438627819682, 'init_value': -22.267179489135742, 'ave_value': -24.527879510007732, 'soft_opc': nan} step=16856




2022-04-22 02:23.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:23.38 [info     ] FQE_20220422022056: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00015519594037255576, 'time_algorithm_update': 0.007975262957949971, 'loss': 0.7389982455453381, 'time_step': 0.008200691189876822, 'init_value': -22.656658172607422, 'ave_value': -25.07318211513496, 'soft_opc': nan} step=17200




2022-04-22 02:23.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422022056/model_17200.pt
search iteration:  13
using hyper params:  [0.008485755196141405, 0.0036257977511722247, 9.022254441160106e-05, 5]
2022-04-22 02:23.38 [debug    ] RoundIterator is selected.
2022-04-22 02:23.38 [info     ] Directory is created at d3rlpy_logs/CQL_20220422022338
2022-04-22 02:23.38 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 02:23.38 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 02:23.38 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422022338/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.008485755196141405, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:23.56 [info     ] CQL_20220422022338: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003491254211161178, 'time_algorithm_update': 0.04959841063945969, 'temp_loss': 4.923386824613361, 'temp': 0.9842059372821984, 'alpha_loss': -17.709803845841073, 'alpha': 1.0177265257504635, 'critic_loss': 113.84020700620088, 'actor_loss': 3.2974833434910615, 'time_step': 0.05003331920315075, 'td_error': 1.3216539338520994, 'init_value': -7.523486614227295, 'ave_value': -7.120049298928778} step=346
2022-04-22 02:23.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:24.14 [info     ] CQL_20220422022338: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00034906202658063415, 'time_algorithm_update': 0.04984631221418436, 'temp_loss': 4.815585202564394, 'temp': 0.9541047449745884, 'alpha_loss': -18.37914461345342, 'alpha': 1.0542128158442547, 'critic_loss': 148.809168468321, 'actor_loss': 8.772997652175109, 'time_step': 0.05028487767787338, 'td_error': 1.3908469223288507, 'init_value': -11.291749000549316, 'ave_value': -10.590182766704098} step=692
2022-04-22 02:24.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:24.32 [info     ] CQL_20220422022338: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00035503558340789265, 'time_algorithm_update': 0.05008110834683986, 'temp_loss': 4.670764581316468, 'temp': 0.9254431879589323, 'alpha_loss': -19.050337835543417, 'alpha': 1.09249542972256, 'critic_loss': 279.52050340244534, 'actor_loss': 12.743638372145636, 'time_step': 0.05052232949030882, 'td_error': 1.5063907865477346, 'init_value': -14.715156555175781, 'ave_value': -13.904748894626804} step=1038
2022-04-22 02:24.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:24.50 [info     ] CQL_20220422022338: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00035197404078665494, 'time_algorithm_update': 0.04999134306273709, 'temp_loss': 4.534147219850838, 'temp': 0.8979822019965662, 'alpha_loss': -19.742325755213038, 'alpha': 1.1326732142812255, 'critic_loss': 472.1594660764485, 'actor_loss': 14.828439434139714, 'time_step': 0.050428794987628915, 'td_error': 1.524640692881315, 'init_value': -15.481698989868164, 'ave_value': -14.68472512608289} step=1384
2022-04-22 02:24.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:25.09 [info     ] CQL_20220422022338: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00035069719215348965, 'time_algorithm_update': 0.05032391630845263, 'temp_loss': 4.401350432048643, 'temp': 0.871596695198489, 'alpha_loss': -20.45833884773916, 'alpha': 1.1747544280366402, 'critic_loss': 728.2979491129087, 'actor_loss': 14.149478658775374, 'time_step': 0.05076110294099488, 'td_error': 1.4503542557595608, 'init_value': -13.72794246673584, 'ave_value': -13.22503423745059} step=1730
2022-04-22 02:25.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:25.27 [info     ] CQL_20220422022338: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00037027232219718093, 'time_algorithm_update': 0.05030739858660395, 'temp_loss': 4.273231291357493, 'temp': 0.8461961651468553, 'alpha_loss': -21.20628920869331, 'alpha': 1.218786112490417, 'critic_loss': 1054.790495944161, 'actor_loss': 10.227384835998446, 'time_step': 0.05076405974481836, 'td_error': 1.327623411387668, 'init_value': -8.778022766113281, 'ave_value': -8.554101207890517} step=2076
2022-04-22 02:25.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:25.45 [info     ] CQL_20220422022338: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00035445745280712326, 'time_algorithm_update': 0.0502741123210488, 'temp_loss': 4.1504285087475195, 'temp': 0.8217027178734024, 'alpha_loss': -21.997291140473646, 'alpha': 1.2648083326444461, 'critic_loss': 1444.0177181883355, 'actor_loss': 5.875093829425084, 'time_step': 0.05071470020823396, 'td_error': 1.3025652367466394, 'init_value': -6.382182598114014, 'ave_value': -6.303553885179088} step=2422
2022-04-22 02:25.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:26.04 [info     ] CQL_20220422022338: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00035003154953091133, 'time_algorithm_update': 0.05009615145666751, 'temp_loss': 4.031011375388658, 'temp': 0.7980541528304878, 'alpha_loss': -22.83089445367714, 'alpha': 1.3128818907489666, 'critic_loss': 1825.5803533124097, 'actor_loss': 4.708300474751202, 'time_step': 0.05052784274768278, 'td_error': 1.3044603190861686, 'init_value': -6.138923168182373, 'ave_value': -6.085486080500683} step=2768
2022-04-22 02:26.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:26.22 [info     ] CQL_20220422022338: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003458881653802243, 'time_algorithm_update': 0.04985476298139274, 'temp_loss': 3.9146864028335306, 'temp': 0.7751918037158216, 'alpha_loss': -23.7035752114533, 'alpha': 1.3630542524288156, 'critic_loss': 2172.76430057239, 'actor_loss': 4.647561878138195, 'time_step': 0.05028249142486925, 'td_error': 1.307792359658315, 'init_value': -6.070812225341797, 'ave_value': -6.031385084688522} step=3114
2022-04-22 02:26.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:26.40 [info     ] CQL_20220422022338: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003449903058178852, 'time_algorithm_update': 0.050235054396480495, 'temp_loss': 3.8027358372087425, 'temp': 0.7530712914260137, 'alpha_loss': -24.612938092623143, 'alpha': 1.4153684873801435, 'critic_loss': 2501.1042981450955, 'actor_loss': 4.793053842004324, 'time_step': 0.05066244657329052, 'td_error': 1.3124724572075976, 'init_value': -6.191882610321045, 'ave_value': -6.165334102518472} step=3460
2022-04-22 02:26.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:26.59 [info     ] CQL_20220422022338: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003495285276732693, 'time_algorithm_update': 0.051456738069567375, 'temp_loss': 3.6953986965851975, 'temp': 0.7316445088110908, 'alpha_loss': -25.561081946929754, 'alpha': 1.4698726283332515, 'critic_loss': 2817.49206401847, 'actor_loss': 5.040763121808885, 'time_step': 0.051891371693914334, 'td_error': 1.3184560896604989, 'init_value': -6.444981098175049, 'ave_value': -6.422424407161317} step=3806
2022-04-22 02:26.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:27.20 [info     ] CQL_20220422022338: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003667945806690723, 'time_algorithm_update': 0.056867380362714645, 'temp_loss': 3.590871029506529, 'temp': 0.7108725571218942, 'alpha_loss': -26.54489712356832, 'alpha': 1.526618600236198, 'critic_loss': 3114.742771744039, 'actor_loss': 5.352072758481682, 'time_step': 0.05731937651000271, 'td_error': 1.3251888420173927, 'init_value': -6.7719316482543945, 'ave_value': -6.75389431645487} step=4152
2022-04-22 02:27.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:27.40 [info     ] CQL_20220422022338: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003574976342261871, 'time_algorithm_update': 0.05707603314019352, 'temp_loss': 3.4884369194163063, 'temp': 0.6907322089796122, 'alpha_loss': -27.571000474036772, 'alpha': 1.5856746521299285, 'critic_loss': 3410.1166921626627, 'actor_loss': 5.715489568048819, 'time_step': 0.05751891356672166, 'td_error': 1.333935713841416, 'init_value': -7.206637859344482, 'ave_value': -7.188259864181294} step=4498
2022-04-22 02:27.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:28.01 [info     ] CQL_20220422022338: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00034465128286725524, 'time_algorithm_update': 0.05569140414971148, 'temp_loss': 3.389369300335129, 'temp': 0.6711926877154091, 'alpha_loss': -28.64131717461382, 'alpha': 1.6471149249572974, 'critic_loss': 3676.3784624221007, 'actor_loss': 6.1250732027726364, 'time_step': 0.056117737913407344, 'td_error': 1.3432114843304226, 'init_value': -7.6356964111328125, 'ave_value': -7.618682443773424} step=4844
2022-04-22 02:28.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:28.21 [info     ] CQL_20220422022338: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00035969645990801684, 'time_algorithm_update': 0.05636029918758855, 'temp_loss': 3.2935571649860096, 'temp': 0.6522307830049813, 'alpha_loss': -29.75099562496119, 'alpha': 1.7110083947291952, 'critic_loss': 3935.9181261007498, 'actor_loss': 6.5976918198469745, 'time_step': 0.05680788183487909, 'td_error': 1.3497737112086194, 'init_value': -7.855973243713379, 'ave_value': -7.84779544240663} step=5190
2022-04-22 02:28.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:28.42 [info     ] CQL_20220422022338: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003571200232974367, 'time_algorithm_update': 0.05644651851213047, 'temp_loss': 3.200843281139528, 'temp': 0.633818931145475, 'alpha_loss': -30.911059225225724, 'alpha': 1.777442980363879, 'critic_loss': 4133.460720878116, 'actor_loss': 7.083631738761946, 'time_step': 0.05688690105614635, 'td_error': 1.3639316309481488, 'init_value': -8.548434257507324, 'ave_value': -8.535842786891136} step=5536
2022-04-22 02:28.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:29.02 [info     ] CQL_20220422022338: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00035479785389982895, 'time_algorithm_update': 0.055247940769085305, 'temp_loss': 3.111056302324196, 'temp': 0.6159379473655899, 'alpha_loss': -32.10990675336364, 'alpha': 1.8465051151424474, 'critic_loss': 4324.41476387509, 'actor_loss': 7.637038740808564, 'time_step': 0.05568882495681674, 'td_error': 1.3796973479429653, 'init_value': -9.260003089904785, 'ave_value': -9.24162380413853} step=5882
2022-04-22 02:29.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:29.22 [info     ] CQL_20220422022338: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0003649037697411686, 'time_algorithm_update': 0.05520603284670438, 'temp_loss': 3.0228872802216196, 'temp': 0.5985715983575479, 'alpha_loss': -33.35947780939885, 'alpha': 1.9182846966506428, 'critic_loss': 4483.9061483923415, 'actor_loss': 8.227320298983182, 'time_step': 0.055654707671589934, 'td_error': 1.391126773933662, 'init_value': -9.664212226867676, 'ave_value': -9.654632237625664} step=6228
2022-04-22 02:29.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:29.42 [info     ] CQL_20220422022338: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00036470531728226323, 'time_algorithm_update': 0.05539907403074937, 'temp_loss': 2.9379515296461953, 'temp': 0.5817013699884359, 'alpha_loss': -34.654682170448964, 'alpha': 1.9928806386242024, 'critic_loss': 4630.287234973356, 'actor_loss': 8.87177327051328, 'time_step': 0.05585001452120742, 'td_error': 1.4062702855706313, 'init_value': -10.23486042022705, 'ave_value': -10.22764941921388} step=6574
2022-04-22 02:29.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:30.02 [info     ] CQL_20220422022338: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003490585812254448, 'time_algorithm_update': 0.055478953212671885, 'temp_loss': 2.8542429835810137, 'temp': 0.5653153337495176, 'alpha_loss': -36.004069741750726, 'alpha': 2.0703939902300093, 'critic_loss': 4739.674604294617, 'actor_loss': 9.521985288300266, 'time_step': 0.05591170567308547, 'td_error': 1.4230569055821023, 'init_value': -10.8478422164917, 'ave_value': -10.841566398732748} step=6920
2022-04-22 02:30.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:30.22 [info     ] CQL_20220422022338: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00035596031674070853, 'time_algorithm_update': 0.055343319914933575, 'temp_loss': 2.774861068394832, 'temp': 0.5493933410313777, 'alpha_loss': -37.40814895850386, 'alpha': 2.150945139069089, 'critic_loss': 4826.584588376084, 'actor_loss': 10.218436301788154, 'time_step': 0.05578093101523515, 'td_error': 1.4410909907564329, 'init_value': -11.46908950805664, 'ave_value': -11.465694810859171} step=7266
2022-04-22 02:30.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:30.42 [info     ] CQL_20220422022338: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003583679309470116, 'time_algorithm_update': 0.05529198550075465, 'temp_loss': 2.696698990860426, 'temp': 0.5339187342307471, 'alpha_loss': -38.85293011307027, 'alpha': 2.234634626807505, 'critic_loss': 4867.108271427926, 'actor_loss': 10.918009418972655, 'time_step': 0.05572913354531878, 'td_error': 1.4623123011735917, 'init_value': -12.198748588562012, 'ave_value': -12.19525825786274} step=7612
2022-04-22 02:30.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:31.03 [info     ] CQL_20220422022338: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003560030391450562, 'time_algorithm_update': 0.05544529967225356, 'temp_loss': 2.620770740371219, 'temp': 0.5188844358989958, 'alpha_loss': -40.36808304979622, 'alpha': 2.3215819948670493, 'critic_loss': 4885.720148516528, 'actor_loss': 11.657642728331462, 'time_step': 0.05588206114796545, 'td_error': 1.4872028545406089, 'init_value': -13.039929389953613, 'ave_value': -13.032872407206883} step=7958
2022-04-22 02:31.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:31.23 [info     ] CQL_20220422022338: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00036965146919206387, 'time_algorithm_update': 0.055575439695678006, 'temp_loss': 2.5464995968548547, 'temp': 0.504276325506282, 'alpha_loss': -41.94202879536358, 'alpha': 2.4119287915312486, 'critic_loss': 4919.740849665824, 'actor_loss': 12.378448282363097, 'time_step': 0.056024656130399315, 'td_error': 1.5122052341609227, 'init_value': -13.828192710876465, 'ave_value': -13.819726877316528} step=8304
2022-04-22 02:31.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:31.43 [info     ] CQL_20220422022338: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003639645659165575, 'time_algorithm_update': 0.0554317959471245, 'temp_loss': 2.474950472743525, 'temp': 0.4900813098415474, 'alpha_loss': -43.574399926069844, 'alpha': 2.505801999500032, 'critic_loss': 4932.079228572074, 'actor_loss': 13.14274718168843, 'time_step': 0.055874082394418, 'td_error': 1.5350137970417739, 'init_value': -14.49011516571045, 'ave_value': -14.483265061737972} step=8650
2022-04-22 02:31.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:32.03 [info     ] CQL_20220422022338: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003566300937895141, 'time_algorithm_update': 0.055563670362351256, 'temp_loss': 2.4049529925936217, 'temp': 0.476285811362928, 'alpha_loss': -45.270352214747085, 'alpha': 2.603333174148736, 'critic_loss': 5109.156537888367, 'actor_loss': 13.955550254424873, 'time_step': 0.05600202910472892, 'td_error': 1.5641257341126864, 'init_value': -15.320286750793457, 'ave_value': -15.311869273178948} step=8996
2022-04-22 02:32.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:32.24 [info     ] CQL_20220422022338: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0003591913708372612, 'time_algorithm_update': 0.05559186921643384, 'temp_loss': 2.3373826811079343, 'temp': 0.4628804262318363, 'alpha_loss': -47.03227366188358, 'alpha': 2.7046592015062454, 'critic_loss': 5284.637329807171, 'actor_loss': 14.72071670245573, 'time_step': 0.056031511009084004, 'td_error': 1.5893803834410853, 'init_value': -15.997953414916992, 'ave_value': -15.990619646310241} step=9342
2022-04-22 02:32.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:32.44 [info     ] CQL_20220422022338: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0003680803872257299, 'time_algorithm_update': 0.0553128574624916, 'temp_loss': 2.271790006946277, 'temp': 0.44985154813769235, 'alpha_loss': -48.86080537917297, 'alpha': 2.809936938258265, 'critic_loss': 5526.919128770774, 'actor_loss': 15.494842906907804, 'time_step': 0.05576436023491656, 'td_error': 1.61617997182949, 'init_value': -16.66785430908203, 'ave_value': -16.664441156726472} step=9688
2022-04-22 02:32.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:33.04 [info     ] CQL_20220422022338: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00036818581509452333, 'time_algorithm_update': 0.05568872917594248, 'temp_loss': 2.2083736334232924, 'temp': 0.43718793436971015, 'alpha_loss': -50.764789118243094, 'alpha': 2.9193223921549802, 'critic_loss': 5676.919514033147, 'actor_loss': 16.191210997586996, 'time_step': 0.05614247418552465, 'td_error': 1.6432832058977458, 'init_value': -17.34795570373535, 'ave_value': -17.345098714525523} step=10034
2022-04-22 02:33.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:33.24 [info     ] CQL_20220422022338: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00036689518504060074, 'time_algorithm_update': 0.05516020204290489, 'temp_loss': 2.1456021587283627, 'temp': 0.4248819920541234, 'alpha_loss': -52.747625747857064, 'alpha': 3.0329629782307355, 'critic_loss': 5733.207944307713, 'actor_loss': 16.89795097174672, 'time_step': 0.055608876867790445, 'td_error': 1.673392650748436, 'init_value': -18.09071159362793, 'ave_value': -18.086737056748458} step=10380
2022-04-22 02:33.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:33.44 [info     ] CQL_20220422022338: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00035579149433643143, 'time_algorithm_update': 0.05450297642305407, 'temp_loss': 2.0850893300392723, 'temp': 0.4129236765162793, 'alpha_loss': -54.77712094301433, 'alpha': 3.1510085212012937, 'critic_loss': 5850.9646264225075, 'actor_loss': 17.645277117029092, 'time_step': 0.054939919813519957, 'td_error': 1.7050940407386874, 'init_value': -18.849075317382812, 'ave_value': -18.843535970522364} step=10726
2022-04-22 02:33.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:34.03 [info     ] CQL_20220422022338: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003492618571816152, 'time_algorithm_update': 0.05254087282743068, 'temp_loss': 2.0267396383202834, 'temp': 0.4013016763966897, 'alpha_loss': -56.92653015445423, 'alpha': 3.2736429796053494, 'critic_loss': 5700.5124652840495, 'actor_loss': 18.25103249853057, 'time_step': 0.052975033749045664, 'td_error': 1.7268322241377223, 'init_value': -19.305490493774414, 'ave_value': -19.305327519832375} step=11072
2022-04-22 02:34.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:34.22 [info     ] CQL_20220422022338: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003499778019899578, 'time_algorithm_update': 0.05364437806123943, 'temp_loss': 1.9695425229954582, 'temp': 0.3900068265337475, 'alpha_loss': -59.134532873341115, 'alpha': 3.4010776357154624, 'critic_loss': 5572.679048218479, 'actor_loss': 18.99212503708856, 'time_step': 0.054077147748428964, 'td_error': 1.766656110771783, 'init_value': -20.243568420410156, 'ave_value': -20.235937907244708} step=11418
2022-04-22 02:34.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:34.42 [info     ] CQL_20220422022338: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003531819823160337, 'time_algorithm_update': 0.05386056789773048, 'temp_loss': 1.9143113063249975, 'temp': 0.3790299243837423, 'alpha_loss': -61.44743015311357, 'alpha': 3.5334736294829088, 'critic_loss': 5754.956379267522, 'actor_loss': 19.729679366756727, 'time_step': 0.05429935110786747, 'td_error': 1.797446943449653, 'init_value': -20.883092880249023, 'ave_value': -20.877348361551167} step=11764
2022-04-22 02:34.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:35.02 [info     ] CQL_20220422022338: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00035965442657470703, 'time_algorithm_update': 0.05388241489498601, 'temp_loss': 1.860184053465121, 'temp': 0.3683618439759822, 'alpha_loss': -63.82632577488188, 'alpha': 3.6710201681004784, 'critic_loss': 5926.063715058255, 'actor_loss': 20.434719107743632, 'time_step': 0.054324635880530915, 'td_error': 1.8260443871659446, 'init_value': -21.46182632446289, 'ave_value': -21.459001161114987} step=12110
2022-04-22 02:35.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:35.21 [info     ] CQL_20220422022338: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003535189380535501, 'time_algorithm_update': 0.05427312161881111, 'temp_loss': 1.8073621381913996, 'temp': 0.35799588064926896, 'alpha_loss': -66.32430789925459, 'alpha': 3.8139191438696978, 'critic_loss': 6005.478126128974, 'actor_loss': 21.110316304113134, 'time_step': 0.05471515173167851, 'td_error': 1.8638083660260858, 'init_value': -22.258596420288086, 'ave_value': -22.251448943389995} step=12456
2022-04-22 02:35.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:35.41 [info     ] CQL_20220422022338: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.000354186647889242, 'time_algorithm_update': 0.054018688339718506, 'temp_loss': 1.756919935948587, 'temp': 0.3479207800647427, 'alpha_loss': -68.90905214871974, 'alpha': 3.9624037935554637, 'critic_loss': 6178.271130159411, 'actor_loss': 21.78387327139088, 'time_step': 0.054457516339472954, 'td_error': 1.89372783322862, 'init_value': -22.84378433227539, 'ave_value': -22.838036290634975} step=12802
2022-04-22 02:35.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:36.01 [info     ] CQL_20220422022338: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00035503696154996837, 'time_algorithm_update': 0.053754735544237785, 'temp_loss': 1.7076360831370931, 'temp': 0.3381282849118889, 'alpha_loss': -71.59050267831438, 'alpha': 4.116651489555491, 'critic_loss': 6193.005111429733, 'actor_loss': 22.399814214320543, 'time_step': 0.054193043295358644, 'td_error': 1.9260039057717586, 'init_value': -23.470224380493164, 'ave_value': -23.463905185520396} step=13148
2022-04-22 02:36.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:36.20 [info     ] CQL_20220422022338: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003637357943319861, 'time_algorithm_update': 0.05417073875493397, 'temp_loss': 1.659730722793954, 'temp': 0.32861034632418196, 'alpha_loss': -74.37536956257902, 'alpha': 4.27690394903194, 'critic_loss': 6303.534277061506, 'actor_loss': 23.031002033652598, 'time_step': 0.05462171852244118, 'td_error': 1.9497576389687596, 'init_value': -23.858312606811523, 'ave_value': -23.859651422116247} step=13494
2022-04-22 02:36.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:36.40 [info     ] CQL_20220422022338: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003605329921479859, 'time_algorithm_update': 0.05428725653301085, 'temp_loss': 1.612541894692217, 'temp': 0.31936147697054584, 'alpha_loss': -77.26723564015647, 'alpha': 4.443397269772657, 'critic_loss': 5757.53880989207, 'actor_loss': 23.438528215265, 'time_step': 0.05473541662183111, 'td_error': 1.9656656149601457, 'init_value': -24.10399055480957, 'ave_value': -24.1129472803086} step=13840
2022-04-22 02:36.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:37.00 [info     ] CQL_20220422022338: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00036676563968548196, 'time_algorithm_update': 0.05406990009925269, 'temp_loss': 1.567712144011018, 'temp': 0.31037294347851263, 'alpha_loss': -80.2802391934257, 'alpha': 4.616372308290074, 'critic_loss': 4846.559567490065, 'actor_loss': 23.812269613232917, 'time_step': 0.05452557175145673, 'td_error': 1.9932005021547399, 'init_value': -24.63470458984375, 'ave_value': -24.639496275122667} step=14186
2022-04-22 02:37.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:37.19 [info     ] CQL_20220422022338: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003654178167354165, 'time_algorithm_update': 0.053845085160580675, 'temp_loss': 1.5231515451662803, 'temp': 0.30163710913217134, 'alpha_loss': -83.40104734828707, 'alpha': 4.796088768567653, 'critic_loss': 4345.307068223899, 'actor_loss': 24.37355827596146, 'time_step': 0.05429496379256937, 'td_error': 2.0212198303257956, 'init_value': -25.142969131469727, 'ave_value': -25.148160962845942} step=14532
2022-04-22 02:37.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:37.39 [info     ] CQL_20220422022338: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.000357066964827521, 'time_algorithm_update': 0.05386315191412248, 'temp_loss': 1.4804009147462127, 'temp': 0.29314738649853395, 'alpha_loss': -86.64652428599452, 'alpha': 4.982795855902523, 'critic_loss': 3878.2711640286307, 'actor_loss': 24.892726038232706, 'time_step': 0.054305498310596265, 'td_error': 2.0540208358771066, 'init_value': -25.756141662597656, 'ave_value': -25.757327127434056} step=14878
2022-04-22 02:37.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:37.59 [info     ] CQL_20220422022338: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003632093440590566, 'time_algorithm_update': 0.0536589939470236, 'temp_loss': 1.4387747996804343, 'temp': 0.28489654231278194, 'alpha_loss': -90.01300317841458, 'alpha': 5.176749594638802, 'critic_loss': 3804.4459397861724, 'actor_loss': 25.645512624972127, 'time_step': 0.05410607219431442, 'td_error': 2.10434538527776, 'init_value': -26.67898178100586, 'ave_value': -26.67099466843874} step=15224
2022-04-22 02:37.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:38.18 [info     ] CQL_20220422022338: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.000355644722205366, 'time_algorithm_update': 0.05395002930150556, 'temp_loss': 1.398330880038311, 'temp': 0.2768782312642632, 'alpha_loss': -93.51705507046914, 'alpha': 5.37827119386265, 'critic_loss': 4053.232987773212, 'actor_loss': 26.44900982366132, 'time_step': 0.05438801043295447, 'td_error': 2.142483461105138, 'init_value': -27.287246704101562, 'ave_value': -27.284422551675565} step=15570
2022-04-22 02:38.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:38.38 [info     ] CQL_20220422022338: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00036647829706269193, 'time_algorithm_update': 0.05520369965217017, 'temp_loss': 1.3589294694062601, 'temp': 0.2690860667841972, 'alpha_loss': -97.17327609641015, 'alpha': 5.587635078871181, 'critic_loss': 4145.322146377122, 'actor_loss': 27.108987394784915, 'time_step': 0.05565546427158951, 'td_error': 2.1844471455186394, 'init_value': -27.9980525970459, 'ave_value': -27.992064627497726} step=15916
2022-04-22 02:38.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:38.59 [info     ] CQL_20220422022338: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00036893483531268346, 'time_algorithm_update': 0.05546775167388034, 'temp_loss': 1.3204081389256295, 'temp': 0.2615128588469731, 'alpha_loss': -100.95305547548857, 'alpha': 5.8051500789002874, 'critic_loss': 4346.680352183435, 'actor_loss': 27.76715545985051, 'time_step': 0.05592060778182366, 'td_error': 2.2184710995184322, 'init_value': -28.52472496032715, 'ave_value': -28.521553145454302} step=16262
2022-04-22 02:38.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:39.20 [info     ] CQL_20220422022338: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003617367992511374, 'time_algorithm_update': 0.058471326883128616, 'temp_loss': 1.2836445714696982, 'temp': 0.2541525665390698, 'alpha_loss': -104.86800999724107, 'alpha': 6.031138126560719, 'critic_loss': 4591.220089245394, 'actor_loss': 28.34453134591869, 'time_step': 0.05891751347249643, 'td_error': 2.255160618373851, 'init_value': -29.109468460083008, 'ave_value': -29.105524715805235} step=16608
2022-04-22 02:39.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:39.41 [info     ] CQL_20220422022338: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00036671327028660414, 'time_algorithm_update': 0.05678657506931724, 'temp_loss': 1.2474520234014257, 'temp': 0.24699911449341416, 'alpha_loss': -108.9625379970308, 'alpha': 6.265879279616251, 'critic_loss': 4870.019131875451, 'actor_loss': 28.894659240810856, 'time_step': 0.05723549244720812, 'td_error': 2.2884998049498058, 'init_value': -29.626083374023438, 'ave_value': -29.621969411948555} step=16954
2022-04-22 02:39.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:40.00 [info     ] CQL_20220422022338: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00036021877575471913, 'time_algorithm_update': 0.054537469941067555, 'temp_loss': 1.2120478931879033, 'temp': 0.24004764114156624, 'alpha_loss': -113.19957792690035, 'alpha': 6.509805309979213, 'critic_loss': 5150.161584402095, 'actor_loss': 29.393760488212454, 'time_step': 0.05498107044683027, 'td_error': 2.315004271958258, 'init_value': -30.027122497558594, 'ave_value': -30.024072963900903} step=17300
2022-04-22 02:40.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422022338/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 02:40.02 [info     ] FQE_20220422024001: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00014526585498488093, 'time_algorithm_update': 0.0082108715930617, 'loss': 0.007102611309186701, 'time_step': 0.008420017828424293, 'init_value': 0.16048133373260498, 'ave_value': 0.21983640028818233, 'soft_opc': nan} step=166




2022-04-22 02:40.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.04 [info     ] FQE_20220422024001: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.000158322862831943, 'time_algorithm_update': 0.008720066173967108, 'loss': 0.004591015023067145, 'time_step': 0.008949485169835838, 'init_value': 0.1545741856098175, 'ave_value': 0.21921074284358066, 'soft_opc': nan} step=332




2022-04-22 02:40.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.05 [info     ] FQE_20220422024001: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001488220260803958, 'time_algorithm_update': 0.008413732769977615, 'loss': 0.003935481408324811, 'time_step': 0.008624812206590032, 'init_value': 0.1347787082195282, 'ave_value': 0.1945968151344238, 'soft_opc': nan} step=498




2022-04-22 02:40.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.07 [info     ] FQE_20220422024001: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015102955232183617, 'time_algorithm_update': 0.008506371314267078, 'loss': 0.0036034372786665895, 'time_step': 0.00871962380696492, 'init_value': 0.11764243990182877, 'ave_value': 0.16592218387402125, 'soft_opc': nan} step=664




2022-04-22 02:40.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.08 [info     ] FQE_20220422024001: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00014759689928537392, 'time_algorithm_update': 0.008266441793326872, 'loss': 0.003025274293171803, 'time_step': 0.00847963539950819, 'init_value': 0.08887991309165955, 'ave_value': 0.11497000305040798, 'soft_opc': nan} step=830




2022-04-22 02:40.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.10 [info     ] FQE_20220422024001: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001439732241343303, 'time_algorithm_update': 0.007993343364761537, 'loss': 0.002691713505402118, 'time_step': 0.008198233972112817, 'init_value': 0.08659899234771729, 'ave_value': 0.10543881741578917, 'soft_opc': nan} step=996




2022-04-22 02:40.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.11 [info     ] FQE_20220422024001: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00014875452202486704, 'time_algorithm_update': 0.00829899885568274, 'loss': 0.002346751434414322, 'time_step': 0.008511919573128942, 'init_value': 0.07845525443553925, 'ave_value': 0.0775043903963288, 'soft_opc': nan} step=1162




2022-04-22 02:40.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.13 [info     ] FQE_20220422024001: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00015288806823362787, 'time_algorithm_update': 0.0086268473820514, 'loss': 0.002076779637017276, 'time_step': 0.008840177432600274, 'init_value': 0.026605479419231415, 'ave_value': 0.010397591967521621, 'soft_opc': nan} step=1328




2022-04-22 02:40.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.15 [info     ] FQE_20220422024001: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015058000403714468, 'time_algorithm_update': 0.008571718112531915, 'loss': 0.0017647074899462287, 'time_step': 0.008786863591297564, 'init_value': 0.016067106276750565, 'ave_value': -0.0041201759456984096, 'soft_opc': nan} step=1494




2022-04-22 02:40.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.16 [info     ] FQE_20220422024001: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00014044577816882766, 'time_algorithm_update': 0.007760181484452213, 'loss': 0.001625928487446361, 'time_step': 0.007967665971043598, 'init_value': -0.026551399379968643, 'ave_value': -0.06471658277426015, 'soft_opc': nan} step=1660




2022-04-22 02:40.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.18 [info     ] FQE_20220422024001: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00012953597379018026, 'time_algorithm_update': 0.008154703910092273, 'loss': 0.0014861036559808102, 'time_step': 0.008342890854341438, 'init_value': -0.04505930468440056, 'ave_value': -0.10358656211965928, 'soft_opc': nan} step=1826




2022-04-22 02:40.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.19 [info     ] FQE_20220422024001: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00013197904609772097, 'time_algorithm_update': 0.008393523204757506, 'loss': 0.001319942098070239, 'time_step': 0.008585408509495747, 'init_value': -0.07507802546024323, 'ave_value': -0.13483974018411057, 'soft_opc': nan} step=1992




2022-04-22 02:40.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.21 [info     ] FQE_20220422024001: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001308788736182523, 'time_algorithm_update': 0.008216556296291122, 'loss': 0.001469087690719493, 'time_step': 0.00840080933398511, 'init_value': -0.12697337567806244, 'ave_value': -0.19971878661161607, 'soft_opc': nan} step=2158




2022-04-22 02:40.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.22 [info     ] FQE_20220422024001: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00012506490730377566, 'time_algorithm_update': 0.007668667529002729, 'loss': 0.0015929228137387525, 'time_step': 0.007850290781044099, 'init_value': -0.1543627679347992, 'ave_value': -0.24162022455150747, 'soft_opc': nan} step=2324




2022-04-22 02:40.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.23 [info     ] FQE_20220422024001: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00014127449816968068, 'time_algorithm_update': 0.007970521248966814, 'loss': 0.0016329991578612566, 'time_step': 0.008172265018325254, 'init_value': -0.1833200752735138, 'ave_value': -0.2832760799651009, 'soft_opc': nan} step=2490




2022-04-22 02:40.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.25 [info     ] FQE_20220422024001: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001440005130078419, 'time_algorithm_update': 0.008324528314981115, 'loss': 0.0018617316779991925, 'time_step': 0.008531979767672986, 'init_value': -0.21588966250419617, 'ave_value': -0.32916719400332317, 'soft_opc': nan} step=2656




2022-04-22 02:40.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.27 [info     ] FQE_20220422024001: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001459897282611893, 'time_algorithm_update': 0.0082902161471815, 'loss': 0.0018379891703974075, 'time_step': 0.008500632033290633, 'init_value': -0.2478855550289154, 'ave_value': -0.36339647882190107, 'soft_opc': nan} step=2822




2022-04-22 02:40.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.28 [info     ] FQE_20220422024001: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00014263176056275885, 'time_algorithm_update': 0.008194175111242088, 'loss': 0.0023722251397150806, 'time_step': 0.008402346128440765, 'init_value': -0.29613196849823, 'ave_value': -0.4327290423600389, 'soft_opc': nan} step=2988




2022-04-22 02:40.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.29 [info     ] FQE_20220422024001: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00014286299785935735, 'time_algorithm_update': 0.007632436522518296, 'loss': 0.0026623630040277534, 'time_step': 0.007838308093059495, 'init_value': -0.2937033772468567, 'ave_value': -0.4373014874403944, 'soft_opc': nan} step=3154




2022-04-22 02:40.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.31 [info     ] FQE_20220422024001: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015791352972926865, 'time_algorithm_update': 0.008416056632995605, 'loss': 0.002976467233664547, 'time_step': 0.008639765073017901, 'init_value': -0.35166406631469727, 'ave_value': -0.5035468014343096, 'soft_opc': nan} step=3320




2022-04-22 02:40.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.33 [info     ] FQE_20220422024001: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001455760863890131, 'time_algorithm_update': 0.008328555578208831, 'loss': 0.003205933202414336, 'time_step': 0.008538882416414928, 'init_value': -0.35074079036712646, 'ave_value': -0.5004381651377624, 'soft_opc': nan} step=3486




2022-04-22 02:40.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.34 [info     ] FQE_20220422024001: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001488349523889013, 'time_algorithm_update': 0.008287821907594979, 'loss': 0.0036397491287543855, 'time_step': 0.008500850344278726, 'init_value': -0.32839125394821167, 'ave_value': -0.49516327122964704, 'soft_opc': nan} step=3652




2022-04-22 02:40.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.35 [info     ] FQE_20220422024001: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00014670929276799582, 'time_algorithm_update': 0.007757206997239446, 'loss': 0.004336955611550541, 'time_step': 0.00796764729970909, 'init_value': -0.38762885332107544, 'ave_value': -0.5699394327974333, 'soft_opc': nan} step=3818




2022-04-22 02:40.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.37 [info     ] FQE_20220422024001: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015227622296436723, 'time_algorithm_update': 0.008492963859833867, 'loss': 0.0047304882655335384, 'time_step': 0.008710008069693324, 'init_value': -0.4208456873893738, 'ave_value': -0.6015221450695987, 'soft_opc': nan} step=3984




2022-04-22 02:40.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.39 [info     ] FQE_20220422024001: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001490374645554876, 'time_algorithm_update': 0.008423193391547146, 'loss': 0.005342460850377129, 'time_step': 0.008635115910725421, 'init_value': -0.43341583013534546, 'ave_value': -0.6109967038891203, 'soft_opc': nan} step=4150




2022-04-22 02:40.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.40 [info     ] FQE_20220422024001: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015072362968720585, 'time_algorithm_update': 0.008410956486161933, 'loss': 0.005568712366273604, 'time_step': 0.008630985237029662, 'init_value': -0.44835618138313293, 'ave_value': -0.6298382817238972, 'soft_opc': nan} step=4316




2022-04-22 02:40.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.42 [info     ] FQE_20220422024001: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00014666476881647683, 'time_algorithm_update': 0.008057737924966467, 'loss': 0.006130915993532862, 'time_step': 0.008271327937941953, 'init_value': -0.46225374937057495, 'ave_value': -0.6498198347795453, 'soft_opc': nan} step=4482




2022-04-22 02:40.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.43 [info     ] FQE_20220422024001: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014611324632024192, 'time_algorithm_update': 0.008076722363391554, 'loss': 0.0067559591216085685, 'time_step': 0.00828822405941515, 'init_value': -0.44718143343925476, 'ave_value': -0.6378844138285196, 'soft_opc': nan} step=4648




2022-04-22 02:40.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.45 [info     ] FQE_20220422024001: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00014931609831660627, 'time_algorithm_update': 0.008350841970328825, 'loss': 0.007165007765197296, 'time_step': 0.008567593183862158, 'init_value': -0.42488449811935425, 'ave_value': -0.6451882699143712, 'soft_opc': nan} step=4814




2022-04-22 02:40.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.46 [info     ] FQE_20220422024001: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001486295677093138, 'time_algorithm_update': 0.008351216833275485, 'loss': 0.007794740166604878, 'time_step': 0.008562712784273079, 'init_value': -0.42849603295326233, 'ave_value': -0.6629355619854487, 'soft_opc': nan} step=4980




2022-04-22 02:40.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.48 [info     ] FQE_20220422024001: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001493304608816124, 'time_algorithm_update': 0.008343574512435729, 'loss': 0.008327105485466701, 'time_step': 0.008559507059763712, 'init_value': -0.4249544143676758, 'ave_value': -0.6482285554008977, 'soft_opc': nan} step=5146




2022-04-22 02:40.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.49 [info     ] FQE_20220422024001: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00014675094420651356, 'time_algorithm_update': 0.0076946178114557845, 'loss': 0.009083120542100115, 'time_step': 0.00790323119565665, 'init_value': -0.44146037101745605, 'ave_value': -0.6847626810297706, 'soft_opc': nan} step=5312




2022-04-22 02:40.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.51 [info     ] FQE_20220422024001: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00015203205935926322, 'time_algorithm_update': 0.00838822629078325, 'loss': 0.009431002334430701, 'time_step': 0.008605661162410874, 'init_value': -0.40689265727996826, 'ave_value': -0.6513122191518411, 'soft_opc': nan} step=5478




2022-04-22 02:40.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.52 [info     ] FQE_20220422024001: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00014721341879971056, 'time_algorithm_update': 0.007785428001219968, 'loss': 0.010348409719355628, 'time_step': 0.007994539766426546, 'init_value': -0.4489971995353699, 'ave_value': -0.7082760160990261, 'soft_opc': nan} step=5644




2022-04-22 02:40.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.54 [info     ] FQE_20220422024001: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00014492977096373775, 'time_algorithm_update': 0.008309744926820318, 'loss': 0.010701614831776235, 'time_step': 0.008523780179310995, 'init_value': -0.4881919324398041, 'ave_value': -0.7396576085539015, 'soft_opc': nan} step=5810




2022-04-22 02:40.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.55 [info     ] FQE_20220422024001: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00014789133186799935, 'time_algorithm_update': 0.00781686621976186, 'loss': 0.011301047202684995, 'time_step': 0.008031227502478174, 'init_value': -0.5155197978019714, 'ave_value': -0.7697298472344473, 'soft_opc': nan} step=5976




2022-04-22 02:40.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.57 [info     ] FQE_20220422024001: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.000149357749755124, 'time_algorithm_update': 0.00838517424571945, 'loss': 0.012163370612066194, 'time_step': 0.008599894592560917, 'init_value': -0.4541645646095276, 'ave_value': -0.7304324371001876, 'soft_opc': nan} step=6142




2022-04-22 02:40.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:40.58 [info     ] FQE_20220422024001: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015691676771784402, 'time_algorithm_update': 0.00850726466581046, 'loss': 0.012300042484266997, 'time_step': 0.00873234760330384, 'init_value': -0.4344941973686218, 'ave_value': -0.7149163178747168, 'soft_opc': nan} step=6308




2022-04-22 02:40.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.00 [info     ] FQE_20220422024001: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001488234623368964, 'time_algorithm_update': 0.00849583637283509, 'loss': 0.013379306675507182, 'time_step': 0.008712791534791509, 'init_value': -0.5247507095336914, 'ave_value': -0.8102920776199516, 'soft_opc': nan} step=6474




2022-04-22 02:41.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.01 [info     ] FQE_20220422024001: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00014501020132777202, 'time_algorithm_update': 0.008139123399573636, 'loss': 0.014093329800722044, 'time_step': 0.00834779997906053, 'init_value': -0.5010799765586853, 'ave_value': -0.7752889196111544, 'soft_opc': nan} step=6640




2022-04-22 02:41.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.03 [info     ] FQE_20220422024001: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014612186385924556, 'time_algorithm_update': 0.008002220866191819, 'loss': 0.01427325832339127, 'time_step': 0.008216822003743735, 'init_value': -0.5453301668167114, 'ave_value': -0.8081517460160293, 'soft_opc': nan} step=6806




2022-04-22 02:41.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.04 [info     ] FQE_20220422024001: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001438525785882789, 'time_algorithm_update': 0.00837280664099268, 'loss': 0.01490711912488935, 'time_step': 0.008580710514482245, 'init_value': -0.5845183730125427, 'ave_value': -0.8789049135772763, 'soft_opc': nan} step=6972




2022-04-22 02:41.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.06 [info     ] FQE_20220422024001: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001493089170341032, 'time_algorithm_update': 0.007967355739639467, 'loss': 0.016232356679813757, 'time_step': 0.008186356130852756, 'init_value': -0.5165156722068787, 'ave_value': -0.8065710431954882, 'soft_opc': nan} step=7138




2022-04-22 02:41.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.07 [info     ] FQE_20220422024001: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014541666191744516, 'time_algorithm_update': 0.008344934647341809, 'loss': 0.016713329659233497, 'time_step': 0.008554277649844986, 'init_value': -0.5602967739105225, 'ave_value': -0.8458382303767779, 'soft_opc': nan} step=7304




2022-04-22 02:41.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.09 [info     ] FQE_20220422024001: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014970532382827206, 'time_algorithm_update': 0.007535525115139513, 'loss': 0.01789248205583924, 'time_step': 0.007751332708151944, 'init_value': -0.537451982498169, 'ave_value': -0.8293175286957407, 'soft_opc': nan} step=7470




2022-04-22 02:41.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.10 [info     ] FQE_20220422024001: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00014959329582122434, 'time_algorithm_update': 0.008467823626047158, 'loss': 0.018322760422282057, 'time_step': 0.008689027234732387, 'init_value': -0.571698784828186, 'ave_value': -0.8660504911413736, 'soft_opc': nan} step=7636




2022-04-22 02:41.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.12 [info     ] FQE_20220422024001: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00014662742614746094, 'time_algorithm_update': 0.008350108043257013, 'loss': 0.01950048418618817, 'time_step': 0.008564591407775879, 'init_value': -0.688391923904419, 'ave_value': -1.0038630566366755, 'soft_opc': nan} step=7802




2022-04-22 02:41.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.13 [info     ] FQE_20220422024001: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00014265330441026803, 'time_algorithm_update': 0.008308328777910715, 'loss': 0.021031869642355153, 'time_step': 0.008515971252717167, 'init_value': -0.6493215560913086, 'ave_value': -0.9533430739003863, 'soft_opc': nan} step=7968




2022-04-22 02:41.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.15 [info     ] FQE_20220422024001: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00014457645186458724, 'time_algorithm_update': 0.007815007703850069, 'loss': 0.02143152306453852, 'time_step': 0.008028130933462855, 'init_value': -0.6992759108543396, 'ave_value': -0.9918265737425368, 'soft_opc': nan} step=8134




2022-04-22 02:41.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 02:41.16 [info     ] FQE_20220422024001: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001541792628276779, 'time_algorithm_update': 0.008338307759847986, 'loss': 0.022534563907783994, 'time_step': 0.008563893387116581, 'init_value': -0.6796519756317139, 'ave_value': -0.971617268853158, 'soft_opc': nan} step=8300




2022-04-22 02:41.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024001/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 02:41.17 [debug    ] RoundIterator is selected.
2022-04-22 02:41.17 [info     ] Directory is created at d3rlpy_logs/FQE_20220422024117
2022-04-22 02:41.17 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 02:41.17 [debug    ] Building models...
2022-04-22 02:41.17 [debug    ] Models have been built.
2022-04-22 02:41.17 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422024117/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size':

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 02:41.20 [info     ] FQE_20220422024117: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00015138817387957905, 'time_algorithm_update': 0.008408801500187364, 'loss': 0.025165850943669157, 'time_step': 0.008625677851743476, 'init_value': -1.09965181350708, 'ave_value': -1.079133890824275, 'soft_opc': nan} step=344




2022-04-22 02:41.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.23 [info     ] FQE_20220422024117: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00015080529589985692, 'time_algorithm_update': 0.008080497730609983, 'loss': 0.023141739930589357, 'time_step': 0.008294357809909555, 'init_value': -1.965552806854248, 'ave_value': -1.9043864538943445, 'soft_opc': nan} step=688




2022-04-22 02:41.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.26 [info     ] FQE_20220422024117: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00015170768249866574, 'time_algorithm_update': 0.008324695187945699, 'loss': 0.025520903916574668, 'time_step': 0.008542928584786348, 'init_value': -3.0838403701782227, 'ave_value': -2.9530694309521364, 'soft_opc': nan} step=1032




2022-04-22 02:41.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.29 [info     ] FQE_20220422024117: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00015146926391956418, 'time_algorithm_update': 0.00798922707868177, 'loss': 0.02823252170238384, 'time_step': 0.008207580377889235, 'init_value': -4.011650085449219, 'ave_value': -3.802510391001229, 'soft_opc': nan} step=1376




2022-04-22 02:41.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.32 [info     ] FQE_20220422024117: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001524977905805721, 'time_algorithm_update': 0.00839529688968215, 'loss': 0.03562471392391206, 'time_step': 0.008613389591838038, 'init_value': -5.143738746643066, 'ave_value': -4.88288287208961, 'soft_opc': nan} step=1720




2022-04-22 02:41.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.35 [info     ] FQE_20220422024117: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00015424087990161984, 'time_algorithm_update': 0.008015106583750525, 'loss': 0.04208271421741174, 'time_step': 0.008235389410063278, 'init_value': -5.838093280792236, 'ave_value': -5.581110008369695, 'soft_opc': nan} step=2064




2022-04-22 02:41.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.39 [info     ] FQE_20220422024117: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00015118718147277832, 'time_algorithm_update': 0.008317379064338153, 'loss': 0.050823229278416135, 'time_step': 0.008535386517990467, 'init_value': -6.748171806335449, 'ave_value': -6.579087148969238, 'soft_opc': nan} step=2408




2022-04-22 02:41.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.42 [info     ] FQE_20220422024117: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016193542369576387, 'time_algorithm_update': 0.008121058691379636, 'loss': 0.06053335825252065, 'time_step': 0.00834955379020336, 'init_value': -7.173155307769775, 'ave_value': -7.18994159325286, 'soft_opc': nan} step=2752




2022-04-22 02:41.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.45 [info     ] FQE_20220422024117: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001534022564111754, 'time_algorithm_update': 0.00838686976321908, 'loss': 0.07154466604900568, 'time_step': 0.008609829253928607, 'init_value': -7.595808029174805, 'ave_value': -7.842075367634361, 'soft_opc': nan} step=3096




2022-04-22 02:41.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.48 [info     ] FQE_20220422024117: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001544536546219227, 'time_algorithm_update': 0.008261437332907389, 'loss': 0.08610529987802079, 'time_step': 0.008482982252919397, 'init_value': -8.196258544921875, 'ave_value': -8.837099655818294, 'soft_opc': nan} step=3440




2022-04-22 02:41.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.51 [info     ] FQE_20220422024117: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00015156560165937558, 'time_algorithm_update': 0.008201050203900004, 'loss': 0.09711129177188457, 'time_step': 0.008419223303018614, 'init_value': -8.40228271484375, 'ave_value': -9.374619164821263, 'soft_opc': nan} step=3784




2022-04-22 02:41.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.54 [info     ] FQE_20220422024117: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00015091896057128906, 'time_algorithm_update': 0.008311603651490323, 'loss': 0.11375971397298366, 'time_step': 0.00852621086808138, 'init_value': -8.938454627990723, 'ave_value': -10.468670813269444, 'soft_opc': nan} step=4128




2022-04-22 02:41.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:41.57 [info     ] FQE_20220422024117: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00015440375305885492, 'time_algorithm_update': 0.008153631936672122, 'loss': 0.12448278842710478, 'time_step': 0.00837721588999726, 'init_value': -9.19588851928711, 'ave_value': -11.083783672656025, 'soft_opc': nan} step=4472




2022-04-22 02:41.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.00 [info     ] FQE_20220422024117: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001545617746752362, 'time_algorithm_update': 0.008334859166034433, 'loss': 0.13725080527906675, 'time_step': 0.008558833321859671, 'init_value': -9.601637840270996, 'ave_value': -12.025761957721667, 'soft_opc': nan} step=4816




2022-04-22 02:42.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.03 [info     ] FQE_20220422024117: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00015271818915078806, 'time_algorithm_update': 0.008072415756624799, 'loss': 0.1476357511818669, 'time_step': 0.00829226956811062, 'init_value': -9.687870025634766, 'ave_value': -12.440096666093345, 'soft_opc': nan} step=5160




2022-04-22 02:42.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.07 [info     ] FQE_20220422024117: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00015477039093195007, 'time_algorithm_update': 0.008391428132389868, 'loss': 0.16074140748377283, 'time_step': 0.008615659419880357, 'init_value': -10.398346900939941, 'ave_value': -13.521674872545509, 'soft_opc': nan} step=5504




2022-04-22 02:42.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.10 [info     ] FQE_20220422024117: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00014946419139241063, 'time_algorithm_update': 0.00804750170818595, 'loss': 0.17333828730963516, 'time_step': 0.008267596017482668, 'init_value': -10.576899528503418, 'ave_value': -14.187650626360833, 'soft_opc': nan} step=5848




2022-04-22 02:42.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.13 [info     ] FQE_20220422024117: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00015174095020737758, 'time_algorithm_update': 0.008382634368053702, 'loss': 0.18456379666404668, 'time_step': 0.008601370245911354, 'init_value': -10.677765846252441, 'ave_value': -14.808250235866856, 'soft_opc': nan} step=6192




2022-04-22 02:42.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.16 [info     ] FQE_20220422024117: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001529122507849405, 'time_algorithm_update': 0.008078493351160093, 'loss': 0.20219118484068488, 'time_step': 0.008300063915030902, 'init_value': -10.847488403320312, 'ave_value': -15.524971754298553, 'soft_opc': nan} step=6536




2022-04-22 02:42.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.19 [info     ] FQE_20220422024117: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016973392907963244, 'time_algorithm_update': 0.009061817512955777, 'loss': 0.2096041504748512, 'time_step': 0.00930200967677804, 'init_value': -10.994857788085938, 'ave_value': -16.086284282051764, 'soft_opc': nan} step=6880




2022-04-22 02:42.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.22 [info     ] FQE_20220422024117: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00015123638995858125, 'time_algorithm_update': 0.008116156555885492, 'loss': 0.22912957458107105, 'time_step': 0.008332736963449522, 'init_value': -11.55125904083252, 'ave_value': -17.205609432587753, 'soft_opc': nan} step=7224




2022-04-22 02:42.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.26 [info     ] FQE_20220422024117: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001546678154967552, 'time_algorithm_update': 0.008286627226097638, 'loss': 0.23927336179131511, 'time_step': 0.008507018172463705, 'init_value': -11.414273262023926, 'ave_value': -17.489465004098307, 'soft_opc': nan} step=7568




2022-04-22 02:42.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.29 [info     ] FQE_20220422024117: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.000151690355567045, 'time_algorithm_update': 0.008011181687199793, 'loss': 0.25037170055885477, 'time_step': 0.008228597945945208, 'init_value': -11.522310256958008, 'ave_value': -18.11220150941664, 'soft_opc': nan} step=7912




2022-04-22 02:42.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.32 [info     ] FQE_20220422024117: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00015188095181487327, 'time_algorithm_update': 0.008281025082566017, 'loss': 0.25953040938001387, 'time_step': 0.008500738892444345, 'init_value': -11.738543510437012, 'ave_value': -18.743680342521753, 'soft_opc': nan} step=8256




2022-04-22 02:42.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.35 [info     ] FQE_20220422024117: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00015092173288034838, 'time_algorithm_update': 0.008041137179663016, 'loss': 0.2733673017558663, 'time_step': 0.008257182531578595, 'init_value': -11.736417770385742, 'ave_value': -19.152493799377133, 'soft_opc': nan} step=8600




2022-04-22 02:42.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.38 [info     ] FQE_20220422024117: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001571663590364678, 'time_algorithm_update': 0.008394690447075422, 'loss': 0.2803124646753679, 'time_step': 0.008619595405667327, 'init_value': -11.920182228088379, 'ave_value': -19.8478301372077, 'soft_opc': nan} step=8944




2022-04-22 02:42.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.41 [info     ] FQE_20220422024117: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001502023186794547, 'time_algorithm_update': 0.00815259717231573, 'loss': 0.2911113566870606, 'time_step': 0.00836785865384479, 'init_value': -11.723564147949219, 'ave_value': -20.06801714561544, 'soft_opc': nan} step=9288




2022-04-22 02:42.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.44 [info     ] FQE_20220422024117: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00015163629554038825, 'time_algorithm_update': 0.008186474095943362, 'loss': 0.3046016074922802, 'time_step': 0.008405861466429954, 'init_value': -12.256771087646484, 'ave_value': -21.035849991575017, 'soft_opc': nan} step=9632




2022-04-22 02:42.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.47 [info     ] FQE_20220422024117: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00015476484631383143, 'time_algorithm_update': 0.008299402026242988, 'loss': 0.31737985367193644, 'time_step': 0.008520483277564826, 'init_value': -12.456207275390625, 'ave_value': -21.510727001713203, 'soft_opc': nan} step=9976




2022-04-22 02:42.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.50 [info     ] FQE_20220422024117: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.000151543423186901, 'time_algorithm_update': 0.00805280513541643, 'loss': 0.32729152118960436, 'time_step': 0.008270383574241815, 'init_value': -12.753847122192383, 'ave_value': -22.288653540826058, 'soft_opc': nan} step=10320




2022-04-22 02:42.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.53 [info     ] FQE_20220422024117: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00015102222908374875, 'time_algorithm_update': 0.008309071840241898, 'loss': 0.3488707430119258, 'time_step': 0.008527337811713996, 'init_value': -13.10105037689209, 'ave_value': -22.819213233659934, 'soft_opc': nan} step=10664




2022-04-22 02:42.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:42.57 [info     ] FQE_20220422024117: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00014866230099700218, 'time_algorithm_update': 0.008041392925173738, 'loss': 0.35081034594453697, 'time_step': 0.008256925399913344, 'init_value': -12.952827453613281, 'ave_value': -23.137644471510036, 'soft_opc': nan} step=11008




2022-04-22 02:42.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.00 [info     ] FQE_20220422024117: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001502411310062852, 'time_algorithm_update': 0.008257949768110763, 'loss': 0.3659886724618805, 'time_step': 0.008478818937789562, 'init_value': -12.933293342590332, 'ave_value': -23.513776627317206, 'soft_opc': nan} step=11352




2022-04-22 02:43.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.03 [info     ] FQE_20220422024117: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00014983290849730026, 'time_algorithm_update': 0.007991759583007458, 'loss': 0.3668151979583727, 'time_step': 0.00820898593858231, 'init_value': -13.03760814666748, 'ave_value': -23.97955140074094, 'soft_opc': nan} step=11696




2022-04-22 02:43.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.06 [info     ] FQE_20220422024117: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00015130569768506428, 'time_algorithm_update': 0.0083008186761723, 'loss': 0.3857284394352762, 'time_step': 0.008515940156093863, 'init_value': -13.583023071289062, 'ave_value': -24.73289193792923, 'soft_opc': nan} step=12040




2022-04-22 02:43.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.09 [info     ] FQE_20220422024117: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.000149381022120631, 'time_algorithm_update': 0.008044434148211812, 'loss': 0.3935018063057214, 'time_step': 0.008260144050731215, 'init_value': -14.036688804626465, 'ave_value': -25.399055946437088, 'soft_opc': nan} step=12384




2022-04-22 02:43.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.12 [info     ] FQE_20220422024117: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00015423880066982534, 'time_algorithm_update': 0.008331246847330137, 'loss': 0.39244532965253603, 'time_step': 0.008552455624868704, 'init_value': -13.579052925109863, 'ave_value': -25.257120144931047, 'soft_opc': nan} step=12728




2022-04-22 02:43.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.15 [info     ] FQE_20220422024117: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001518837241239326, 'time_algorithm_update': 0.008051644924075105, 'loss': 0.3988171396127274, 'time_step': 0.008275172738141792, 'init_value': -13.912660598754883, 'ave_value': -25.69627421237864, 'soft_opc': nan} step=13072




2022-04-22 02:43.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.18 [info     ] FQE_20220422024117: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00015184283256530762, 'time_algorithm_update': 0.008305707643198412, 'loss': 0.40302627323585194, 'time_step': 0.008528779412424841, 'init_value': -14.486839294433594, 'ave_value': -26.449699079050674, 'soft_opc': nan} step=13416




2022-04-22 02:43.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.21 [info     ] FQE_20220422024117: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00015020370483398438, 'time_algorithm_update': 0.00807371319726456, 'loss': 0.41142038634956574, 'time_step': 0.008291595897009207, 'init_value': -14.487824440002441, 'ave_value': -26.686055121819177, 'soft_opc': nan} step=13760




2022-04-22 02:43.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.25 [info     ] FQE_20220422024117: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00015035964721857116, 'time_algorithm_update': 0.00835885488709738, 'loss': 0.40446849698254966, 'time_step': 0.008577130561651186, 'init_value': -14.391902923583984, 'ave_value': -26.757938255946915, 'soft_opc': nan} step=14104




2022-04-22 02:43.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.28 [info     ] FQE_20220422024117: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001469996086386747, 'time_algorithm_update': 0.008139103651046753, 'loss': 0.4150328318030725, 'time_step': 0.008351484703463178, 'init_value': -14.34018325805664, 'ave_value': -26.753049410329208, 'soft_opc': nan} step=14448




2022-04-22 02:43.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.31 [info     ] FQE_20220422024117: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00015156698781390522, 'time_algorithm_update': 0.00825070017992064, 'loss': 0.4099842782841649, 'time_step': 0.008467725543088691, 'init_value': -14.071390151977539, 'ave_value': -26.730829462972846, 'soft_opc': nan} step=14792




2022-04-22 02:43.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.34 [info     ] FQE_20220422024117: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00015591604765071424, 'time_algorithm_update': 0.008384626965190089, 'loss': 0.4241720176069066, 'time_step': 0.008609457764514657, 'init_value': -14.875377655029297, 'ave_value': -27.36865050824913, 'soft_opc': nan} step=15136




2022-04-22 02:43.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.37 [info     ] FQE_20220422024117: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015455415082532307, 'time_algorithm_update': 0.008655733147332834, 'loss': 0.4380116580097481, 'time_step': 0.00887959502464117, 'init_value': -14.869843482971191, 'ave_value': -27.33961369647636, 'soft_opc': nan} step=15480




2022-04-22 02:43.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.41 [info     ] FQE_20220422024117: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00015180540639300679, 'time_algorithm_update': 0.009655782649683397, 'loss': 0.4389908600945112, 'time_step': 0.009873103263766267, 'init_value': -15.177972793579102, 'ave_value': -27.519926512724645, 'soft_opc': nan} step=15824




2022-04-22 02:43.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.44 [info     ] FQE_20220422024117: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00015541425971097724, 'time_algorithm_update': 0.00962378327236619, 'loss': 0.456738059996454, 'time_step': 0.00984697702319123, 'init_value': -15.392570495605469, 'ave_value': -27.648791910694527, 'soft_opc': nan} step=16168




2022-04-22 02:43.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.48 [info     ] FQE_20220422024117: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.000153522158777991, 'time_algorithm_update': 0.009697048470031383, 'loss': 0.475710847017052, 'time_step': 0.009917528823364613, 'init_value': -15.520513534545898, 'ave_value': -27.795200605515962, 'soft_opc': nan} step=16512




2022-04-22 02:43.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.52 [info     ] FQE_20220422024117: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00015223303506540698, 'time_algorithm_update': 0.0092548644819925, 'loss': 0.4937766118754828, 'time_step': 0.009476133557253106, 'init_value': -15.974313735961914, 'ave_value': -28.089547144614897, 'soft_opc': nan} step=16856




2022-04-22 02:43.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 02:43.55 [info     ] FQE_20220422024117: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001554288143335387, 'time_algorithm_update': 0.009739053803820943, 'loss': 0.5020636246122723, 'time_step': 0.009966448989025382, 'init_value': -15.9827880859375, 'ave_value': -28.03112192398256, 'soft_opc': nan} step=17200




2022-04-22 02:43.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422024117/model_17200.pt
search iteration:  14
using hyper params:  [0.004505295358483005, 0.002713860905839413, 1.1921663106646839e-05, 5]
2022-04-22 02:43.55 [debug    ] RoundIterator is selected.
2022-04-22 02:43.55 [info     ] Directory is created at d3rlpy_logs/CQL_20220422024355
2022-04-22 02:43.55 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 02:43.55 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 02:43.55 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422024355/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.004505295358483005, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:44.17 [info     ] CQL_20220422024355: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00035814742821489454, 'time_algorithm_update': 0.05824650436467518, 'temp_loss': 4.960777279269489, 'temp': 0.9978677325510565, 'alpha_loss': -17.70839045640361, 'alpha': 1.0177041695297109, 'critic_loss': 119.04318076337692, 'actor_loss': 1.9847977770374001, 'time_step': 0.058695113038741094, 'td_error': 1.2848695633054603, 'init_value': -6.140929222106934, 'ave_value': -5.651770291906671} step=346
2022-04-22 02:44.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:44.38 [info     ] CQL_20220422024355: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00036154523750261074, 'time_algorithm_update': 0.059291013403434976, 'temp_loss': 5.014692514618008, 'temp': 0.9937054955890413, 'alpha_loss': -18.371284931381314, 'alpha': 1.0541730295715994, 'critic_loss': 135.3710920300787, 'actor_loss': 7.822839313848859, 'time_step': 0.059745712776404586, 'td_error': 1.4094760762471286, 'init_value': -11.238969802856445, 'ave_value': -10.582992648128764} step=692
2022-04-22 02:44.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:44.59 [info     ] CQL_20220422024355: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00036416370744650077, 'time_algorithm_update': 0.058492035535029595, 'temp_loss': 4.994448616325511, 'temp': 0.9896078125245309, 'alpha_loss': -19.052768911240417, 'alpha': 1.0924577985195756, 'critic_loss': 246.9084688307922, 'actor_loss': 12.55124403010903, 'time_step': 0.05894861676100362, 'td_error': 1.524040939646475, 'init_value': -15.005971908569336, 'ave_value': -14.109073189368685} step=1038
2022-04-22 02:44.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:45.20 [info     ] CQL_20220422024355: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00036962390635054927, 'time_algorithm_update': 0.05576022167426313, 'temp_loss': 4.974033867003601, 'temp': 0.9855427418141006, 'alpha_loss': -19.73971393893909, 'alpha': 1.132641695827418, 'critic_loss': 405.48698689896247, 'actor_loss': 15.901513124476967, 'time_step': 0.05622142174340397, 'td_error': 1.5908766406982748, 'init_value': -16.851282119750977, 'ave_value': -15.950783547093485} step=1384
2022-04-22 02:45.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:45.40 [info     ] CQL_20220422024355: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00037189370634927913, 'time_algorithm_update': 0.05611794601286078, 'temp_loss': 4.95441230184081, 'temp': 0.9815038177319345, 'alpha_loss': -20.467395021736277, 'alpha': 1.1747365115005846, 'critic_loss': 608.2701890537504, 'actor_loss': 17.086376096471884, 'time_step': 0.05658413082188954, 'td_error': 1.6162091093783424, 'init_value': -17.603713989257812, 'ave_value': -16.836509621974244} step=1730
2022-04-22 02:45.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:46.00 [info     ] CQL_20220422024355: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003585229719305314, 'time_algorithm_update': 0.0547339606147281, 'temp_loss': 4.934048739471876, 'temp': 0.9774881237848646, 'alpha_loss': -21.221293091084917, 'alpha': 1.2187844928289424, 'critic_loss': 857.8833586411669, 'actor_loss': 15.754500386342837, 'time_step': 0.0551844015286837, 'td_error': 1.4826945503004965, 'init_value': -14.875661849975586, 'ave_value': -14.340417108992481} step=2076
2022-04-22 02:46.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:46.20 [info     ] CQL_20220422024355: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00036186978996144556, 'time_algorithm_update': 0.055246346258703685, 'temp_loss': 4.915863609038336, 'temp': 0.9734937057674276, 'alpha_loss': -22.0124392757526, 'alpha': 1.2648298888537235, 'critic_loss': 1153.3269870295, 'actor_loss': 11.583913877520258, 'time_step': 0.05569642058686714, 'td_error': 1.3600948169634366, 'init_value': -10.173666954040527, 'ave_value': -9.874728145323528} step=2422
2022-04-22 02:46.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:46.40 [info     ] CQL_20220422024355: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003586959287610357, 'time_algorithm_update': 0.055302135517142414, 'temp_loss': 4.894967405782269, 'temp': 0.9695193123266187, 'alpha_loss': -22.83710490210208, 'alpha': 1.3129040740128886, 'critic_loss': 1498.7129290806765, 'actor_loss': 6.416858554575485, 'time_step': 0.05575245653273742, 'td_error': 1.3025637345845589, 'init_value': -6.440319061279297, 'ave_value': -6.330064435681216} step=2768
2022-04-22 02:46.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:47.01 [info     ] CQL_20220422024355: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00037174693421821375, 'time_algorithm_update': 0.056497979026309327, 'temp_loss': 4.875775040918692, 'temp': 0.9655639617084768, 'alpha_loss': -23.70331023883268, 'alpha': 1.3630615707077731, 'critic_loss': 1849.8307191705428, 'actor_loss': 4.170127183715732, 'time_step': 0.056962729878508284, 'td_error': 1.297565424014172, 'init_value': -5.677492618560791, 'ave_value': -5.625618952764273} step=3114
2022-04-22 02:47.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:47.23 [info     ] CQL_20220422024355: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0004117392391138683, 'time_algorithm_update': 0.0604446747399479, 'temp_loss': 4.857065473677795, 'temp': 0.9616272294452425, 'alpha_loss': -24.61543316923814, 'alpha': 1.4153645720785064, 'critic_loss': 2179.5620529968614, 'actor_loss': 3.7558379138825257, 'time_step': 0.060953362828734294, 'td_error': 1.2990035479696977, 'init_value': -5.58980655670166, 'ave_value': -5.551219711376062} step=3460
2022-04-22 02:47.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:47.45 [info     ] CQL_20220422024355: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00041978758883614073, 'time_algorithm_update': 0.061414189421372604, 'temp_loss': 4.836901674380881, 'temp': 0.9577078922635558, 'alpha_loss': -25.56186048695118, 'alpha': 1.4698626933070276, 'critic_loss': 2507.156784145818, 'actor_loss': 3.76148809380614, 'time_step': 0.06193454968446941, 'td_error': 1.3022489046905141, 'init_value': -5.649499893188477, 'ave_value': -5.615949957808255} step=3806
2022-04-22 02:47.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:48.08 [info     ] CQL_20220422024355: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0004169741117885347, 'time_algorithm_update': 0.06105612399261122, 'temp_loss': 4.8163324380885655, 'temp': 0.9538070161563124, 'alpha_loss': -26.550373000216624, 'alpha': 1.5266088974269139, 'critic_loss': 2849.4834746432443, 'actor_loss': 3.8567119223534028, 'time_step': 0.06157583790707451, 'td_error': 1.305808976801588, 'init_value': -5.771465301513672, 'ave_value': -5.7467507858421065} step=4152
2022-04-22 02:48.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:48.30 [info     ] CQL_20220422024355: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0004066883484063121, 'time_algorithm_update': 0.061114083824819226, 'temp_loss': 4.798510641031871, 'temp': 0.9499231335744692, 'alpha_loss': -27.571460106469303, 'alpha': 1.5856598391698276, 'critic_loss': 3194.4112139575054, 'actor_loss': 4.021739338863791, 'time_step': 0.06162178240759524, 'td_error': 1.311102109054449, 'init_value': -6.013697147369385, 'ave_value': -5.993852505744945} step=4498
2022-04-22 02:48.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:48.52 [info     ] CQL_20220422024355: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00041430327244576694, 'time_algorithm_update': 0.0608137191375556, 'temp_loss': 4.7780987458422, 'temp': 0.9460554021286827, 'alpha_loss': -28.64450759556941, 'alpha': 1.6470961711999308, 'critic_loss': 3568.3075191078847, 'actor_loss': 4.245907977826334, 'time_step': 0.0613324270083036, 'td_error': 1.315945802607681, 'init_value': -6.235271453857422, 'ave_value': -6.21867720005692} step=4844
2022-04-22 02:48.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:49.14 [info     ] CQL_20220422024355: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00041412273583384606, 'time_algorithm_update': 0.06051258131258749, 'temp_loss': 4.758707555043215, 'temp': 0.9422044194158102, 'alpha_loss': -29.751898335583636, 'alpha': 1.710988161191775, 'critic_loss': 3958.4960514134755, 'actor_loss': 4.4845651725813145, 'time_step': 0.06103124025929181, 'td_error': 1.32268233051477, 'init_value': -6.599020957946777, 'ave_value': -6.581445432783247} step=5190
2022-04-22 02:49.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:49.36 [info     ] CQL_20220422024355: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00041112872217431923, 'time_algorithm_update': 0.06004296424071913, 'temp_loss': 4.7407301946871545, 'temp': 0.9383694112645409, 'alpha_loss': -30.906965525853153, 'alpha': 1.7774133110322015, 'critic_loss': 4365.857733048456, 'actor_loss': 4.830644998936295, 'time_step': 0.06055724896447507, 'td_error': 1.3281090509817448, 'init_value': -6.822049617767334, 'ave_value': -6.810919152282889} step=5536
2022-04-22 02:49.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:49.57 [info     ] CQL_20220422024355: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00041547331506806303, 'time_algorithm_update': 0.05964131437974169, 'temp_loss': 4.719107669212915, 'temp': 0.9345505769886723, 'alpha_loss': -32.106694800316255, 'alpha': 1.8464646680506667, 'critic_loss': 4802.099215645321, 'actor_loss': 5.164297462198776, 'time_step': 0.060162405058138635, 'td_error': 1.334556027073964, 'init_value': -7.1176934242248535, 'ave_value': -7.108737994755202} step=5882
2022-04-22 02:49.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:50.20 [info     ] CQL_20220422024355: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0004256054156088416, 'time_algorithm_update': 0.06043497882137409, 'temp_loss': 4.7007791582559575, 'temp': 0.9307482037585595, 'alpha_loss': -33.35756601763598, 'alpha': 1.9182365519462983, 'critic_loss': 5257.107224304552, 'actor_loss': 5.556283119785992, 'time_step': 0.06096617472654133, 'td_error': 1.3435103302778086, 'init_value': -7.552114486694336, 'ave_value': -7.54398850304869} step=6228
2022-04-22 02:50.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:50.42 [info     ] CQL_20220422024355: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0004121650850152694, 'time_algorithm_update': 0.06175671423101701, 'temp_loss': 4.681406197520349, 'temp': 0.9269616462591755, 'alpha_loss': -34.65475912590247, 'alpha': 1.992828558979696, 'critic_loss': 5716.922992684248, 'actor_loss': 5.9537825350127465, 'time_step': 0.0622732618640613, 'td_error': 1.3540500662683808, 'init_value': -8.056537628173828, 'ave_value': -8.048340874930139} step=6574
2022-04-22 02:50.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:51.05 [info     ] CQL_20220422024355: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00042190854949069164, 'time_algorithm_update': 0.062092673571812626, 'temp_loss': 4.66282373770124, 'temp': 0.9231900078368325, 'alpha_loss': -35.99980894540776, 'alpha': 2.0703387556737556, 'critic_loss': 6203.7691855017165, 'actor_loss': 6.377794629576578, 'time_step': 0.06262294405457601, 'td_error': 1.3637592419327487, 'init_value': -8.477005958557129, 'ave_value': -8.469769918912958} step=6920
2022-04-22 02:51.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:51.27 [info     ] CQL_20220422024355: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0004185010932084453, 'time_algorithm_update': 0.06258980318300986, 'temp_loss': 4.642905317979052, 'temp': 0.9194339660206282, 'alpha_loss': -37.40897607527716, 'alpha': 2.1508863551079194, 'critic_loss': 6697.498195052836, 'actor_loss': 6.842697237268348, 'time_step': 0.06311447289637748, 'td_error': 1.373257909672201, 'init_value': -8.869702339172363, 'ave_value': -8.863593610399082} step=7266
2022-04-22 02:51.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:51.50 [info     ] CQL_20220422024355: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0004186278822794126, 'time_algorithm_update': 0.06223246816954861, 'temp_loss': 4.624418209053878, 'temp': 0.9156938467411636, 'alpha_loss': -38.85935382071258, 'alpha': 2.2345828265813044, 'critic_loss': 7220.664770931177, 'actor_loss': 7.336235426753932, 'time_step': 0.06276001819985451, 'td_error': 1.3869252892769144, 'init_value': -9.45068073272705, 'ave_value': -9.444467572229854} step=7612
2022-04-22 02:51.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:52.13 [info     ] CQL_20220422024355: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00041523351834688575, 'time_algorithm_update': 0.06253086486992808, 'temp_loss': 4.606000437212817, 'temp': 0.9119682625539041, 'alpha_loss': -40.37308656549178, 'alpha': 2.321541684211334, 'critic_loss': 7738.1236212405165, 'actor_loss': 7.80583943934799, 'time_step': 0.0630553561138969, 'td_error': 1.3958660511019017, 'init_value': -9.78689956665039, 'ave_value': -9.782732871766902} step=7958
2022-04-22 02:52.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:52.36 [info     ] CQL_20220422024355: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0004100303429399612, 'time_algorithm_update': 0.06260823927862796, 'temp_loss': 4.5866108174957985, 'temp': 0.9082583831569363, 'alpha_loss': -41.94317585057606, 'alpha': 2.4118879206607797, 'critic_loss': 8249.416310569455, 'actor_loss': 8.305187086149447, 'time_step': 0.06312577503954055, 'td_error': 1.4110084782384404, 'init_value': -10.377880096435547, 'ave_value': -10.373685619739517} step=8304
2022-04-22 02:52.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:52.58 [info     ] CQL_20220422024355: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0004166619626083815, 'time_algorithm_update': 0.06214816377342092, 'temp_loss': 4.567614350015718, 'temp': 0.9045633964800421, 'alpha_loss': -43.57358552127904, 'alpha': 2.5057551536945937, 'critic_loss': 8787.447717214596, 'actor_loss': 8.843466778022016, 'time_step': 0.06266921379662663, 'td_error': 1.4204200049429772, 'init_value': -10.707101821899414, 'ave_value': -10.705514701094431} step=8650
2022-04-22 02:52.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:53.21 [info     ] CQL_20220422024355: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003998789484101224, 'time_algorithm_update': 0.0614220834191824, 'temp_loss': 4.550411947889824, 'temp': 0.9008832961493145, 'alpha_loss': -45.27015940715812, 'alpha': 2.6032749696963093, 'critic_loss': 9357.320219359646, 'actor_loss': 9.345263304737951, 'time_step': 0.06192510320961131, 'td_error': 1.439307319841744, 'init_value': -11.404696464538574, 'ave_value': -11.401632684279868} step=8996
2022-04-22 02:53.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:53.43 [info     ] CQL_20220422024355: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0004137699314624588, 'time_algorithm_update': 0.06261788007151874, 'temp_loss': 4.531521156344111, 'temp': 0.8972178950819666, 'alpha_loss': -47.03236519256768, 'alpha': 2.704597184423766, 'critic_loss': 9929.763079163657, 'actor_loss': 9.867679852281691, 'time_step': 0.0631404743029203, 'td_error': 1.4533476785360582, 'init_value': -11.885096549987793, 'ave_value': -11.8824364269215} step=9342
2022-04-22 02:53.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:54.06 [info     ] CQL_20220422024355: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0004220132882884472, 'time_algorithm_update': 0.062318984483707845, 'temp_loss': 4.513236314575107, 'temp': 0.8935677414340091, 'alpha_loss': -48.869201197100516, 'alpha': 2.8098743678517426, 'critic_loss': 10498.065220827311, 'actor_loss': 10.376990717959542, 'time_step': 0.06283443304844674, 'td_error': 1.4685421392862834, 'init_value': -12.389283180236816, 'ave_value': -12.387115323684858} step=9688
2022-04-22 02:54.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:54.29 [info     ] CQL_20220422024355: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00041921634894575, 'time_algorithm_update': 0.06329699954545567, 'temp_loss': 4.495215188561147, 'temp': 0.8899323900655515, 'alpha_loss': -50.75185183707, 'alpha': 2.9192399854604907, 'critic_loss': 11086.97985628161, 'actor_loss': 10.91638340977575, 'time_step': 0.06381075506265453, 'td_error': 1.4818161008602933, 'init_value': -12.81125545501709, 'ave_value': -12.81017732145437} step=10034
2022-04-22 02:54.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:54.52 [info     ] CQL_20220422024355: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0004231096003096917, 'time_algorithm_update': 0.06380509158779431, 'temp_loss': 4.4751669023767375, 'temp': 0.8863119879209926, 'alpha_loss': -52.73250012039449, 'alpha': 3.032849053426974, 'critic_loss': 11679.30815062771, 'actor_loss': 11.438468789778693, 'time_step': 0.06432394967602857, 'td_error': 1.5031923598579398, 'init_value': -13.49468994140625, 'ave_value': -13.492457718756482} step=10380
2022-04-22 02:54.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:55.16 [info     ] CQL_20220422024355: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00041818894402829206, 'time_algorithm_update': 0.06414179650345289, 'temp_loss': 4.457983962373238, 'temp': 0.882707052492682, 'alpha_loss': -54.79722128851565, 'alpha': 3.150908401246705, 'critic_loss': 12129.254617503613, 'actor_loss': 11.916212966676392, 'time_step': 0.06465447086819334, 'td_error': 1.5158978144203892, 'init_value': -13.868795394897461, 'ave_value': -13.867232476890567} step=10726
2022-04-22 02:55.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:55.39 [info     ] CQL_20220422024355: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.000411884633102858, 'time_algorithm_update': 0.06448914618850443, 'temp_loss': 4.437641116235986, 'temp': 0.8791165641277512, 'alpha_loss': -56.915906652549786, 'alpha': 3.2735527953660557, 'critic_loss': 12631.954742255239, 'actor_loss': 12.420375377456576, 'time_step': 0.06499703564395794, 'td_error': 1.5339426875668238, 'init_value': -14.404012680053711, 'ave_value': -14.402579037778013} step=11072
2022-04-22 02:55.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:56.03 [info     ] CQL_20220422024355: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0004103941724479543, 'time_algorithm_update': 0.06413431388105272, 'temp_loss': 4.422008260826155, 'temp': 0.8755404952633588, 'alpha_loss': -59.142544873187994, 'alpha': 3.4009806944455714, 'critic_loss': 12563.135897421424, 'actor_loss': 12.776031163386527, 'time_step': 0.0646393912376007, 'td_error': 1.5356658657001263, 'init_value': -14.427037239074707, 'ave_value': -14.429012412536535} step=11418
2022-04-22 02:56.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:56.26 [info     ] CQL_20220422024355: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.000413902233101729, 'time_algorithm_update': 0.06392404661013212, 'temp_loss': 4.404797489243435, 'temp': 0.8719779292627566, 'alpha_loss': -61.42759202946128, 'alpha': 3.5333508246206824, 'critic_loss': 11145.389374661308, 'actor_loss': 12.905685587425452, 'time_step': 0.06443528701804277, 'td_error': 1.5400251017876252, 'init_value': -14.535959243774414, 'ave_value': -14.539897019742167} step=11764
2022-04-22 02:56.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:56.49 [info     ] CQL_20220422024355: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00043187596205342024, 'time_algorithm_update': 0.06381172182932066, 'temp_loss': 4.385139581095966, 'temp': 0.8684306036185667, 'alpha_loss': -63.83867775360284, 'alpha': 3.670893026914211, 'critic_loss': 9115.034275650289, 'actor_loss': 13.058987350133114, 'time_step': 0.06435074764869117, 'td_error': 1.5559519210104908, 'init_value': -15.004969596862793, 'ave_value': -15.006528410316927} step=12110
2022-04-22 02:56.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:57.12 [info     ] CQL_20220422024355: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00041438458282823506, 'time_algorithm_update': 0.062397650900603716, 'temp_loss': 4.367297539132179, 'temp': 0.8648978936878932, 'alpha_loss': -66.31558192258625, 'alpha': 3.8137987666047377, 'critic_loss': 7683.293250993497, 'actor_loss': 13.391743833619046, 'time_step': 0.06292144549375324, 'td_error': 1.5674727140739704, 'init_value': -15.327951431274414, 'ave_value': -15.329422161487564} step=12456
2022-04-22 02:57.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:57.33 [info     ] CQL_20220422024355: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00040809956589186124, 'time_algorithm_update': 0.05959842108577662, 'temp_loss': 4.349442680447088, 'temp': 0.8613799086884956, 'alpha_loss': -68.9101386318317, 'alpha': 3.9622699773380523, 'critic_loss': 6577.649439464415, 'actor_loss': 13.789537796395363, 'time_step': 0.06011358437510584, 'td_error': 1.5802929512406179, 'init_value': -15.668950080871582, 'ave_value': -15.67070817743901} step=12802
2022-04-22 02:57.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:57.54 [info     ] CQL_20220422024355: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0004086122347440334, 'time_algorithm_update': 0.057552415511511655, 'temp_loss': 4.331581002715006, 'temp': 0.8578760160531612, 'alpha_loss': -71.58750445856525, 'alpha': 4.116533495787251, 'critic_loss': 5857.493992447164, 'actor_loss': 14.417347500089965, 'time_step': 0.05806562459537749, 'td_error': 1.624252109742079, 'init_value': -16.887645721435547, 'ave_value': -16.882656684539057} step=13148
2022-04-22 02:57.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:58.15 [info     ] CQL_20220422024355: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00041038659266653775, 'time_algorithm_update': 0.057661044115276006, 'temp_loss': 4.314911033376793, 'temp': 0.8543860864088025, 'alpha_loss': -74.3707766009204, 'alpha': 4.276776477780645, 'critic_loss': 6172.242111294256, 'actor_loss': 15.638603249037196, 'time_step': 0.05817114824504521, 'td_error': 1.6575558519570375, 'init_value': -17.69941520690918, 'ave_value': -17.69643429176906} step=13494
2022-04-22 02:58.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:58.36 [info     ] CQL_20220422024355: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00041825371670585145, 'time_algorithm_update': 0.05775068398844989, 'temp_loss': 4.297304085913421, 'temp': 0.850909555923043, 'alpha_loss': -77.26362087271806, 'alpha': 4.443242394166186, 'critic_loss': 6265.130203158869, 'actor_loss': 16.374530568977313, 'time_step': 0.05827125441821324, 'td_error': 1.6936480986746998, 'init_value': -18.570985794067383, 'ave_value': -18.56801832840306} step=13840
2022-04-22 02:58.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:58.57 [info     ] CQL_20220422024355: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00041540234075116285, 'time_algorithm_update': 0.05794639807905076, 'temp_loss': 4.279350662507074, 'temp': 0.8474478992079034, 'alpha_loss': -80.27306515908654, 'alpha': 4.616211964215847, 'critic_loss': 6496.120821385025, 'actor_loss': 17.167118050459493, 'time_step': 0.0584628375279421, 'td_error': 1.722493703416355, 'init_value': -19.222267150878906, 'ave_value': -19.219718594141852} step=14186
2022-04-22 02:58.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:59.19 [info     ] CQL_20220422024355: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0004258293636961479, 'time_algorithm_update': 0.05813292823085895, 'temp_loss': 4.262368946406194, 'temp': 0.8440002879999966, 'alpha_loss': -83.3933570509012, 'alpha': 4.795897603724044, 'critic_loss': 6781.702577447616, 'actor_loss': 17.936899306457168, 'time_step': 0.05866295133711975, 'td_error': 1.7577548582266713, 'init_value': -20.01433563232422, 'ave_value': -20.011059284345862} step=14532
2022-04-22 02:59.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 02:59.40 [info     ] CQL_20220422024355: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0004274493697061704, 'time_algorithm_update': 0.05807112889482796, 'temp_loss': 4.245202267101045, 'temp': 0.8405667357362074, 'alpha_loss': -86.63488116843163, 'alpha': 4.982557974798831, 'critic_loss': 6856.862656080654, 'actor_loss': 18.558370198817613, 'time_step': 0.05860133598305586, 'td_error': 1.7825356716231784, 'init_value': -20.54090118408203, 'ave_value': -20.5395030224702} step=14878
2022-04-22 02:59.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:00.00 [info     ] CQL_20220422024355: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0004189558800934367, 'time_algorithm_update': 0.0566104622934595, 'temp_loss': 4.228390227852529, 'temp': 0.8371468741080664, 'alpha_loss': -90.01783851667636, 'alpha': 5.1765126120837435, 'critic_loss': 7024.06320137509, 'actor_loss': 19.249624704349937, 'time_step': 0.0571335740172105, 'td_error': 1.8133240210709558, 'init_value': -21.185401916503906, 'ave_value': -21.184469610461132} step=15224
2022-04-22 03:00.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:00.21 [info     ] CQL_20220422024355: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0004191005850113885, 'time_algorithm_update': 0.05642623019356259, 'temp_loss': 4.2104510475445345, 'temp': 0.8337410583316935, 'alpha_loss': -93.5223597708465, 'alpha': 5.378041166790648, 'critic_loss': 7162.449468535495, 'actor_loss': 19.927952997946324, 'time_step': 0.056945531354474194, 'td_error': 1.8491105337938576, 'init_value': -21.928133010864258, 'ave_value': -21.926087609802508} step=15570
2022-04-22 03:00.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:00.41 [info     ] CQL_20220422024355: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00040851507572769427, 'time_algorithm_update': 0.055468037638361056, 'temp_loss': 4.1931893453432645, 'temp': 0.830349428633045, 'alpha_loss': -97.18305795041123, 'alpha': 5.5873919194833395, 'critic_loss': 7262.270613653812, 'actor_loss': 20.57113769839954, 'time_step': 0.05597538961840503, 'td_error': 1.8772924055365918, 'init_value': -22.482479095458984, 'ave_value': -22.481934510073202} step=15916
2022-04-22 03:00.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:01.02 [info     ] CQL_20220422024355: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00041673018064113023, 'time_algorithm_update': 0.05690955013208995, 'temp_loss': 4.17745758894551, 'temp': 0.8269711853451811, 'alpha_loss': -100.92908962889214, 'alpha': 5.804880800963827, 'critic_loss': 6920.731491882677, 'actor_loss': 21.07193050494773, 'time_step': 0.05743036035857449, 'td_error': 1.9023638432551544, 'init_value': -22.964574813842773, 'ave_value': -22.964989214590116} step=16262
2022-04-22 03:01.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:01.23 [info     ] CQL_20220422024355: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00042349548009089653, 'time_algorithm_update': 0.0582017037220773, 'temp_loss': 4.15940343851299, 'temp': 0.8236067772256157, 'alpha_loss': -104.86408460760393, 'alpha': 6.03081227727019, 'critic_loss': 6624.01369445448, 'actor_loss': 21.76254929559079, 'time_step': 0.05872511381358769, 'td_error': 1.9465630669897205, 'init_value': -23.825748443603516, 'ave_value': -23.82368067546499} step=16608
2022-04-22 03:01.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:01.45 [info     ] CQL_20220422024355: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0004202520227156623, 'time_algorithm_update': 0.0600633779702159, 'temp_loss': 4.143095905381131, 'temp': 0.820256027699895, 'alpha_loss': -108.96567504530009, 'alpha': 6.265553263570532, 'critic_loss': 6808.634438222543, 'actor_loss': 22.496173654677552, 'time_step': 0.06058446382511558, 'td_error': 1.9750529826304724, 'init_value': -24.338590621948242, 'ave_value': -24.339031150749136} step=16954
2022-04-22 03:01.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:02.07 [info     ] CQL_20220422024355: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.000406745541302455, 'time_algorithm_update': 0.06011115471062633, 'temp_loss': 4.126335467906356, 'temp': 0.8169187953361886, 'alpha_loss': -113.19978658863575, 'alpha': 6.509472505205628, 'critic_loss': 6717.518749435513, 'actor_loss': 23.149805041406886, 'time_step': 0.060619766312527516, 'td_error': 2.0236320682712394, 'init_value': -25.242713928222656, 'ave_value': -25.240192947460045} step=17300
2022-04-22 03:02.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422024355/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 03:02.09 [info     ] FQE_20220422030207: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00017545596662774143, 'time_algorithm_update': 0.008856967271092427, 'loss': 0.006932012895290482, 'time_step': 0.009109047522027808, 'init_value': -0.10336567461490631, 'ave_value': -0.058750978353861213, 'soft_opc': nan} step=166




2022-04-22 03:02.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.10 [info     ] FQE_20220422030207: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001777726483632283, 'time_algorithm_update': 0.008494599755988064, 'loss': 0.004574900425701256, 'time_step': 0.008747981255313, 'init_value': -0.14384499192237854, 'ave_value': -0.07570344578843934, 'soft_opc': nan} step=332




2022-04-22 03:02.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.12 [info     ] FQE_20220422030207: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00017693961959287343, 'time_algorithm_update': 0.009553535875067654, 'loss': 0.00378215367658939, 'time_step': 0.009806717734739005, 'init_value': -0.15185882151126862, 'ave_value': -0.07920312043027701, 'soft_opc': nan} step=498




2022-04-22 03:02.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.14 [info     ] FQE_20220422030207: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017872201391013273, 'time_algorithm_update': 0.009600080639482981, 'loss': 0.0032419211403691174, 'time_step': 0.009857651699020201, 'init_value': -0.18409289419651031, 'ave_value': -0.09155558974456948, 'soft_opc': nan} step=664




2022-04-22 03:02.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.15 [info     ] FQE_20220422030207: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00017618414867355163, 'time_algorithm_update': 0.009150044027581272, 'loss': 0.002789074609039284, 'time_step': 0.009403277592486646, 'init_value': -0.2024127095937729, 'ave_value': -0.10424241619855837, 'soft_opc': nan} step=830




2022-04-22 03:02.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.17 [info     ] FQE_20220422030207: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.000177178038171975, 'time_algorithm_update': 0.009641220770686507, 'loss': 0.0024143935157356403, 'time_step': 0.009893365653164416, 'init_value': -0.2432340383529663, 'ave_value': -0.1342374792247004, 'soft_opc': nan} step=996




2022-04-22 03:02.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.19 [info     ] FQE_20220422030207: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00018057047602642015, 'time_algorithm_update': 0.009213845413851451, 'loss': 0.002139759818035885, 'time_step': 0.009467020092240298, 'init_value': -0.31851720809936523, 'ave_value': -0.20393543970511036, 'soft_opc': nan} step=1162




2022-04-22 03:02.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.21 [info     ] FQE_20220422030207: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.000176131007183029, 'time_algorithm_update': 0.009583860994821572, 'loss': 0.0018377823044029794, 'time_step': 0.0098379275884973, 'init_value': -0.33496344089508057, 'ave_value': -0.21659131693393663, 'soft_opc': nan} step=1328




2022-04-22 03:02.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.22 [info     ] FQE_20220422030207: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00018159452691135635, 'time_algorithm_update': 0.009500961705862758, 'loss': 0.0015881957951933146, 'time_step': 0.009755739246506289, 'init_value': -0.3558387756347656, 'ave_value': -0.232616032532475, 'soft_opc': nan} step=1494




2022-04-22 03:02.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.24 [info     ] FQE_20220422030207: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001801654516932476, 'time_algorithm_update': 0.009515351559742388, 'loss': 0.00165604138662702, 'time_step': 0.009771059794598315, 'init_value': -0.40680500864982605, 'ave_value': -0.2686377905661593, 'soft_opc': nan} step=1660




2022-04-22 03:02.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.26 [info     ] FQE_20220422030207: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00018245340829872223, 'time_algorithm_update': 0.00921128456851086, 'loss': 0.0015988331509007882, 'time_step': 0.009470266031931681, 'init_value': -0.4436357319355011, 'ave_value': -0.29799125389707787, 'soft_opc': nan} step=1826




2022-04-22 03:02.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.28 [info     ] FQE_20220422030207: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00018817401794065912, 'time_algorithm_update': 0.009464058531336037, 'loss': 0.0016312285847835668, 'time_step': 0.009727070130497575, 'init_value': -0.487403929233551, 'ave_value': -0.33318012604271774, 'soft_opc': nan} step=1992




2022-04-22 03:02.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.29 [info     ] FQE_20220422030207: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00017992990562714725, 'time_algorithm_update': 0.009584076433296663, 'loss': 0.0019488682232640055, 'time_step': 0.009841018412486616, 'init_value': -0.5408446788787842, 'ave_value': -0.3612657134773562, 'soft_opc': nan} step=2158




2022-04-22 03:02.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.31 [info     ] FQE_20220422030207: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00017852811928255013, 'time_algorithm_update': 0.009145334542515766, 'loss': 0.00200636493475513, 'time_step': 0.009398115686623448, 'init_value': -0.5918149352073669, 'ave_value': -0.40408151449954455, 'soft_opc': nan} step=2324




2022-04-22 03:02.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.33 [info     ] FQE_20220422030207: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00017774105072021484, 'time_algorithm_update': 0.009644392025039857, 'loss': 0.002147462439733131, 'time_step': 0.009903290185583643, 'init_value': -0.6218106150627136, 'ave_value': -0.4123706530662136, 'soft_opc': nan} step=2490




2022-04-22 03:02.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.35 [info     ] FQE_20220422030207: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001761539872870388, 'time_algorithm_update': 0.008977655904838839, 'loss': 0.0024635259981452295, 'time_step': 0.009231412267110434, 'init_value': -0.692878007888794, 'ave_value': -0.45600614759164887, 'soft_opc': nan} step=2656




2022-04-22 03:02.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.36 [info     ] FQE_20220422030207: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001813790884362646, 'time_algorithm_update': 0.009715664817626217, 'loss': 0.002590588536969083, 'time_step': 0.009973793144685677, 'init_value': -0.7613555788993835, 'ave_value': -0.5037188550239211, 'soft_opc': nan} step=2822




2022-04-22 03:02.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.38 [info     ] FQE_20220422030207: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00018046562930187546, 'time_algorithm_update': 0.0095320494778185, 'loss': 0.002741960596843028, 'time_step': 0.009787851069346968, 'init_value': -0.7742947340011597, 'ave_value': -0.4961425309178529, 'soft_opc': nan} step=2988




2022-04-22 03:02.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.40 [info     ] FQE_20220422030207: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001786458923156003, 'time_algorithm_update': 0.009600514388946166, 'loss': 0.00300344308075237, 'time_step': 0.009855122451322624, 'init_value': -0.8258460164070129, 'ave_value': -0.5226270858450113, 'soft_opc': nan} step=3154




2022-04-22 03:02.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.42 [info     ] FQE_20220422030207: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001813417457672487, 'time_algorithm_update': 0.009354194962834737, 'loss': 0.003403939101606585, 'time_step': 0.009615096701196877, 'init_value': -0.8774397373199463, 'ave_value': -0.5699817382848008, 'soft_opc': nan} step=3320




2022-04-22 03:02.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.43 [info     ] FQE_20220422030207: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00018118088503918015, 'time_algorithm_update': 0.009476908718247011, 'loss': 0.0036355653268927484, 'time_step': 0.009735650326832232, 'init_value': -0.9221892356872559, 'ave_value': -0.6057743839625906, 'soft_opc': nan} step=3486




2022-04-22 03:02.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.45 [info     ] FQE_20220422030207: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00018175395138292428, 'time_algorithm_update': 0.009078940713261983, 'loss': 0.003909901519451874, 'time_step': 0.009337199739662998, 'init_value': -0.9328293800354004, 'ave_value': -0.5944486555769234, 'soft_opc': nan} step=3652




2022-04-22 03:02.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.47 [info     ] FQE_20220422030207: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00017725272351000682, 'time_algorithm_update': 0.00953916325626603, 'loss': 0.004173606182679994, 'time_step': 0.00979365210935294, 'init_value': -1.0044916868209839, 'ave_value': -0.6504776909318123, 'soft_opc': nan} step=3818




2022-04-22 03:02.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.49 [info     ] FQE_20220422030207: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00017628899539809628, 'time_algorithm_update': 0.009496565324714384, 'loss': 0.004366286411384928, 'time_step': 0.009752731725394008, 'init_value': -0.9859777688980103, 'ave_value': -0.6221542150797407, 'soft_opc': nan} step=3984




2022-04-22 03:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.50 [info     ] FQE_20220422030207: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00018272486077733786, 'time_algorithm_update': 0.009026052003883454, 'loss': 0.004775784465804112, 'time_step': 0.009288670068763825, 'init_value': -1.0936105251312256, 'ave_value': -0.704175651308384, 'soft_opc': nan} step=4150




2022-04-22 03:02.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.52 [info     ] FQE_20220422030207: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00017241828412894742, 'time_algorithm_update': 0.009601162140627941, 'loss': 0.005020639758854431, 'time_step': 0.009848698075995388, 'init_value': -1.144417643547058, 'ave_value': -0.7514211867818491, 'soft_opc': nan} step=4316




2022-04-22 03:02.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.54 [info     ] FQE_20220422030207: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001772125083279897, 'time_algorithm_update': 0.009693928511745959, 'loss': 0.005396513532915893, 'time_step': 0.00995190172310335, 'init_value': -1.2016395330429077, 'ave_value': -0.7775400352202826, 'soft_opc': nan} step=4482




2022-04-22 03:02.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.56 [info     ] FQE_20220422030207: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00018114497862666487, 'time_algorithm_update': 0.00956650814378118, 'loss': 0.005640702863526811, 'time_step': 0.00982358513108219, 'init_value': -1.2241564989089966, 'ave_value': -0.7876325296120601, 'soft_opc': nan} step=4648




2022-04-22 03:02.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.57 [info     ] FQE_20220422030207: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00017357447061193995, 'time_algorithm_update': 0.009441635694848486, 'loss': 0.005943298243415396, 'time_step': 0.009692961911121047, 'init_value': -1.2831745147705078, 'ave_value': -0.8281616285778918, 'soft_opc': nan} step=4814




2022-04-22 03:02.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:02.59 [info     ] FQE_20220422030207: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00017824948552143142, 'time_algorithm_update': 0.009273813431521496, 'loss': 0.006549285320461312, 'time_step': 0.009530180908111205, 'init_value': -1.280275583267212, 'ave_value': -0.8226816290611955, 'soft_opc': nan} step=4980




2022-04-22 03:02.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.01 [info     ] FQE_20220422030207: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00017357159809893872, 'time_algorithm_update': 0.009254122354898107, 'loss': 0.007136062169636892, 'time_step': 0.009507225220461926, 'init_value': -1.3436306715011597, 'ave_value': -0.883111505193619, 'soft_opc': nan} step=5146




2022-04-22 03:03.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.03 [info     ] FQE_20220422030207: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00017655039408120764, 'time_algorithm_update': 0.009617516793400407, 'loss': 0.006903007159820275, 'time_step': 0.009874767567737993, 'init_value': -1.3531981706619263, 'ave_value': -0.8850847889845436, 'soft_opc': nan} step=5312




2022-04-22 03:03.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.04 [info     ] FQE_20220422030207: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001789144722812147, 'time_algorithm_update': 0.009630965899272138, 'loss': 0.007209114051151864, 'time_step': 0.009888933365603527, 'init_value': -1.3520715236663818, 'ave_value': -0.8689065532591026, 'soft_opc': nan} step=5478




2022-04-22 03:03.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.06 [info     ] FQE_20220422030207: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001805417508964079, 'time_algorithm_update': 0.009142822529896196, 'loss': 0.007639811048143913, 'time_step': 0.009405309895435011, 'init_value': -1.4359188079833984, 'ave_value': -0.9418288288218482, 'soft_opc': nan} step=5644




2022-04-22 03:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.08 [info     ] FQE_20220422030207: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00017688935061535203, 'time_algorithm_update': 0.00954162643616458, 'loss': 0.007898113052311622, 'time_step': 0.00979487436363496, 'init_value': -1.481724500656128, 'ave_value': -0.9838518183611267, 'soft_opc': nan} step=5810




2022-04-22 03:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.10 [info     ] FQE_20220422030207: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00017625596149858222, 'time_algorithm_update': 0.009568737213870129, 'loss': 0.008011895317460847, 'time_step': 0.009824623544532132, 'init_value': -1.490180492401123, 'ave_value': -0.9868468586791743, 'soft_opc': nan} step=5976




2022-04-22 03:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.11 [info     ] FQE_20220422030207: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00017330876315932675, 'time_algorithm_update': 0.0095735975058682, 'loss': 0.008055709350113589, 'time_step': 0.00982344725045813, 'init_value': -1.5055038928985596, 'ave_value': -0.9743987899550514, 'soft_opc': nan} step=6142




2022-04-22 03:03.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.13 [info     ] FQE_20220422030207: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001794114170304264, 'time_algorithm_update': 0.00959282467164189, 'loss': 0.008359797025051708, 'time_step': 0.009850738996482757, 'init_value': -1.5275652408599854, 'ave_value': -0.9955954135462709, 'soft_opc': nan} step=6308




2022-04-22 03:03.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.15 [info     ] FQE_20220422030207: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00017674428870879025, 'time_algorithm_update': 0.008558339383228716, 'loss': 0.008461192645432696, 'time_step': 0.008811999516314771, 'init_value': -1.5757728815078735, 'ave_value': -1.0370459319234968, 'soft_opc': nan} step=6474




2022-04-22 03:03.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.16 [info     ] FQE_20220422030207: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001844282609870635, 'time_algorithm_update': 0.00959315070186753, 'loss': 0.008601232025953268, 'time_step': 0.009852873273642666, 'init_value': -1.577681541442871, 'ave_value': -1.045802439324692, 'soft_opc': nan} step=6640




2022-04-22 03:03.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.18 [info     ] FQE_20220422030207: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00019808131528187948, 'time_algorithm_update': 0.009635840553835213, 'loss': 0.009038385065869955, 'time_step': 0.009913262114467391, 'init_value': -1.5870912075042725, 'ave_value': -1.0408565124077302, 'soft_opc': nan} step=6806




2022-04-22 03:03.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.20 [info     ] FQE_20220422030207: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00017716942063297135, 'time_algorithm_update': 0.009550753846225968, 'loss': 0.00957868229692482, 'time_step': 0.009802052773625017, 'init_value': -1.615905523300171, 'ave_value': -1.0545338921634866, 'soft_opc': nan} step=6972




2022-04-22 03:03.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.22 [info     ] FQE_20220422030207: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00017751412219311818, 'time_algorithm_update': 0.009137947875333119, 'loss': 0.009334300112411245, 'time_step': 0.009387971406959626, 'init_value': -1.627650260925293, 'ave_value': -1.0289360810700503, 'soft_opc': nan} step=7138




2022-04-22 03:03.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.23 [info     ] FQE_20220422030207: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00017798377806881824, 'time_algorithm_update': 0.009622955896768225, 'loss': 0.009632661239123421, 'time_step': 0.009877614228122205, 'init_value': -1.6607688665390015, 'ave_value': -1.0543800381070993, 'soft_opc': nan} step=7304




2022-04-22 03:03.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.25 [info     ] FQE_20220422030207: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00017845056143151708, 'time_algorithm_update': 0.009590278188866305, 'loss': 0.00992220827428826, 'time_step': 0.009843979973390878, 'init_value': -1.6431281566619873, 'ave_value': -1.0260107442501698, 'soft_opc': nan} step=7470




2022-04-22 03:03.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.27 [info     ] FQE_20220422030207: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00017917156219482422, 'time_algorithm_update': 0.009746505553463855, 'loss': 0.010175455520799136, 'time_step': 0.01000039979635951, 'init_value': -1.7041652202606201, 'ave_value': -1.047124101552139, 'soft_opc': nan} step=7636




2022-04-22 03:03.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.29 [info     ] FQE_20220422030207: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00017753566604062734, 'time_algorithm_update': 0.009572721389402827, 'loss': 0.010561351034950173, 'time_step': 0.009823454431740635, 'init_value': -1.74311363697052, 'ave_value': -1.0790946756296598, 'soft_opc': nan} step=7802




2022-04-22 03:03.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.30 [info     ] FQE_20220422030207: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00017758880753115, 'time_algorithm_update': 0.008582033306719309, 'loss': 0.0106211117584839, 'time_step': 0.008835888770689448, 'init_value': -1.68109929561615, 'ave_value': -1.0150289758091775, 'soft_opc': nan} step=7968




2022-04-22 03:03.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.32 [info     ] FQE_20220422030207: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00017564124371632035, 'time_algorithm_update': 0.009691598903701967, 'loss': 0.010683529369297427, 'time_step': 0.00994387161300843, 'init_value': -1.7807221412658691, 'ave_value': -1.109714763380822, 'soft_opc': nan} step=8134




2022-04-22 03:03.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 03:03.34 [info     ] FQE_20220422030207: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00017711053411644627, 'time_algorithm_update': 0.009439518652766585, 'loss': 0.01104756579271529, 'time_step': 0.009696433343083025, 'init_value': -1.7707189321517944, 'ave_value': -1.1013625135141853, 'soft_opc': nan} step=8300




2022-04-22 03:03.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030207/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 03:03.34 [info     ] Directory is created at d3rlpy_logs/FQE_20220422030334
2022-04-22 03:03.34 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 03:03.34 [debug    ] Building models...
2022-04-22 03:03.34 [debug    ] Models have been built.
2022-04-22 03:03.34 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422030334/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 03:03.38 [info     ] FQE_20220422030334: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00018288021863893021, 'time_algorithm_update': 0.009338020585304084, 'loss': 0.023518846746599084, 'time_step': 0.009597338216249333, 'init_value': -1.0954771041870117, 'ave_value': -1.1209633418233007, 'soft_opc': nan} step=344




2022-04-22 03:03.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:03.42 [info     ] FQE_20220422030334: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00018404320228931515, 'time_algorithm_update': 0.009525310854579127, 'loss': 0.021749507365098526, 'time_step': 0.009788680215214575, 'init_value': -1.8704705238342285, 'ave_value': -1.9246136471629143, 'soft_opc': nan} step=688




2022-04-22 03:03.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:03.45 [info     ] FQE_20220422030334: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017932958381120548, 'time_algorithm_update': 0.009280592203140259, 'loss': 0.02438815837985901, 'time_step': 0.009537807037664015, 'init_value': -2.7865614891052246, 'ave_value': -2.95522295250012, 'soft_opc': nan} step=1032




2022-04-22 03:03.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:03.49 [info     ] FQE_20220422030334: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00018104564311892488, 'time_algorithm_update': 0.009388413540152617, 'loss': 0.025996491429395974, 'time_step': 0.009647677804148474, 'init_value': -3.4186227321624756, 'ave_value': -3.7669135359896195, 'soft_opc': nan} step=1376




2022-04-22 03:03.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:03.52 [info     ] FQE_20220422030334: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017573251280673716, 'time_algorithm_update': 0.00958406786585963, 'loss': 0.03077054595260686, 'time_step': 0.009836005610088969, 'init_value': -4.331418991088867, 'ave_value': -4.968331279921102, 'soft_opc': nan} step=1720




2022-04-22 03:03.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:03.56 [info     ] FQE_20220422030334: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001746963622958161, 'time_algorithm_update': 0.009238226469172987, 'loss': 0.035815857675706234, 'time_step': 0.00948954044386398, 'init_value': -4.664824485778809, 'ave_value': -5.657279613452989, 'soft_opc': nan} step=2064




2022-04-22 03:03.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:03.59 [info     ] FQE_20220422030334: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001741737820381342, 'time_algorithm_update': 0.009377621634061946, 'loss': 0.04256228897937162, 'time_step': 0.009629647399103918, 'init_value': -5.2518792152404785, 'ave_value': -6.6053625171785955, 'soft_opc': nan} step=2408




2022-04-22 03:03.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.03 [info     ] FQE_20220422030334: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017844313798948776, 'time_algorithm_update': 0.009324116762294325, 'loss': 0.05044369167529133, 'time_step': 0.009581000998962757, 'init_value': -5.474830627441406, 'ave_value': -7.266252040406605, 'soft_opc': nan} step=2752




2022-04-22 03:04.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.06 [info     ] FQE_20220422030334: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017614697301110557, 'time_algorithm_update': 0.009487480618232904, 'loss': 0.057036877347178064, 'time_step': 0.009740711644638416, 'init_value': -5.8322014808654785, 'ave_value': -7.974597631408288, 'soft_opc': nan} step=3096




2022-04-22 03:04.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.10 [info     ] FQE_20220422030334: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001751607240632523, 'time_algorithm_update': 0.009241168582162191, 'loss': 0.06475014671098528, 'time_step': 0.009494162576143132, 'init_value': -6.222168922424316, 'ave_value': -8.790904596501642, 'soft_opc': nan} step=3440




2022-04-22 03:04.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.14 [info     ] FQE_20220422030334: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00018104079157807106, 'time_algorithm_update': 0.009546618128931799, 'loss': 0.07466699362754128, 'time_step': 0.009805593379708223, 'init_value': -6.507074356079102, 'ave_value': -9.473346364041706, 'soft_opc': nan} step=3784




2022-04-22 03:04.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.17 [info     ] FQE_20220422030334: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001827998216762099, 'time_algorithm_update': 0.00925308673880821, 'loss': 0.09026905488179521, 'time_step': 0.009516101243884065, 'init_value': -6.8860273361206055, 'ave_value': -10.277413869629035, 'soft_opc': nan} step=4128




2022-04-22 03:04.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.21 [info     ] FQE_20220422030334: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017757401909939077, 'time_algorithm_update': 0.009884703991024993, 'loss': 0.10153295528847552, 'time_step': 0.010144979454750238, 'init_value': -7.005077362060547, 'ave_value': -10.717289430225218, 'soft_opc': nan} step=4472




2022-04-22 03:04.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.25 [info     ] FQE_20220422030334: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017544349958730298, 'time_algorithm_update': 0.009877928467684014, 'loss': 0.11582311370604953, 'time_step': 0.010134670623513154, 'init_value': -7.443281173706055, 'ave_value': -11.326021560556716, 'soft_opc': nan} step=4816




2022-04-22 03:04.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.28 [info     ] FQE_20220422030334: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001766979694366455, 'time_algorithm_update': 0.009734687417052513, 'loss': 0.13642154579310742, 'time_step': 0.009988206070522929, 'init_value': -7.609707832336426, 'ave_value': -11.73927447174315, 'soft_opc': nan} step=5160




2022-04-22 03:04.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.32 [info     ] FQE_20220422030334: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00018031583275905874, 'time_algorithm_update': 0.0098621339298958, 'loss': 0.153012759295877, 'time_step': 0.010122014339580092, 'init_value': -7.903654098510742, 'ave_value': -12.154058523221059, 'soft_opc': nan} step=5504




2022-04-22 03:04.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.36 [info     ] FQE_20220422030334: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017946196156878804, 'time_algorithm_update': 0.009634034578190294, 'loss': 0.18029123528990462, 'time_step': 0.009891862093016159, 'init_value': -8.376949310302734, 'ave_value': -12.665723496474124, 'soft_opc': nan} step=5848




2022-04-22 03:04.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.40 [info     ] FQE_20220422030334: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00018065821292788484, 'time_algorithm_update': 0.010008798782215562, 'loss': 0.20515267294290107, 'time_step': 0.010271053674609162, 'init_value': -8.940675735473633, 'ave_value': -13.364429083654473, 'soft_opc': nan} step=6192




2022-04-22 03:04.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.43 [info     ] FQE_20220422030334: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017931225687958474, 'time_algorithm_update': 0.009592373703801355, 'loss': 0.2421072151100393, 'time_step': 0.009851187467575073, 'init_value': -9.413022994995117, 'ave_value': -13.85431563396443, 'soft_opc': nan} step=6536




2022-04-22 03:04.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.47 [info     ] FQE_20220422030334: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00018115930779035702, 'time_algorithm_update': 0.009786576725715814, 'loss': 0.2689105706495168, 'time_step': 0.010045903366665507, 'init_value': -9.867071151733398, 'ave_value': -14.312134170055657, 'soft_opc': nan} step=6880




2022-04-22 03:04.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.51 [info     ] FQE_20220422030334: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017801966778067656, 'time_algorithm_update': 0.009617874095606249, 'loss': 0.2958851099490773, 'time_step': 0.00987413248350454, 'init_value': -10.885570526123047, 'ave_value': -15.321918142258047, 'soft_opc': nan} step=7224




2022-04-22 03:04.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.54 [info     ] FQE_20220422030334: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001776156037352806, 'time_algorithm_update': 0.009931602450304253, 'loss': 0.3190684433387549, 'time_step': 0.010185089915297752, 'init_value': -11.06692886352539, 'ave_value': -15.484223909850593, 'soft_opc': nan} step=7568




2022-04-22 03:04.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:04.58 [info     ] FQE_20220422030334: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00018137693405151367, 'time_algorithm_update': 0.009902580532916757, 'loss': 0.3341283813487132, 'time_step': 0.010166692872380102, 'init_value': -11.196073532104492, 'ave_value': -15.69036399681826, 'soft_opc': nan} step=7912




2022-04-22 03:04.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.02 [info     ] FQE_20220422030334: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00018175396808358125, 'time_algorithm_update': 0.009587033543475839, 'loss': 0.33834062712158747, 'time_step': 0.009851448757703914, 'init_value': -11.658394813537598, 'ave_value': -16.192575469284176, 'soft_opc': nan} step=8256




2022-04-22 03:05.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.06 [info     ] FQE_20220422030334: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017733144205670025, 'time_algorithm_update': 0.010125588539034822, 'loss': 0.35134555616545987, 'time_step': 0.010382133860920751, 'init_value': -12.212267875671387, 'ave_value': -16.870788035897522, 'soft_opc': nan} step=8600




2022-04-22 03:05.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.09 [info     ] FQE_20220422030334: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00018959197887154512, 'time_algorithm_update': 0.009606475053831588, 'loss': 0.3671929630894907, 'time_step': 0.009875174178633579, 'init_value': -12.53056526184082, 'ave_value': -17.159929571864573, 'soft_opc': nan} step=8944




2022-04-22 03:05.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.13 [info     ] FQE_20220422030334: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017709440963212833, 'time_algorithm_update': 0.010049240533695665, 'loss': 0.3741151699308999, 'time_step': 0.010308522817700408, 'init_value': -13.078485488891602, 'ave_value': -17.805885627583876, 'soft_opc': nan} step=9288




2022-04-22 03:05.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.17 [info     ] FQE_20220422030334: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00018176228501075921, 'time_algorithm_update': 0.009462597758270973, 'loss': 0.38290185866292653, 'time_step': 0.00972647653069607, 'init_value': -13.362569808959961, 'ave_value': -18.24237507790461, 'soft_opc': nan} step=9632




2022-04-22 03:05.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.20 [info     ] FQE_20220422030334: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017768837684808777, 'time_algorithm_update': 0.009859728258709574, 'loss': 0.3877720208513702, 'time_step': 0.010119156088939932, 'init_value': -13.4741849899292, 'ave_value': -18.54115682241616, 'soft_opc': nan} step=9976




2022-04-22 03:05.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.24 [info     ] FQE_20220422030334: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.000181918227395346, 'time_algorithm_update': 0.00967068727626357, 'loss': 0.39856852717740937, 'time_step': 0.009933125141055085, 'init_value': -14.267160415649414, 'ave_value': -19.278424618970433, 'soft_opc': nan} step=10320




2022-04-22 03:05.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.28 [info     ] FQE_20220422030334: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017864274424175884, 'time_algorithm_update': 0.00982802898384804, 'loss': 0.4104780127824999, 'time_step': 0.010087763154229452, 'init_value': -14.747367858886719, 'ave_value': -19.9096219829678, 'soft_opc': nan} step=10664




2022-04-22 03:05.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.31 [info     ] FQE_20220422030334: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00018028187197308208, 'time_algorithm_update': 0.009648126918216085, 'loss': 0.41755998942481226, 'time_step': 0.009910241115924924, 'init_value': -15.694782257080078, 'ave_value': -20.775471871728833, 'soft_opc': nan} step=11008




2022-04-22 03:05.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.35 [info     ] FQE_20220422030334: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001826618993005087, 'time_algorithm_update': 0.009890934062558551, 'loss': 0.43688281362897957, 'time_step': 0.01015423065008119, 'init_value': -16.026227951049805, 'ave_value': -21.309709695075547, 'soft_opc': nan} step=11352




2022-04-22 03:05.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.39 [info     ] FQE_20220422030334: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00018762571867122205, 'time_algorithm_update': 0.009897702655126883, 'loss': 0.45084561653391914, 'time_step': 0.010164202645767567, 'init_value': -16.65371322631836, 'ave_value': -21.934524360822664, 'soft_opc': nan} step=11696




2022-04-22 03:05.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.43 [info     ] FQE_20220422030334: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017946958541870117, 'time_algorithm_update': 0.009798760330954264, 'loss': 0.4624610715004247, 'time_step': 0.010058742622996485, 'init_value': -16.919479370117188, 'ave_value': -22.23183030053839, 'soft_opc': nan} step=12040




2022-04-22 03:05.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.46 [info     ] FQE_20220422030334: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017779926921046056, 'time_algorithm_update': 0.009757706592249315, 'loss': 0.47603059450684243, 'time_step': 0.010016382433647332, 'init_value': -16.975730895996094, 'ave_value': -22.585134375296736, 'soft_opc': nan} step=12384




2022-04-22 03:05.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.50 [info     ] FQE_20220422030334: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001869964045147563, 'time_algorithm_update': 0.00961291374162186, 'loss': 0.47749336668138587, 'time_step': 0.00987865966419841, 'init_value': -17.23145294189453, 'ave_value': -23.0246367718521, 'soft_opc': nan} step=12728




2022-04-22 03:05.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.54 [info     ] FQE_20220422030334: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00018407300461170285, 'time_algorithm_update': 0.009914995625961659, 'loss': 0.485906878383397, 'time_step': 0.010179693615713786, 'init_value': -18.114593505859375, 'ave_value': -24.291618269393304, 'soft_opc': nan} step=13072




2022-04-22 03:05.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:05.57 [info     ] FQE_20220422030334: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001822335775508437, 'time_algorithm_update': 0.009718536637550177, 'loss': 0.5045972299372214, 'time_step': 0.009980408258216326, 'init_value': -18.121658325195312, 'ave_value': -24.413813352594907, 'soft_opc': nan} step=13416




2022-04-22 03:05.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.01 [info     ] FQE_20220422030334: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00018016404883806095, 'time_algorithm_update': 0.009953195272490035, 'loss': 0.5044695849606204, 'time_step': 0.01021214487940766, 'init_value': -18.271442413330078, 'ave_value': -24.894093705024908, 'soft_opc': nan} step=13760




2022-04-22 03:06.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.05 [info     ] FQE_20220422030334: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00019063506015511446, 'time_algorithm_update': 0.009401309628819311, 'loss': 0.5183695241873867, 'time_step': 0.009669974099758059, 'init_value': -18.292068481445312, 'ave_value': -24.961867408544194, 'soft_opc': nan} step=14104




2022-04-22 03:06.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.09 [info     ] FQE_20220422030334: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00018247060997541562, 'time_algorithm_update': 0.010013736264650212, 'loss': 0.5231667505902087, 'time_step': 0.010277773751768955, 'init_value': -18.64364242553711, 'ave_value': -25.40877348436378, 'soft_opc': nan} step=14448




2022-04-22 03:06.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.12 [info     ] FQE_20220422030334: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00018155158952225085, 'time_algorithm_update': 0.009645085002100744, 'loss': 0.5357981886454793, 'time_step': 0.009904669467792955, 'init_value': -19.239261627197266, 'ave_value': -25.944110138546506, 'soft_opc': nan} step=14792




2022-04-22 03:06.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.16 [info     ] FQE_20220422030334: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00018108722775481468, 'time_algorithm_update': 0.010101620541062466, 'loss': 0.5543429150332718, 'time_step': 0.010360836289649787, 'init_value': -19.145843505859375, 'ave_value': -26.15292822925483, 'soft_opc': nan} step=15136




2022-04-22 03:06.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.20 [info     ] FQE_20220422030334: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017983899560085562, 'time_algorithm_update': 0.009630593449570412, 'loss': 0.5644294169114166, 'time_step': 0.009888684333756913, 'init_value': -19.597013473510742, 'ave_value': -26.72945845568435, 'soft_opc': nan} step=15480




2022-04-22 03:06.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.23 [info     ] FQE_20220422030334: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00018540787142376568, 'time_algorithm_update': 0.009868428457614988, 'loss': 0.5699792824479816, 'time_step': 0.010133114665053611, 'init_value': -19.923067092895508, 'ave_value': -27.433321010213145, 'soft_opc': nan} step=15824




2022-04-22 03:06.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.27 [info     ] FQE_20220422030334: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001827949701353561, 'time_algorithm_update': 0.010030322989752127, 'loss': 0.5837690006714141, 'time_step': 0.010292265304299287, 'init_value': -19.637348175048828, 'ave_value': -27.628629976821024, 'soft_opc': nan} step=16168




2022-04-22 03:06.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.31 [info     ] FQE_20220422030334: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017847709877546444, 'time_algorithm_update': 0.009617555973141692, 'loss': 0.5842382054570195, 'time_step': 0.009873489307802777, 'init_value': -20.062410354614258, 'ave_value': -28.023189116299555, 'soft_opc': nan} step=16512




2022-04-22 03:06.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.35 [info     ] FQE_20220422030334: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017756916755853699, 'time_algorithm_update': 0.009839263766310936, 'loss': 0.5973493561235278, 'time_step': 0.010097210490426351, 'init_value': -20.169347763061523, 'ave_value': -28.231598039663623, 'soft_opc': nan} step=16856




2022-04-22 03:06.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 03:06.38 [info     ] FQE_20220422030334: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001810324746508931, 'time_algorithm_update': 0.009631332963011986, 'loss': 0.612309800228104, 'time_step': 0.009892546853353812, 'init_value': -20.545791625976562, 'ave_value': -28.455182623059848, 'soft_opc': nan} step=17200




2022-04-22 03:06.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422030334/model_17200.pt
search iteration:  15
using hyper params:  [0.008760983541537109, 0.0013811500183162182, 8.777506401549547e-05, 1]
2022-04-22 03:06.38 [debug    ] RoundIterator is selected.
2022-04-22 03:06.38 [info     ] Directory is created at d3rlpy_logs/CQL_20220422030638
2022-04-22 03:06.38 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 03:06.38 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 03:06.38 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422030638/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.008760983541537109, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:07.01 [info     ] CQL_20220422030638: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003595138560829824, 'time_algorithm_update': 0.06335095242957849, 'temp_loss': 4.933823351226101, 'temp': 0.98467721794382, 'alpha_loss': -17.684824866366526, 'alpha': 1.017684637466607, 'critic_loss': 28.561622558990656, 'actor_loss': -1.9410899255661607, 'time_step': 0.06381521541948264, 'td_error': 1.2305069773393926, 'init_value': 0.08916059136390686, 'ave_value': 0.2298859545350739} step=346
2022-04-22 03:07.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:07.24 [info     ] CQL_20220422030638: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00033752146483845795, 'time_algorithm_update': 0.062020408624858525, 'temp_loss': 4.820271955060132, 'temp': 0.9554039208875226, 'alpha_loss': -18.335625009040612, 'alpha': 1.0541214036803714, 'critic_loss': 29.06586418262107, 'actor_loss': -1.8024779544400342, 'time_step': 0.062457737895105614, 'td_error': 1.2152548761142394, 'init_value': 0.046245552599430084, 'ave_value': 0.2751939335091666} step=692
2022-04-22 03:07.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:07.47 [info     ] CQL_20220422030638: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003549818358669391, 'time_algorithm_update': 0.06303036212921143, 'temp_loss': 4.682504328689134, 'temp': 0.9274645762981018, 'alpha_loss': -18.998386140503634, 'alpha': 1.09238613340896, 'critic_loss': 37.41158988158827, 'actor_loss': -1.3810325974673894, 'time_step': 0.06348801623879141, 'td_error': 1.2123565950789887, 'init_value': -0.29822713136672974, 'ave_value': -0.021268699975263004} step=1038
2022-04-22 03:07.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:08.10 [info     ] CQL_20220422030638: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003478024047234155, 'time_algorithm_update': 0.06315244484499011, 'temp_loss': 4.548463616067964, 'temp': 0.9006537512547708, 'alpha_loss': -19.7012698057759, 'alpha': 1.1325510016755562, 'critic_loss': 47.51869267810976, 'actor_loss': -0.8254603717712997, 'time_step': 0.0636030056573063, 'td_error': 1.2070348913275555, 'init_value': -0.7731019258499146, 'ave_value': -0.43710601569182955} step=1384
2022-04-22 03:08.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:08.33 [info     ] CQL_20220422030638: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003541466817690458, 'time_algorithm_update': 0.0631173649275234, 'temp_loss': 4.418426475083897, 'temp': 0.8748668776771237, 'alpha_loss': -20.433765168823946, 'alpha': 1.1746544800052754, 'critic_loss': 58.6712343182867, 'actor_loss': -0.31133808473372737, 'time_step': 0.06357465520759538, 'td_error': 1.2117220659977834, 'init_value': -1.158478856086731, 'ave_value': -0.799192441296894} step=1730
2022-04-22 03:08.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:08.56 [info     ] CQL_20220422030638: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003564378429699495, 'time_algorithm_update': 0.06336905914924049, 'temp_loss': 4.292892533230644, 'temp': 0.8500204696131579, 'alpha_loss': -21.198850631713867, 'alpha': 1.2187259927650407, 'critic_loss': 71.15598178047665, 'actor_loss': 0.1569580922409289, 'time_step': 0.06383028195772557, 'td_error': 1.211195840818398, 'init_value': -1.5630711317062378, 'ave_value': -1.2043856381967262} step=2076
2022-04-22 03:08.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:09.19 [info     ] CQL_20220422030638: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00035493773532051573, 'time_algorithm_update': 0.06260132651797609, 'temp_loss': 4.172778213644303, 'temp': 0.8260405005058112, 'alpha_loss': -22.0006196733155, 'alpha': 1.2648051136490928, 'critic_loss': 85.91007367172682, 'actor_loss': 0.5316483010403338, 'time_step': 0.06306247008329181, 'td_error': 1.2120262004777207, 'init_value': -1.8890594244003296, 'ave_value': -1.5135307839063459} step=2422
2022-04-22 03:09.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:09.41 [info     ] CQL_20220422030638: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00036340848558899985, 'time_algorithm_update': 0.0610475815789548, 'temp_loss': 4.054945612918435, 'temp': 0.8028605747429621, 'alpha_loss': -22.83497181930983, 'alpha': 1.3129251516623304, 'critic_loss': 103.17455322618429, 'actor_loss': 0.7711746504024274, 'time_step': 0.06151313450984183, 'td_error': 1.2138170227985356, 'init_value': -1.8797844648361206, 'ave_value': -1.5182066404859056} step=2768
2022-04-22 03:09.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:10.03 [info     ] CQL_20220422030638: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003568313025325709, 'time_algorithm_update': 0.05995752012109481, 'temp_loss': 3.9420125849674204, 'temp': 0.7804368630999086, 'alpha_loss': -23.70594736330771, 'alpha': 1.3631268891295947, 'critic_loss': 123.46599938276876, 'actor_loss': 0.8662652029639724, 'time_step': 0.06041594805745031, 'td_error': 1.2133827868264224, 'init_value': -1.8923451900482178, 'ave_value': -1.5421195680592623} step=3114
2022-04-22 03:10.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:10.24 [info     ] CQL_20220422030638: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003471877533576392, 'time_algorithm_update': 0.05956495979617786, 'temp_loss': 3.8317110848564635, 'temp': 0.7587171078072807, 'alpha_loss': -24.619592666625977, 'alpha': 1.4154676333328202, 'critic_loss': 147.4420540429264, 'actor_loss': 0.8013042568988193, 'time_step': 0.06001642466969573, 'td_error': 1.2142954687426561, 'init_value': -1.7686171531677246, 'ave_value': -1.422683347074157} step=3460
2022-04-22 03:10.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:10.46 [info     ] CQL_20220422030638: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.000360881662093146, 'time_algorithm_update': 0.059195931936275065, 'temp_loss': 3.7255553082923667, 'temp': 0.7376648482215198, 'alpha_loss': -25.563533838084666, 'alpha': 1.469989429319525, 'critic_loss': 173.70049621052826, 'actor_loss': 0.5370306646548255, 'time_step': 0.05965917785732732, 'td_error': 1.2164224752528898, 'init_value': -1.5043929815292358, 'ave_value': -1.2105608323164154} step=3806
2022-04-22 03:10.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:11.07 [info     ] CQL_20220422030638: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00036331339378577437, 'time_algorithm_update': 0.059255814965749755, 'temp_loss': 3.622815049452589, 'temp': 0.7172415778471556, 'alpha_loss': -26.550996174012994, 'alpha': 1.526753080029019, 'critic_loss': 203.145172471945, 'actor_loss': 0.08856418975924997, 'time_step': 0.05972147263543454, 'td_error': 1.2207105482579395, 'init_value': -1.1223951578140259, 'ave_value': -0.8634501429856699} step=4152
2022-04-22 03:11.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:11.29 [info     ] CQL_20220422030638: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003570524943357258, 'time_algorithm_update': 0.05873271909063262, 'temp_loss': 3.5233395513082515, 'temp': 0.6974193555426735, 'alpha_loss': -27.581365006507475, 'alpha': 1.585825603132303, 'critic_loss': 235.2020890781645, 'actor_loss': -0.4487191214996008, 'time_step': 0.05919465370949982, 'td_error': 1.223112198690876, 'init_value': -0.4633330702781677, 'ave_value': -0.2701855810460687} step=4498
2022-04-22 03:11.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:11.50 [info     ] CQL_20220422030638: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003619993353165643, 'time_algorithm_update': 0.058796134298247406, 'temp_loss': 3.424598232859132, 'temp': 0.6781738765666939, 'alpha_loss': -28.643390275150367, 'alpha': 1.6472755708446392, 'critic_loss': 269.3238016470319, 'actor_loss': -1.0036872644300405, 'time_step': 0.059262060705636964, 'td_error': 1.2265874116563196, 'init_value': -0.026961348950862885, 'ave_value': 0.125565879940704} step=4844
2022-04-22 03:11.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:12.12 [info     ] CQL_20220422030638: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00038106593093431063, 'time_algorithm_update': 0.05954902985192448, 'temp_loss': 3.330967914162344, 'temp': 0.6594839338966877, 'alpha_loss': -29.759414209795825, 'alpha': 1.7111771055039642, 'critic_loss': 304.22447777896946, 'actor_loss': -1.4612844951580026, 'time_step': 0.06003463681722652, 'td_error': 1.2299248917702597, 'init_value': 0.44784852862358093, 'ave_value': 0.5535278057023325} step=5190
2022-04-22 03:12.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:12.34 [info     ] CQL_20220422030638: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00034870370964094395, 'time_algorithm_update': 0.06014759416525074, 'temp_loss': 3.2387267220227014, 'temp': 0.641324674118461, 'alpha_loss': -30.910879653313255, 'alpha': 1.7776157070446565, 'critic_loss': 337.50707293383647, 'actor_loss': -1.8260247197454376, 'time_step': 0.06059452426226842, 'td_error': 1.2317935462225587, 'init_value': 0.8166434168815613, 'ave_value': 0.8939018918474233} step=5536
2022-04-22 03:12.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:12.55 [info     ] CQL_20220422030638: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003598914670117329, 'time_algorithm_update': 0.059824805728272895, 'temp_loss': 3.1500965763378694, 'temp': 0.6236759138245114, 'alpha_loss': -32.11119656204488, 'alpha': 1.8466759789196743, 'critic_loss': 368.16383388276734, 'actor_loss': -2.1585813374877665, 'time_step': 0.06028529949959992, 'td_error': 1.232890853491043, 'init_value': 1.1129176616668701, 'ave_value': 1.1728460114394506} step=5882
2022-04-22 03:12.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:13.17 [info     ] CQL_20220422030638: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0003654515812162719, 'time_algorithm_update': 0.05937278684164058, 'temp_loss': 3.0626401901245117, 'temp': 0.6065246402528245, 'alpha_loss': -33.361506114805366, 'alpha': 1.9184577713122946, 'critic_loss': 399.20863095322096, 'actor_loss': -2.454455036648436, 'time_step': 0.0598362739375561, 'td_error': 1.2338391988804747, 'init_value': 1.4551740884780884, 'ave_value': 1.4920282484853138} step=6228
2022-04-22 03:13.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:13.38 [info     ] CQL_20220422030638: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00035436856264323857, 'time_algorithm_update': 0.059624742221281016, 'temp_loss': 2.9785846940354803, 'temp': 0.5898499003035484, 'alpha_loss': -34.66078011837998, 'alpha': 1.9930609299268336, 'critic_loss': 428.28943550242167, 'actor_loss': -2.7572316406779205, 'time_step': 0.0600791790581852, 'td_error': 1.2344472844891463, 'init_value': 1.7429786920547485, 'ave_value': 1.7807874457764932} step=6574
2022-04-22 03:13.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:14.00 [info     ] CQL_20220422030638: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003705768915959176, 'time_algorithm_update': 0.05972144920701925, 'temp_loss': 2.897440788373782, 'temp': 0.5736395366963624, 'alpha_loss': -36.00330497212493, 'alpha': 2.070579986351763, 'critic_loss': 458.8147030868971, 'actor_loss': -3.0347467171663496, 'time_step': 0.06018819767615698, 'td_error': 1.2349905560459944, 'init_value': 2.058594226837158, 'ave_value': 2.0836042507839974} step=6920
2022-04-22 03:14.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:14.22 [info     ] CQL_20220422030638: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003581722347722577, 'time_algorithm_update': 0.05932223865751586, 'temp_loss': 2.8175064566507504, 'temp': 0.557876533511057, 'alpha_loss': -37.40713371982464, 'alpha': 2.151130526052045, 'critic_loss': 488.9760434365686, 'actor_loss': -3.321128739097904, 'time_step': 0.05977827758458308, 'td_error': 1.2354102670948135, 'init_value': 2.3517885208129883, 'ave_value': 2.3696125293693706} step=7266
2022-04-22 03:14.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:14.43 [info     ] CQL_20220422030638: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00036764971782706374, 'time_algorithm_update': 0.05833850981872206, 'temp_loss': 2.739316258816361, 'temp': 0.5425514651171733, 'alpha_loss': -38.8577717025845, 'alpha': 2.234827705890457, 'critic_loss': 520.4192676874943, 'actor_loss': -3.5808780331143066, 'time_step': 0.05880425293321554, 'td_error': 1.2355701426999155, 'init_value': 2.620734453201294, 'ave_value': 2.6371291260291976} step=7612
2022-04-22 03:14.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:15.05 [info     ] CQL_20220422030638: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00035250393641477373, 'time_algorithm_update': 0.05972777832450205, 'temp_loss': 2.6642566967561754, 'temp': 0.5276512884335711, 'alpha_loss': -40.37215451146826, 'alpha': 2.321787963712836, 'critic_loss': 554.0801775276317, 'actor_loss': -3.8505037788710843, 'time_step': 0.060179090913320556, 'td_error': 1.2361760768607895, 'init_value': 2.940347909927368, 'ave_value': 2.9523493222570805} step=7958
2022-04-22 03:15.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:15.26 [info     ] CQL_20220422030638: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00035883236482653314, 'time_algorithm_update': 0.05978495606108208, 'temp_loss': 2.5913897111925777, 'temp': 0.5131587057099866, 'alpha_loss': -41.944252190562345, 'alpha': 2.412142271251348, 'critic_loss': 587.4579511873984, 'actor_loss': -4.12011672995683, 'time_step': 0.06024396695153562, 'td_error': 1.236766550738053, 'init_value': 3.231029987335205, 'ave_value': 3.2395586800270024} step=8304
2022-04-22 03:15.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:15.48 [info     ] CQL_20220422030638: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003551809773968823, 'time_algorithm_update': 0.060124679796957556, 'temp_loss': 2.520087085707339, 'temp': 0.49906518133734, 'alpha_loss': -43.57918472510542, 'alpha': 2.5060258380250433, 'critic_loss': 623.3244985238665, 'actor_loss': -4.397714350264885, 'time_step': 0.06057751247648559, 'td_error': 1.2377489320723154, 'init_value': 3.561100721359253, 'ave_value': 3.567053216619478} step=8650
2022-04-22 03:15.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:16.10 [info     ] CQL_20220422030638: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00037077327684170936, 'time_algorithm_update': 0.060449655345409595, 'temp_loss': 2.4514220270807345, 'temp': 0.48535874305088395, 'alpha_loss': -45.2699650450249, 'alpha': 2.603559936402161, 'critic_loss': 668.6390417903834, 'actor_loss': -4.66403132229182, 'time_step': 0.06092394225170158, 'td_error': 1.2383405289410008, 'init_value': 3.8345460891723633, 'ave_value': 3.8400589327271826} step=8996
2022-04-22 03:16.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:16.32 [info     ] CQL_20220422030638: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00035605747575704765, 'time_algorithm_update': 0.05990955044079378, 'temp_loss': 2.3839049925004816, 'temp': 0.4720298555717303, 'alpha_loss': -47.03715279198796, 'alpha': 2.704891320597919, 'critic_loss': 723.7936466635996, 'actor_loss': -4.916410010674096, 'time_step': 0.060369172537257904, 'td_error': 1.2391241546833216, 'init_value': 4.099520683288574, 'ave_value': 4.104140825009335} step=9342
2022-04-22 03:16.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:16.54 [info     ] CQL_20220422030638: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0003839331555228702, 'time_algorithm_update': 0.059970094978464825, 'temp_loss': 2.318456567091749, 'temp': 0.45906716137263126, 'alpha_loss': -48.865655006011785, 'alpha': 2.8101927593264278, 'critic_loss': 784.6586432484534, 'actor_loss': -5.143369084837809, 'time_step': 0.0604543741038769, 'td_error': 1.2398155656734027, 'init_value': 4.343218803405762, 'ave_value': 4.34845760502822} step=9688
2022-04-22 03:16.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:17.16 [info     ] CQL_20220422030638: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003640624140039345, 'time_algorithm_update': 0.060707251460566, 'temp_loss': 2.254907871946434, 'temp': 0.44645921773993213, 'alpha_loss': -50.766073215903575, 'alpha': 2.9195856407198604, 'critic_loss': 852.4967120396609, 'actor_loss': -5.373481706387735, 'time_step': 0.061173709830796785, 'td_error': 1.2412861461539624, 'init_value': 4.6401190757751465, 'ave_value': 4.643040884541583} step=10034
2022-04-22 03:17.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:17.38 [info     ] CQL_20220422030638: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003695425959680811, 'time_algorithm_update': 0.05998287586807516, 'temp_loss': 2.193009525365223, 'temp': 0.4341995830653031, 'alpha_loss': -52.73901256936134, 'alpha': 3.0332417384737487, 'critic_loss': 923.7727006680703, 'actor_loss': -5.587577126618755, 'time_step': 0.06045433482682774, 'td_error': 1.2420207827376593, 'init_value': 4.848196029663086, 'ave_value': 4.8521526742512835} step=10380
2022-04-22 03:17.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:17.59 [info     ] CQL_20220422030638: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.000359515923296096, 'time_algorithm_update': 0.06023673170563803, 'temp_loss': 2.1334487068859826, 'temp': 0.42227442545353333, 'alpha_loss': -54.79445285466365, 'alpha': 3.151319381818606, 'critic_loss': 1004.2480696308819, 'actor_loss': -5.779445360161666, 'time_step': 0.06069597274581821, 'td_error': 1.2430212890923713, 'init_value': 5.074146747589111, 'ave_value': 5.078139029955514} step=10726
2022-04-22 03:17.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:18.21 [info     ] CQL_20220422030638: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00036430910143549043, 'time_algorithm_update': 0.05988328305283034, 'temp_loss': 2.0739531179384, 'temp': 0.41067958851425634, 'alpha_loss': -56.932186446437946, 'alpha': 3.2739998894619804, 'critic_loss': 1082.6432586846324, 'actor_loss': -5.982394456863403, 'time_step': 0.06034921359464612, 'td_error': 1.244296934273318, 'init_value': 5.286650657653809, 'ave_value': 5.2890372977883064} step=11072
2022-04-22 03:18.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:18.43 [info     ] CQL_20220422030638: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00034710506483309533, 'time_algorithm_update': 0.05991735072494242, 'temp_loss': 2.0168879686752494, 'temp': 0.3994045977647594, 'alpha_loss': -59.14546539879948, 'alpha': 3.401461113395029, 'critic_loss': 1170.0355796152458, 'actor_loss': -6.1514330723382145, 'time_step': 0.06036340845802616, 'td_error': 1.2449997848397816, 'init_value': 5.45575475692749, 'ave_value': 5.459894650148633} step=11418
2022-04-22 03:18.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:19.06 [info     ] CQL_20220422030638: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003509686861424088, 'time_algorithm_update': 0.062859623418378, 'temp_loss': 1.9616798617247213, 'temp': 0.38843910456392805, 'alpha_loss': -61.451777915734084, 'alpha': 3.533882428455904, 'critic_loss': 1261.146634316858, 'actor_loss': -6.304834808228333, 'time_step': 0.06330497347550586, 'td_error': 1.2458587371144072, 'init_value': 5.608940601348877, 'ave_value': 5.612401853256624} step=11764
2022-04-22 03:19.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:19.29 [info     ] CQL_20220422030638: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003604771773939188, 'time_algorithm_update': 0.06363890970373429, 'temp_loss': 1.9076390345661627, 'temp': 0.37777375606443153, 'alpha_loss': -63.84181921192676, 'alpha': 3.6714591752587027, 'critic_loss': 1351.7189299302295, 'actor_loss': -6.457899439541591, 'time_step': 0.06409579412096497, 'td_error': 1.2467482343704661, 'init_value': 5.7639665603637695, 'ave_value': 5.767807200803978} step=12110
2022-04-22 03:19.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:19.52 [info     ] CQL_20220422030638: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00035571914187745553, 'time_algorithm_update': 0.06402397500297238, 'temp_loss': 1.8556397457343305, 'temp': 0.3674007774777495, 'alpha_loss': -66.3239303985772, 'alpha': 3.8143809565229914, 'critic_loss': 1448.225565122042, 'actor_loss': -6.58852482531112, 'time_step': 0.0644777337939753, 'td_error': 1.248344572138813, 'init_value': 5.959533214569092, 'ave_value': 5.962180526655621} step=12456
2022-04-22 03:19.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:20.15 [info     ] CQL_20220422030638: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003755423374947785, 'time_algorithm_update': 0.06406427945704818, 'temp_loss': 1.8043858984302235, 'temp': 0.3573133813680252, 'alpha_loss': -68.91129479380702, 'alpha': 3.9628768442683135, 'critic_loss': 1548.3785975461751, 'actor_loss': -6.713119268417358, 'time_step': 0.06453486329558268, 'td_error': 1.2490377348956379, 'init_value': 6.078179836273193, 'ave_value': 6.081187459223075} step=12802
2022-04-22 03:20.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:20.38 [info     ] CQL_20220422030638: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00034854866865742415, 'time_algorithm_update': 0.06295759140411554, 'temp_loss': 1.754451996329203, 'temp': 0.34750443085425164, 'alpha_loss': -71.59419446735713, 'alpha': 4.117158321975973, 'critic_loss': 1653.9457203418533, 'actor_loss': -6.831387227670306, 'time_step': 0.06339947956834914, 'td_error': 1.2502482974925877, 'init_value': 6.230594635009766, 'ave_value': 6.232835866286892} step=13148
2022-04-22 03:20.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:21.01 [info     ] CQL_20220422030638: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.000356224920019249, 'time_algorithm_update': 0.06250891589015894, 'temp_loss': 1.7066810765018352, 'temp': 0.3379635750558335, 'alpha_loss': -74.37641697260686, 'alpha': 4.277429995509241, 'critic_loss': 1753.0666877878884, 'actor_loss': -6.9492752345311155, 'time_step': 0.06296062193854007, 'td_error': 1.2510524738374318, 'init_value': 6.358030319213867, 'ave_value': 6.360866441948259} step=13494
2022-04-22 03:21.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:21.23 [info     ] CQL_20220422030638: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00034038731128494177, 'time_algorithm_update': 0.0609247615571656, 'temp_loss': 1.66046551231704, 'temp': 0.32868280291901847, 'alpha_loss': -77.27219545221052, 'alpha': 4.443950972805133, 'critic_loss': 1862.2777734657243, 'actor_loss': -7.055910118742485, 'time_step': 0.061358832899545655, 'td_error': 1.2514478771232558, 'init_value': 6.436432838439941, 'ave_value': 6.440052651974645} step=13840
2022-04-22 03:21.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:21.45 [info     ] CQL_20220422030638: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003532763850482213, 'time_algorithm_update': 0.059611040732764096, 'temp_loss': 1.6141896475257211, 'temp': 0.3196562488127306, 'alpha_loss': -80.28537895775943, 'alpha': 4.616957671380456, 'critic_loss': 1964.8089610193506, 'actor_loss': -7.15144066452291, 'time_step': 0.06006043632595525, 'td_error': 1.2533160256964206, 'init_value': 6.6313605308532715, 'ave_value': 6.633037783105886} step=14186
2022-04-22 03:21.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:22.06 [info     ] CQL_20220422030638: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003518010839561506, 'time_algorithm_update': 0.05900189504458036, 'temp_loss': 1.5701227687686854, 'temp': 0.3108799943345131, 'alpha_loss': -83.40897005555257, 'alpha': 4.796703118120314, 'critic_loss': 2038.4951404725884, 'actor_loss': -7.283080114794604, 'time_step': 0.05945068356618716, 'td_error': 1.2540517620472258, 'init_value': 6.731871604919434, 'ave_value': 6.73385256504549} step=14532
2022-04-22 03:22.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:22.28 [info     ] CQL_20220422030638: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003511733402406549, 'time_algorithm_update': 0.05917740626142204, 'temp_loss': 1.526952748009235, 'temp': 0.30234333226791005, 'alpha_loss': -86.65920689753715, 'alpha': 4.983429956987414, 'critic_loss': 2142.7825278574333, 'actor_loss': -7.364629187335858, 'time_step': 0.05962686524914868, 'td_error': 1.2550042163328055, 'init_value': 6.8390889167785645, 'ave_value': 6.840776175512076} step=14878
2022-04-22 03:22.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:22.49 [info     ] CQL_20220422030638: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.000347014107456097, 'time_algorithm_update': 0.05944770746837462, 'temp_loss': 1.485066194410269, 'temp': 0.29404172060117556, 'alpha_loss': -90.02439900216339, 'alpha': 5.17743735230727, 'critic_loss': 2249.670081342576, 'actor_loss': -7.444122707223617, 'time_step': 0.05989456452386228, 'td_error': 1.2554463103141058, 'init_value': 6.908779144287109, 'ave_value': 6.9105316329872855} step=15224
2022-04-22 03:22.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:23.11 [info     ] CQL_20220422030638: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003521063424259252, 'time_algorithm_update': 0.059308159558070186, 'temp_loss': 1.4440434499283057, 'temp': 0.28596835294899914, 'alpha_loss': -93.52716029172687, 'alpha': 5.378971838537669, 'critic_loss': 2329.806149521315, 'actor_loss': -7.534751303623177, 'time_step': 0.05975898841902011, 'td_error': 1.2565955195948952, 'init_value': 7.032537937164307, 'ave_value': 7.034219669515683} step=15570
2022-04-22 03:23.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:23.32 [info     ] CQL_20220422030638: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003367345457132152, 'time_algorithm_update': 0.05745307007276943, 'temp_loss': 1.4048903908343673, 'temp': 0.27811575108180847, 'alpha_loss': -97.18335415173128, 'alpha': 5.588370919916671, 'critic_loss': 2435.29483226269, 'actor_loss': -7.601182405659229, 'time_step': 0.05787825239876102, 'td_error': 1.25773708654654, 'init_value': 7.1428046226501465, 'ave_value': 7.1436553614124865} step=15916
2022-04-22 03:23.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:23.53 [info     ] CQL_20220422030638: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003389099429797575, 'time_algorithm_update': 0.059279900065736274, 'temp_loss': 1.3655931556844987, 'temp': 0.27047969125254306, 'alpha_loss': -100.96265863407554, 'alpha': 5.805928827021163, 'critic_loss': 2469.381429506864, 'actor_loss': -7.716172947359912, 'time_step': 0.05970943663161614, 'td_error': 1.2590106400625078, 'init_value': 7.277541160583496, 'ave_value': 7.278028231765942} step=16262
2022-04-22 03:23.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:24.15 [info     ] CQL_20220422030638: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00035890265007239546, 'time_algorithm_update': 0.05964055915788419, 'temp_loss': 1.3281989221628, 'temp': 0.26305551981994874, 'alpha_loss': -104.89083156695945, 'alpha': 6.031956370855342, 'critic_loss': 2329.970705241826, 'actor_loss': -7.951483031917859, 'time_step': 0.060094417175116564, 'td_error': 1.2633668686575268, 'init_value': 7.646164894104004, 'ave_value': 7.643301703840593} step=16608
2022-04-22 03:24.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:24.37 [info     ] CQL_20220422030638: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003437837424305822, 'time_algorithm_update': 0.05995720935005673, 'temp_loss': 1.2917753840457498, 'temp': 0.25583275773621705, 'alpha_loss': -108.97272098960215, 'alpha': 6.266783872780772, 'critic_loss': 2039.6225194324647, 'actor_loss': -8.255222524521669, 'time_step': 0.06039538548860936, 'td_error': 1.2648451439080903, 'init_value': 7.827968597412109, 'ave_value': 7.826155109685877} step=16954
2022-04-22 03:24.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:24.58 [info     ] CQL_20220422030638: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003461203823199851, 'time_algorithm_update': 0.059397740860205855, 'temp_loss': 1.2564358656116992, 'temp': 0.2488090808508713, 'alpha_loss': -113.2189308342906, 'alpha': 6.510757166526221, 'critic_loss': 1950.1030393390986, 'actor_loss': -8.403481943758926, 'time_step': 0.059832273191110244, 'td_error': 1.2661758029128178, 'init_value': 7.978687286376953, 'ave_value': 7.977532991406589} step=17300
2022-04-22 03:24.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422030638/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 03:25.00 [info     ] FQE_20220422032459: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00016687953539487333, 'time_algorithm_update': 0.008675538887411861, 'loss': 0.006883965260585991, 'time_step': 0.0089175552971619, 'init_value': -0.07693496346473694, 'ave_value': -0.03182795298573849, 'soft_opc': nan} step=177




2022-04-22 03:25.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.02 [info     ] FQE_20220422032459: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.0001678938246042715, 'time_algorithm_update': 0.008861454193201442, 'loss': 0.00442558527387426, 'time_step': 0.009103242960353355, 'init_value': -0.1457357257604599, 'ave_value': -0.07023685032630468, 'soft_opc': nan} step=354




2022-04-22 03:25.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.04 [info     ] FQE_20220422032459: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00017413850558006157, 'time_algorithm_update': 0.008411072068295236, 'loss': 0.003674562654121237, 'time_step': 0.008651248479293565, 'init_value': -0.2029791921377182, 'ave_value': -0.1132477138280958, 'soft_opc': nan} step=531




2022-04-22 03:25.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.06 [info     ] FQE_20220422032459: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00017030495034772798, 'time_algorithm_update': 0.009134684578847077, 'loss': 0.0032392172182326095, 'time_step': 0.009379151177271611, 'init_value': -0.2696511149406433, 'ave_value': -0.15870529116989973, 'soft_opc': nan} step=708




2022-04-22 03:25.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.07 [info     ] FQE_20220422032459: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00017060398381982147, 'time_algorithm_update': 0.009179010229595637, 'loss': 0.0028962088613323656, 'time_step': 0.00942160315432791, 'init_value': -0.32340332865715027, 'ave_value': -0.1932918124437869, 'soft_opc': nan} step=885




2022-04-22 03:25.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.09 [info     ] FQE_20220422032459: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00016830196488375044, 'time_algorithm_update': 0.008586211393108476, 'loss': 0.00267226753972134, 'time_step': 0.008826401274083025, 'init_value': -0.33341696858406067, 'ave_value': -0.1853970485999509, 'soft_opc': nan} step=1062




2022-04-22 03:25.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.11 [info     ] FQE_20220422032459: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00018427331568831104, 'time_algorithm_update': 0.009101129521084371, 'loss': 0.0023622642689526585, 'time_step': 0.009358727999326199, 'init_value': -0.38553813099861145, 'ave_value': -0.21084162485827734, 'soft_opc': nan} step=1239




2022-04-22 03:25.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.13 [info     ] FQE_20220422032459: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.0001851825390831899, 'time_algorithm_update': 0.009101529579378117, 'loss': 0.0020129500281876006, 'time_step': 0.009361850339814095, 'init_value': -0.4579158127307892, 'ave_value': -0.28017017328524374, 'soft_opc': nan} step=1416




2022-04-22 03:25.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.14 [info     ] FQE_20220422032459: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00017149030825512558, 'time_algorithm_update': 0.009132198021236786, 'loss': 0.0018332485865280663, 'time_step': 0.009383283765976039, 'init_value': -0.4682641327381134, 'ave_value': -0.27586917015793805, 'soft_opc': nan} step=1593




2022-04-22 03:25.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.16 [info     ] FQE_20220422032459: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.00017047736604334945, 'time_algorithm_update': 0.008763302517476056, 'loss': 0.0018172396296282736, 'time_step': 0.009003187976988023, 'init_value': -0.5376439690589905, 'ave_value': -0.33601765792380583, 'soft_opc': nan} step=1770




2022-04-22 03:25.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.18 [info     ] FQE_20220422032459: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.0001752457376253807, 'time_algorithm_update': 0.00913824065256927, 'loss': 0.0016422972147547676, 'time_step': 0.009388238023229911, 'init_value': -0.5828778147697449, 'ave_value': -0.370811000282879, 'soft_opc': nan} step=1947




2022-04-22 03:25.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.20 [info     ] FQE_20220422032459: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00017016620959265757, 'time_algorithm_update': 0.009082484380000056, 'loss': 0.0016257465957784972, 'time_step': 0.009326491652235473, 'init_value': -0.6549479365348816, 'ave_value': -0.42264288031124136, 'soft_opc': nan} step=2124




2022-04-22 03:25.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.21 [info     ] FQE_20220422032459: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00017234699874274474, 'time_algorithm_update': 0.00885007071629756, 'loss': 0.0016314177344734701, 'time_step': 0.009097464340554792, 'init_value': -0.6818008422851562, 'ave_value': -0.43565878944175024, 'soft_opc': nan} step=2301




2022-04-22 03:25.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.23 [info     ] FQE_20220422032459: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00017346904776190634, 'time_algorithm_update': 0.00899365796881207, 'loss': 0.0017859283790300142, 'time_step': 0.00923843168269443, 'init_value': -0.7362893223762512, 'ave_value': -0.4809626321221615, 'soft_opc': nan} step=2478




2022-04-22 03:25.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.25 [info     ] FQE_20220422032459: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00017059051384360103, 'time_algorithm_update': 0.009102005069538698, 'loss': 0.0018643111716969054, 'time_step': 0.009349478166655632, 'init_value': -0.8127617835998535, 'ave_value': -0.5391427536984463, 'soft_opc': nan} step=2655




2022-04-22 03:25.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.26 [info     ] FQE_20220422032459: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00018529838087868555, 'time_algorithm_update': 0.009079666460974741, 'loss': 0.0019348739418806244, 'time_step': 0.009338501483033605, 'init_value': -0.86613529920578, 'ave_value': -0.5942292819286252, 'soft_opc': nan} step=2832




2022-04-22 03:25.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.28 [info     ] FQE_20220422032459: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00017166002995550297, 'time_algorithm_update': 0.009198008284056928, 'loss': 0.002044801397372753, 'time_step': 0.009448134966489285, 'init_value': -0.903346836566925, 'ave_value': -0.621373773350551, 'soft_opc': nan} step=3009




2022-04-22 03:25.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.30 [info     ] FQE_20220422032459: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00016894852374233095, 'time_algorithm_update': 0.009088923028633419, 'loss': 0.002210401287821969, 'time_step': 0.009333141779495497, 'init_value': -0.9304764270782471, 'ave_value': -0.6349661219294543, 'soft_opc': nan} step=3186




2022-04-22 03:25.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.32 [info     ] FQE_20220422032459: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001728844507939398, 'time_algorithm_update': 0.009314605745218568, 'loss': 0.0023239076054999658, 'time_step': 0.009562590701431877, 'init_value': -0.9555814266204834, 'ave_value': -0.6652655364279901, 'soft_opc': nan} step=3363




2022-04-22 03:25.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.34 [info     ] FQE_20220422032459: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.0001749467041532872, 'time_algorithm_update': 0.008855895134015272, 'loss': 0.0025209316087355257, 'time_step': 0.009103952828100172, 'init_value': -1.015355110168457, 'ave_value': -0.7168768043610247, 'soft_opc': nan} step=3540




2022-04-22 03:25.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.35 [info     ] FQE_20220422032459: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.0001731525033207263, 'time_algorithm_update': 0.009194704098890057, 'loss': 0.00265387687924197, 'time_step': 0.00944124642065016, 'init_value': -1.0583093166351318, 'ave_value': -0.7516975995813225, 'soft_opc': nan} step=3717




2022-04-22 03:25.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.37 [info     ] FQE_20220422032459: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00018035894059865488, 'time_algorithm_update': 0.009099483489990234, 'loss': 0.002782631309316257, 'time_step': 0.009353843785948673, 'init_value': -1.0937556028366089, 'ave_value': -0.7777008918536318, 'soft_opc': nan} step=3894




2022-04-22 03:25.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.39 [info     ] FQE_20220422032459: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00017068210968189995, 'time_algorithm_update': 0.008630984247067553, 'loss': 0.003020861393760227, 'time_step': 0.008876063729410117, 'init_value': -1.1181789636611938, 'ave_value': -0.7981130012766914, 'soft_opc': nan} step=4071




2022-04-22 03:25.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.41 [info     ] FQE_20220422032459: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00016593394306419932, 'time_algorithm_update': 0.00914559930057849, 'loss': 0.0032301293630126633, 'time_step': 0.009387257408961064, 'init_value': -1.1552807092666626, 'ave_value': -0.8239474794662393, 'soft_opc': nan} step=4248




2022-04-22 03:25.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.42 [info     ] FQE_20220422032459: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00016960047059139962, 'time_algorithm_update': 0.009119666902358922, 'loss': 0.0032414119676844673, 'time_step': 0.009366516339576851, 'init_value': -1.1751219034194946, 'ave_value': -0.836222087365908, 'soft_opc': nan} step=4425




2022-04-22 03:25.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.44 [info     ] FQE_20220422032459: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.0001739566609010858, 'time_algorithm_update': 0.009112923832262977, 'loss': 0.0034228515502796797, 'time_step': 0.009358486886751853, 'init_value': -1.2356315851211548, 'ave_value': -0.8915918563668792, 'soft_opc': nan} step=4602




2022-04-22 03:25.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.46 [info     ] FQE_20220422032459: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00017123707270218154, 'time_algorithm_update': 0.008954254247374454, 'loss': 0.003509339253782521, 'time_step': 0.009201941517113293, 'init_value': -1.2443934679031372, 'ave_value': -0.8923966889266853, 'soft_opc': nan} step=4779




2022-04-22 03:25.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.48 [info     ] FQE_20220422032459: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.0001682090220478295, 'time_algorithm_update': 0.009155700435746188, 'loss': 0.003924887206755305, 'time_step': 0.009397576757743534, 'init_value': -1.298392415046692, 'ave_value': -0.9390495624415599, 'soft_opc': nan} step=4956




2022-04-22 03:25.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.49 [info     ] FQE_20220422032459: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00016992644401593398, 'time_algorithm_update': 0.009179470902782375, 'loss': 0.00405721852718984, 'time_step': 0.009424798232687395, 'init_value': -1.343824028968811, 'ave_value': -0.9649109567227485, 'soft_opc': nan} step=5133




2022-04-22 03:25.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.51 [info     ] FQE_20220422032459: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00017124380769029175, 'time_algorithm_update': 0.008901612233307403, 'loss': 0.004420761733926243, 'time_step': 0.009150774465442378, 'init_value': -1.454837441444397, 'ave_value': -1.0855747326112188, 'soft_opc': nan} step=5310




2022-04-22 03:25.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.53 [info     ] FQE_20220422032459: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00017220152299956412, 'time_algorithm_update': 0.008683879496687549, 'loss': 0.004497219519005657, 'time_step': 0.008929146211699577, 'init_value': -1.452419400215149, 'ave_value': -1.0737479296845718, 'soft_opc': nan} step=5487




2022-04-22 03:25.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.55 [info     ] FQE_20220422032459: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00016858887537724555, 'time_algorithm_update': 0.009215659340896175, 'loss': 0.004899878621148735, 'time_step': 0.009460803479124598, 'init_value': -1.5291167497634888, 'ave_value': -1.139629500552833, 'soft_opc': nan} step=5664




2022-04-22 03:25.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.56 [info     ] FQE_20220422032459: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.0001683410278147897, 'time_algorithm_update': 0.008994431145447122, 'loss': 0.005117425834825841, 'time_step': 0.009242993963640289, 'init_value': -1.6087560653686523, 'ave_value': -1.2097425389889482, 'soft_opc': nan} step=5841




2022-04-22 03:25.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:25.58 [info     ] FQE_20220422032459: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00016714758792165982, 'time_algorithm_update': 0.009001345284241067, 'loss': 0.005213607224802839, 'time_step': 0.009239575283675545, 'init_value': -1.6500855684280396, 'ave_value': -1.2401862597367068, 'soft_opc': nan} step=6018




2022-04-22 03:25.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.00 [info     ] FQE_20220422032459: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00018928818783517612, 'time_algorithm_update': 0.009236110805791651, 'loss': 0.005541613434821546, 'time_step': 0.009498923511828407, 'init_value': -1.6978139877319336, 'ave_value': -1.275298355260232, 'soft_opc': nan} step=6195




2022-04-22 03:26.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.02 [info     ] FQE_20220422032459: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00018023771081267104, 'time_algorithm_update': 0.009234262725054208, 'loss': 0.0055762534945648125, 'time_step': 0.009486559420655676, 'init_value': -1.7512584924697876, 'ave_value': -1.3279431654347313, 'soft_opc': nan} step=6372




2022-04-22 03:26.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.03 [info     ] FQE_20220422032459: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.0001713178925595041, 'time_algorithm_update': 0.00886815281237586, 'loss': 0.005682127266112021, 'time_step': 0.009117231530658269, 'init_value': -1.7750712633132935, 'ave_value': -1.3344778651142264, 'soft_opc': nan} step=6549




2022-04-22 03:26.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.05 [info     ] FQE_20220422032459: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00017093803923008805, 'time_algorithm_update': 0.009303934830056745, 'loss': 0.0058988508861117315, 'time_step': 0.009546958794028072, 'init_value': -1.782379150390625, 'ave_value': -1.3388029420921752, 'soft_opc': nan} step=6726




2022-04-22 03:26.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.07 [info     ] FQE_20220422032459: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00017134752650718905, 'time_algorithm_update': 0.008624471513564977, 'loss': 0.006001452293337749, 'time_step': 0.00887143544558078, 'init_value': -1.828052043914795, 'ave_value': -1.3794546090357296, 'soft_opc': nan} step=6903




2022-04-22 03:26.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.09 [info     ] FQE_20220422032459: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00017441464009258033, 'time_algorithm_update': 0.009524593245511675, 'loss': 0.006366901835827893, 'time_step': 0.009774613515131892, 'init_value': -1.8241499662399292, 'ave_value': -1.3662374124736398, 'soft_opc': nan} step=7080




2022-04-22 03:26.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.11 [info     ] FQE_20220422032459: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00018703735480874272, 'time_algorithm_update': 0.00999136833147814, 'loss': 0.006664952747313316, 'time_step': 0.010255767800713663, 'init_value': -1.8640857934951782, 'ave_value': -1.3900589620655364, 'soft_opc': nan} step=7257




2022-04-22 03:26.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.13 [info     ] FQE_20220422032459: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00017531308750648282, 'time_algorithm_update': 0.010046447064243468, 'loss': 0.006407001605625712, 'time_step': 0.010302430492336466, 'init_value': -1.872216820716858, 'ave_value': -1.3964675410060554, 'soft_opc': nan} step=7434




2022-04-22 03:26.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.14 [info     ] FQE_20220422032459: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.0001709865311444816, 'time_algorithm_update': 0.009786061647921632, 'loss': 0.006734643772272077, 'time_step': 0.010027935275923733, 'init_value': -1.9312496185302734, 'ave_value': -1.464888777380233, 'soft_opc': nan} step=7611




2022-04-22 03:26.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.16 [info     ] FQE_20220422032459: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00017335185896878862, 'time_algorithm_update': 0.00926704325918424, 'loss': 0.0070617043335611624, 'time_step': 0.009512788158352091, 'init_value': -1.9489647150039673, 'ave_value': -1.4620633336964313, 'soft_opc': nan} step=7788




2022-04-22 03:26.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.18 [info     ] FQE_20220422032459: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00017219613500907597, 'time_algorithm_update': 0.009978220287689382, 'loss': 0.007146036175755773, 'time_step': 0.010226597220210706, 'init_value': -2.0087709426879883, 'ave_value': -1.4947478718138314, 'soft_opc': nan} step=7965




2022-04-22 03:26.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.20 [info     ] FQE_20220422032459: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00017613340905830684, 'time_algorithm_update': 0.009988138231180482, 'loss': 0.007336165994161735, 'time_step': 0.010239570154308599, 'init_value': -2.025953531265259, 'ave_value': -1.5274880479226958, 'soft_opc': nan} step=8142




2022-04-22 03:26.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.22 [info     ] FQE_20220422032459: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.0001671785688669668, 'time_algorithm_update': 0.009662210604565291, 'loss': 0.007422678257597842, 'time_step': 0.009901481833161608, 'init_value': -2.0629236698150635, 'ave_value': -1.556975305456299, 'soft_opc': nan} step=8319




2022-04-22 03:26.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.24 [info     ] FQE_20220422032459: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.0001695142627435889, 'time_algorithm_update': 0.009054314618730276, 'loss': 0.007954238661062393, 'time_step': 0.00929761471721412, 'init_value': -2.104910135269165, 'ave_value': -1.5883121524487172, 'soft_opc': nan} step=8496




2022-04-22 03:26.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.26 [info     ] FQE_20220422032459: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00017613879704879502, 'time_algorithm_update': 0.009997342265931899, 'loss': 0.007959654024936322, 'time_step': 0.01025466460966121, 'init_value': -2.1635591983795166, 'ave_value': -1.615887977491628, 'soft_opc': nan} step=8673




2022-04-22 03:26.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:26.28 [info     ] FQE_20220422032459: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00017420316146591963, 'time_algorithm_update': 0.009893368866484044, 'loss': 0.0080941726337187, 'time_step': 0.010143324480218402, 'init_value': -2.1705939769744873, 'ave_value': -1.6371786439919973, 'soft_opc': nan} step=8850




2022-04-22 03:26.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032459/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 03:26.28 [info     ] Directory is created at d3rlpy_logs/FQE_20220422032628
2022-04-22 03:26.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 03:26.28 [debug    ] Building models...
2022-04-22 03:26.28 [debug    ] Models have been built.
2022-04-22 03:26.28 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422032628/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 03:26.32 [info     ] FQE_20220422032628: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.0001730744267853213, 'time_algorithm_update': 0.009794952500034386, 'loss': 0.025207238854237007, 'time_step': 0.010043864183022942, 'init_value': -1.07277512550354, 'ave_value': -1.0494174416921314, 'soft_opc': nan} step=355




2022-04-22 03:26.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:26.36 [info     ] FQE_20220422032628: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00017491797326316295, 'time_algorithm_update': 0.009791818806822871, 'loss': 0.024128599294369488, 'time_step': 0.010044428999994841, 'init_value': -2.216568946838379, 'ave_value': -2.156262113949814, 'soft_opc': nan} step=710




2022-04-22 03:26.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:26.39 [info     ] FQE_20220422032628: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.0001783478427940691, 'time_algorithm_update': 0.009685554638714858, 'loss': 0.025802692818179936, 'time_step': 0.00994212526670644, 'init_value': -2.722695827484131, 'ave_value': -2.596800054193617, 'soft_opc': nan} step=1065




2022-04-22 03:26.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:26.43 [info     ] FQE_20220422032628: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00018425323593784386, 'time_algorithm_update': 0.009738757576740963, 'loss': 0.03085820806330778, 'time_step': 0.01000115434888383, 'init_value': -3.6638131141662598, 'ave_value': -3.4995168562868235, 'soft_opc': nan} step=1420




2022-04-22 03:26.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:26.47 [info     ] FQE_20220422032628: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.0001879537609261526, 'time_algorithm_update': 0.00990509180955484, 'loss': 0.03684572004254016, 'time_step': 0.010170254908816916, 'init_value': -4.177981853485107, 'ave_value': -3.929025273049967, 'soft_opc': nan} step=1775




2022-04-22 03:26.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:26.51 [info     ] FQE_20220422032628: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00017548950625137543, 'time_algorithm_update': 0.00970603177245234, 'loss': 0.04606554148010385, 'time_step': 0.00995898851206605, 'init_value': -4.921982288360596, 'ave_value': -4.53261610375408, 'soft_opc': nan} step=2130




2022-04-22 03:26.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:26.55 [info     ] FQE_20220422032628: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00017755266646264306, 'time_algorithm_update': 0.009682830622498419, 'loss': 0.05802934019183609, 'time_step': 0.009937451590954417, 'init_value': -5.401552677154541, 'ave_value': -4.938246271010378, 'soft_opc': nan} step=2485




2022-04-22 03:26.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:26.58 [info     ] FQE_20220422032628: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.0001755721132520219, 'time_algorithm_update': 0.009634533734388754, 'loss': 0.07667946922527233, 'time_step': 0.009888335348854603, 'init_value': -6.038367748260498, 'ave_value': -5.455742522219299, 'soft_opc': nan} step=2840




2022-04-22 03:26.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.02 [info     ] FQE_20220422032628: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001779515978316186, 'time_algorithm_update': 0.009735833423238405, 'loss': 0.09239296556883295, 'time_step': 0.009991174348643128, 'init_value': -6.493292808532715, 'ave_value': -5.87041097766836, 'soft_opc': nan} step=3195




2022-04-22 03:27.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.06 [info     ] FQE_20220422032628: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00017896504469320807, 'time_algorithm_update': 0.009653677738888164, 'loss': 0.11230425957432935, 'time_step': 0.009906360465036311, 'init_value': -7.158973217010498, 'ave_value': -6.43453045314372, 'soft_opc': nan} step=3550




2022-04-22 03:27.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.10 [info     ] FQE_20220422032628: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.0001756177821629484, 'time_algorithm_update': 0.009735956326336928, 'loss': 0.13276274100890462, 'time_step': 0.009989297222083722, 'init_value': -7.800282001495361, 'ave_value': -7.085431286953438, 'soft_opc': nan} step=3905




2022-04-22 03:27.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.13 [info     ] FQE_20220422032628: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00017660033534949935, 'time_algorithm_update': 0.009587835258161518, 'loss': 0.16231595075781077, 'time_step': 0.009840043161956358, 'init_value': -7.8861083984375, 'ave_value': -7.21953793829319, 'soft_opc': nan} step=4260




2022-04-22 03:27.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.17 [info     ] FQE_20220422032628: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00017830217388314261, 'time_algorithm_update': 0.009915302169155068, 'loss': 0.1936725020198755, 'time_step': 0.010173875513211103, 'init_value': -8.35090446472168, 'ave_value': -7.712055360684063, 'soft_opc': nan} step=4615




2022-04-22 03:27.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.21 [info     ] FQE_20220422032628: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001791900312396842, 'time_algorithm_update': 0.009676088413722079, 'loss': 0.2223821510049239, 'time_step': 0.009931771184357119, 'init_value': -8.146037101745605, 'ave_value': -7.642021358351641, 'soft_opc': nan} step=4970




2022-04-22 03:27.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.25 [info     ] FQE_20220422032628: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00018049696801413954, 'time_algorithm_update': 0.00978928888347787, 'loss': 0.255088451940199, 'time_step': 0.010047228235593984, 'init_value': -8.557574272155762, 'ave_value': -8.220182636819368, 'soft_opc': nan} step=5325




2022-04-22 03:27.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.28 [info     ] FQE_20220422032628: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00018385430456886829, 'time_algorithm_update': 0.009759312616267675, 'loss': 0.281096774056344, 'time_step': 0.010023624796262929, 'init_value': -8.41676139831543, 'ave_value': -8.20105460820722, 'soft_opc': nan} step=5680




2022-04-22 03:27.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.32 [info     ] FQE_20220422032628: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00017914772033691406, 'time_algorithm_update': 0.00968191254306847, 'loss': 0.3254263020527195, 'time_step': 0.009937545615182796, 'init_value': -8.652352333068848, 'ave_value': -8.628534286066008, 'soft_opc': nan} step=6035




2022-04-22 03:27.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.36 [info     ] FQE_20220422032628: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00018125386305258308, 'time_algorithm_update': 0.010081485963203537, 'loss': 0.35838855191750424, 'time_step': 0.010342066724535444, 'init_value': -8.99270248413086, 'ave_value': -9.036802223567854, 'soft_opc': nan} step=6390




2022-04-22 03:27.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.40 [info     ] FQE_20220422032628: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.0001765701132760921, 'time_algorithm_update': 0.009449930594001017, 'loss': 0.39913862822446183, 'time_step': 0.00970481214388995, 'init_value': -9.11363410949707, 'ave_value': -9.313736605946277, 'soft_opc': nan} step=6745




2022-04-22 03:27.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.44 [info     ] FQE_20220422032628: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00018680599373830875, 'time_algorithm_update': 0.010036758637764084, 'loss': 0.4508205922976346, 'time_step': 0.010304120560766945, 'init_value': -9.075496673583984, 'ave_value': -9.455260544897392, 'soft_opc': nan} step=7100




2022-04-22 03:27.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.47 [info     ] FQE_20220422032628: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00017722828287473867, 'time_algorithm_update': 0.009551329008290466, 'loss': 0.5196741116508632, 'time_step': 0.009803847863640584, 'init_value': -9.176666259765625, 'ave_value': -9.907357457145915, 'soft_opc': nan} step=7455




2022-04-22 03:27.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.51 [info     ] FQE_20220422032628: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00017793816579899318, 'time_algorithm_update': 0.009802960677885674, 'loss': 0.5899464907675562, 'time_step': 0.010061452086542693, 'init_value': -9.460149765014648, 'ave_value': -10.450511904342275, 'soft_opc': nan} step=7810




2022-04-22 03:27.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.55 [info     ] FQE_20220422032628: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001811329747589541, 'time_algorithm_update': 0.00961155421297315, 'loss': 0.6554000434111541, 'time_step': 0.009872249818184006, 'init_value': -9.320480346679688, 'ave_value': -10.699904509164883, 'soft_opc': nan} step=8165




2022-04-22 03:27.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:27.59 [info     ] FQE_20220422032628: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00017910742423903774, 'time_algorithm_update': 0.009741448012875839, 'loss': 0.7502292537668221, 'time_step': 0.009998087815835442, 'init_value': -9.372068405151367, 'ave_value': -11.157101708538399, 'soft_opc': nan} step=8520




2022-04-22 03:27.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.02 [info     ] FQE_20220422032628: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00017852447402309365, 'time_algorithm_update': 0.009695505088483783, 'loss': 0.8269871286103423, 'time_step': 0.009953489437909193, 'init_value': -9.405779838562012, 'ave_value': -11.682632483407241, 'soft_opc': nan} step=8875




2022-04-22 03:28.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.06 [info     ] FQE_20220422032628: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.0001798253663828675, 'time_algorithm_update': 0.00986150217727876, 'loss': 0.8972541006203268, 'time_step': 0.010117998929090903, 'init_value': -9.131044387817383, 'ave_value': -11.828830952241422, 'soft_opc': nan} step=9230




2022-04-22 03:28.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.10 [info     ] FQE_20220422032628: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001766829423501458, 'time_algorithm_update': 0.009554918719009615, 'loss': 0.9466465795438894, 'time_step': 0.009808672649759641, 'init_value': -8.903672218322754, 'ave_value': -12.160109220234201, 'soft_opc': nan} step=9585




2022-04-22 03:28.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.14 [info     ] FQE_20220422032628: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00017616110788264746, 'time_algorithm_update': 0.009792057225401973, 'loss': 0.989047910000237, 'time_step': 0.010042475310849472, 'init_value': -8.698873519897461, 'ave_value': -12.477443146110156, 'soft_opc': nan} step=9940




2022-04-22 03:28.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.18 [info     ] FQE_20220422032628: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00017586828957141286, 'time_algorithm_update': 0.009588931312023754, 'loss': 1.0507860974736616, 'time_step': 0.009836861785029022, 'init_value': -8.791924476623535, 'ave_value': -13.075533526690924, 'soft_opc': nan} step=10295




2022-04-22 03:28.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.21 [info     ] FQE_20220422032628: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.000186989340983646, 'time_algorithm_update': 0.009995862128029408, 'loss': 1.1024407064830752, 'time_step': 0.010262704231369664, 'init_value': -8.291876792907715, 'ave_value': -12.966175164883547, 'soft_opc': nan} step=10650




2022-04-22 03:28.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.25 [info     ] FQE_20220422032628: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.0001817709963086625, 'time_algorithm_update': 0.009737576229471556, 'loss': 1.1198903596107388, 'time_step': 0.009993457794189453, 'init_value': -8.020910263061523, 'ave_value': -13.221288130480792, 'soft_opc': nan} step=11005




2022-04-22 03:28.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.29 [info     ] FQE_20220422032628: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00018230626280878632, 'time_algorithm_update': 0.009516813385654504, 'loss': 1.1788829867583765, 'time_step': 0.009775142938318387, 'init_value': -8.18753719329834, 'ave_value': -13.691801487711981, 'soft_opc': nan} step=11360




2022-04-22 03:28.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.33 [info     ] FQE_20220422032628: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017823568532164668, 'time_algorithm_update': 0.009921915430418203, 'loss': 1.203920591769504, 'time_step': 0.010177814456778513, 'init_value': -7.838464260101318, 'ave_value': -13.94850173553205, 'soft_opc': nan} step=11715




2022-04-22 03:28.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.36 [info     ] FQE_20220422032628: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.0001783330675581811, 'time_algorithm_update': 0.009540875528899716, 'loss': 1.215590593050903, 'time_step': 0.009793957186416841, 'init_value': -7.775859355926514, 'ave_value': -14.079249564670457, 'soft_opc': nan} step=12070




2022-04-22 03:28.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.40 [info     ] FQE_20220422032628: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.0001728635438731019, 'time_algorithm_update': 0.00993648649941028, 'loss': 1.2244903978957258, 'time_step': 0.010180535114986795, 'init_value': -7.571971416473389, 'ave_value': -14.10159783082723, 'soft_opc': nan} step=12425




2022-04-22 03:28.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.44 [info     ] FQE_20220422032628: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00017733573913574218, 'time_algorithm_update': 0.009437348137439136, 'loss': 1.2719947615139922, 'time_step': 0.00968984885954521, 'init_value': -7.91758394241333, 'ave_value': -14.383090043819703, 'soft_opc': nan} step=12780




2022-04-22 03:28.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.48 [info     ] FQE_20220422032628: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.0001758528427338936, 'time_algorithm_update': 0.009830946317860778, 'loss': 1.2752135400620985, 'time_step': 0.010079625626684914, 'init_value': -8.293951034545898, 'ave_value': -14.805953818168419, 'soft_opc': nan} step=13135




2022-04-22 03:28.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.52 [info     ] FQE_20220422032628: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00018623446075009627, 'time_algorithm_update': 0.009673351637074645, 'loss': 1.2768855370895964, 'time_step': 0.009934542884289379, 'init_value': -8.145854949951172, 'ave_value': -14.726686164774623, 'soft_opc': nan} step=13490




2022-04-22 03:28.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.55 [info     ] FQE_20220422032628: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.000184806635682012, 'time_algorithm_update': 0.009853214613148864, 'loss': 1.2901256975153803, 'time_step': 0.0101180654176524, 'init_value': -8.984699249267578, 'ave_value': -15.411738308986104, 'soft_opc': nan} step=13845




2022-04-22 03:28.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:28.59 [info     ] FQE_20220422032628: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.0001773330527292171, 'time_algorithm_update': 0.00960861864224286, 'loss': 1.26758618203687, 'time_step': 0.009864011952574823, 'init_value': -9.055277824401855, 'ave_value': -15.624755988383193, 'soft_opc': nan} step=14200




2022-04-22 03:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.03 [info     ] FQE_20220422032628: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00018091336102552818, 'time_algorithm_update': 0.009701483686205367, 'loss': 1.2712431093651644, 'time_step': 0.009960886458276023, 'init_value': -9.345582962036133, 'ave_value': -15.81838988335935, 'soft_opc': nan} step=14555




2022-04-22 03:29.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.07 [info     ] FQE_20220422032628: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.0001798690204889002, 'time_algorithm_update': 0.009660599936901684, 'loss': 1.263373025685129, 'time_step': 0.00991666552046655, 'init_value': -9.530654907226562, 'ave_value': -15.973969561833902, 'soft_opc': nan} step=14910




2022-04-22 03:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.10 [info     ] FQE_20220422032628: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00018330291962959398, 'time_algorithm_update': 0.00995129128577004, 'loss': 1.2811213063731999, 'time_step': 0.010213090332461075, 'init_value': -10.066134452819824, 'ave_value': -16.506871877209388, 'soft_opc': nan} step=15265




2022-04-22 03:29.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.14 [info     ] FQE_20220422032628: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.0001766466758620571, 'time_algorithm_update': 0.009501097235881106, 'loss': 1.2603131668248646, 'time_step': 0.009754159416950924, 'init_value': -10.23521900177002, 'ave_value': -16.493280313803577, 'soft_opc': nan} step=15620




2022-04-22 03:29.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.18 [info     ] FQE_20220422032628: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00018298458045637103, 'time_algorithm_update': 0.009919314988901917, 'loss': 1.2224211165183028, 'time_step': 0.010180052905015542, 'init_value': -10.453883171081543, 'ave_value': -16.75188549214751, 'soft_opc': nan} step=15975




2022-04-22 03:29.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.22 [info     ] FQE_20220422032628: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00018457426151759188, 'time_algorithm_update': 0.009856723060070629, 'loss': 1.1959036710186743, 'time_step': 0.01011804459800183, 'init_value': -10.426652908325195, 'ave_value': -16.665299543549462, 'soft_opc': nan} step=16330




2022-04-22 03:29.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.26 [info     ] FQE_20220422032628: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00017810472300354863, 'time_algorithm_update': 0.009599875732206962, 'loss': 1.1849156446020368, 'time_step': 0.00985363973698146, 'init_value': -10.59758472442627, 'ave_value': -16.814946358675854, 'soft_opc': nan} step=16685




2022-04-22 03:29.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.29 [info     ] FQE_20220422032628: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00017976693704094686, 'time_algorithm_update': 0.009771946114553532, 'loss': 1.1517296144974902, 'time_step': 0.0100309754761172, 'init_value': -10.421406745910645, 'ave_value': -16.362422568948897, 'soft_opc': nan} step=17040




2022-04-22 03:29.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.33 [info     ] FQE_20220422032628: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00018257356025803257, 'time_algorithm_update': 0.009072690614512269, 'loss': 1.132205378862334, 'time_step': 0.00933120284281986, 'init_value': -10.698129653930664, 'ave_value': -16.28152731606303, 'soft_opc': nan} step=17395




2022-04-22 03:29.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:29.37 [info     ] FQE_20220422032628: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00018046540273746974, 'time_algorithm_update': 0.00992511426898795, 'loss': 1.1095200052458636, 'time_step': 0.010182157704527949, 'init_value': -11.1481351852417, 'ave_value': -16.430929364867154, 'soft_opc': nan} step=17750




2022-04-22 03:29.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422032628/model_17750.pt
search iteration:  16
using hyper params:  [0.0012113859961386786, 0.0008942139322357279, 7.482193884505977e-05, 5]
2022-04-22 03:29.37 [debug    ] RoundIterator is selected.
2022-04-22 03:29.37 [info     ] Directory is created at d3rlpy_logs/CQL_20220422032937
2022-04-22 03:29.37 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 03:29.37 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 03:29.37 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422032937/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0012113859961386786, 'actor_optim_factory': {'op

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:30.00 [info     ] CQL_20220422032937: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00042965990959564386, 'time_algorithm_update': 0.06382246375772994, 'temp_loss': 4.652606362552312, 'temp': 0.9863304382114741, 'alpha_loss': -17.536722954987102, 'alpha': 1.0177477536173913, 'critic_loss': 180.80333215790677, 'actor_loss': -0.7197144866054286, 'time_step': 0.06435310151535652, 'td_error': 1.2981897120960741, 'init_value': -1.9189485311508179, 'ave_value': -1.615359197600072} step=346
2022-04-22 03:30.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:30.23 [info     ] CQL_20220422032937: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00040572847245056505, 'time_algorithm_update': 0.06391082609319962, 'temp_loss': 4.837076497215756, 'temp': 0.960076779127121, 'alpha_loss': -18.339927144133288, 'alpha': 1.0543683195389764, 'critic_loss': 124.54923953899758, 'actor_loss': 3.1113474968540875, 'time_step': 0.06441803267925461, 'td_error': 1.2851341895693464, 'init_value': -6.553714275360107, 'ave_value': -5.860471671531414} step=692
2022-04-22 03:30.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:30.46 [info     ] CQL_20220422032937: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0004393971724317253, 'time_algorithm_update': 0.062486177234980414, 'temp_loss': 4.723225109839026, 'temp': 0.9354507943798351, 'alpha_loss': -19.02448864203657, 'alpha': 1.0928119000671916, 'critic_loss': 148.49862573877235, 'actor_loss': 7.645862900452807, 'time_step': 0.06302818948822904, 'td_error': 1.3814017853521154, 'init_value': -10.300880432128906, 'ave_value': -9.416395365892287} step=1038
2022-04-22 03:30.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:31.09 [info     ] CQL_20220422032937: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00041231599157256197, 'time_algorithm_update': 0.06263903593052329, 'temp_loss': 4.605471375360654, 'temp': 0.9119468882593805, 'alpha_loss': -19.720801403067703, 'alpha': 1.1330760893105083, 'critic_loss': 209.83823425645772, 'actor_loss': 12.031396102354016, 'time_step': 0.0631520699903455, 'td_error': 1.5132125710886983, 'init_value': -14.606783866882324, 'ave_value': -13.593267596870195} step=1384
2022-04-22 03:31.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:31.32 [info     ] CQL_20220422032937: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00043634114237879054, 'time_algorithm_update': 0.06275888123264202, 'temp_loss': 4.4914635233796405, 'temp': 0.8893309860904782, 'alpha_loss': -20.45181984984117, 'alpha': 1.1752478249500253, 'critic_loss': 286.92865938396125, 'actor_loss': 15.943071164147701, 'time_step': 0.06330329765474176, 'td_error': 1.6285119329808644, 'init_value': -17.748680114746094, 'ave_value': -16.73442457162017} step=1730
2022-04-22 03:31.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:31.54 [info     ] CQL_20220422032937: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0004099187134318269, 'time_algorithm_update': 0.06275539660040354, 'temp_loss': 4.380700070044898, 'temp': 0.8674855867906802, 'alpha_loss': -21.227025985717773, 'alpha': 1.2193732485605802, 'critic_loss': 377.67476834864976, 'actor_loss': 19.19828412711965, 'time_step': 0.06326790283181075, 'td_error': 1.7496406178266528, 'init_value': -20.724945068359375, 'ave_value': -19.66576127363868} step=2076
2022-04-22 03:31.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:32.16 [info     ] CQL_20220422032937: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0004218417096000186, 'time_algorithm_update': 0.058512414810974474, 'temp_loss': 4.274543926205938, 'temp': 0.8463332647877622, 'alpha_loss': -22.02223833447936, 'alpha': 1.2655031656943305, 'critic_loss': 480.024586936642, 'actor_loss': 21.61705137263833, 'time_step': 0.059035703625982205, 'td_error': 1.8425043502022964, 'init_value': -22.628456115722656, 'ave_value': -21.605198669931006} step=2422
2022-04-22 03:32.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:32.37 [info     ] CQL_20220422032937: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00041942651561229904, 'time_algorithm_update': 0.058681147636016666, 'temp_loss': 4.17095866230871, 'temp': 0.8258138328273862, 'alpha_loss': -22.861305969987992, 'alpha': 1.31366276189771, 'critic_loss': 593.2040394689307, 'actor_loss': 23.213368399294815, 'time_step': 0.05920222246577974, 'td_error': 1.894760244368495, 'init_value': -24.005168914794922, 'ave_value': -23.06424566642432} step=2768
2022-04-22 03:32.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:32.58 [info     ] CQL_20220422032937: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00043641900740606937, 'time_algorithm_update': 0.05813262641774437, 'temp_loss': 4.069171971668398, 'temp': 0.805879796171464, 'alpha_loss': -23.733409787878134, 'alpha': 1.3639050756575744, 'critic_loss': 713.5671279113417, 'actor_loss': 23.956666549506217, 'time_step': 0.05867284915350765, 'td_error': 1.9236333259526024, 'init_value': -24.4302921295166, 'ave_value': -23.60483529627634} step=3114
2022-04-22 03:32.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:33.19 [info     ] CQL_20220422032937: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00041712295113271374, 'time_algorithm_update': 0.05663561131912849, 'temp_loss': 3.9711383091921064, 'temp': 0.7865011969398212, 'alpha_loss': -24.64014529630628, 'alpha': 1.4162755460408383, 'critic_loss': 840.7129855293759, 'actor_loss': 23.799640765768945, 'time_step': 0.05715791820790726, 'td_error': 1.8806211138554039, 'init_value': -23.867921829223633, 'ave_value': -23.09623921639889} step=3460
2022-04-22 03:33.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:33.39 [info     ] CQL_20220422032937: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00041906957681468455, 'time_algorithm_update': 0.05618368890244148, 'temp_loss': 3.8769790506087287, 'temp': 0.7676379658238737, 'alpha_loss': -25.582226356329947, 'alpha': 1.4708228903698783, 'critic_loss': 972.7883041470037, 'actor_loss': 22.697357558101587, 'time_step': 0.05670719753111029, 'td_error': 1.811988130331547, 'init_value': -22.534191131591797, 'ave_value': -21.920628036600316} step=3806
2022-04-22 03:33.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:34.00 [info     ] CQL_20220422032937: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0004351538729805478, 'time_algorithm_update': 0.05708681779100716, 'temp_loss': 3.78385550989581, 'temp': 0.7492655314117498, 'alpha_loss': -26.569556037814632, 'alpha': 1.5276065613493064, 'critic_loss': 1104.8499350134348, 'actor_loss': 20.590138066021694, 'time_step': 0.057618917757376084, 'td_error': 1.6921709569650232, 'init_value': -19.958911895751953, 'ave_value': -19.453326539825976} step=4152
2022-04-22 03:34.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:34.21 [info     ] CQL_20220422032937: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0004643360314341639, 'time_algorithm_update': 0.05687881756380114, 'temp_loss': 3.693044860928045, 'temp': 0.7313671060380219, 'alpha_loss': -27.595231287741246, 'alpha': 1.586701813116239, 'critic_loss': 1241.5534865539198, 'actor_loss': 17.502555968444472, 'time_step': 0.05743824402031871, 'td_error': 1.5681501612040347, 'init_value': -16.682662963867188, 'ave_value': -16.35332964919313} step=4498
2022-04-22 03:34.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:34.41 [info     ] CQL_20220422032937: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00041569588501329366, 'time_algorithm_update': 0.05713379934343989, 'temp_loss': 3.605226866082649, 'temp': 0.7139190590450529, 'alpha_loss': -28.662227652665507, 'alpha': 1.648175535518999, 'critic_loss': 1378.71039030042, 'actor_loss': 13.461661245092492, 'time_step': 0.05764798966446364, 'td_error': 1.4331023791094555, 'init_value': -12.364800453186035, 'ave_value': -12.161654901753222} step=4844
2022-04-22 03:34.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:35.02 [info     ] CQL_20220422032937: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0004487520008417912, 'time_algorithm_update': 0.05683259881300733, 'temp_loss': 3.518713042915212, 'temp': 0.6969033562379077, 'alpha_loss': -29.772653320621203, 'alpha': 1.7121041182837735, 'critic_loss': 1519.6654197384166, 'actor_loss': 9.062752368133193, 'time_step': 0.05738301704384688, 'td_error': 1.340466448063217, 'init_value': -8.277161598205566, 'ave_value': -8.152809703287891} step=5190
2022-04-22 03:35.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:35.23 [info     ] CQL_20220422032937: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0004181062555037482, 'time_algorithm_update': 0.05753839360496212, 'temp_loss': 3.43598626459265, 'temp': 0.6803072885626313, 'alpha_loss': -30.92567642851372, 'alpha': 1.778568359124178, 'critic_loss': 1666.299427539627, 'actor_loss': 5.8295499437806235, 'time_step': 0.058059428468605, 'td_error': 1.307081534269815, 'init_value': -6.154156684875488, 'ave_value': -6.085074053357822} step=5536
2022-04-22 03:35.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:35.44 [info     ] CQL_20220422032937: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0004283940860990844, 'time_algorithm_update': 0.05865396791799909, 'temp_loss': 3.3537982020074923, 'temp': 0.6641158840559811, 'alpha_loss': -32.131183398252276, 'alpha': 1.8476631417439853, 'critic_loss': 1812.9237896693235, 'actor_loss': 4.46750771930452, 'time_step': 0.05918660742699066, 'td_error': 1.2973230644849196, 'init_value': -5.374889373779297, 'ave_value': -5.327860383594019} step=5882
2022-04-22 03:35.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:36.06 [info     ] CQL_20220422032937: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00042787935003379865, 'time_algorithm_update': 0.05829304628978575, 'temp_loss': 3.2740486612209696, 'temp': 0.6483184164659136, 'alpha_loss': -33.3762488943993, 'alpha': 1.9194813742803012, 'critic_loss': 1960.3462002263593, 'actor_loss': 3.993646534881151, 'time_step': 0.058815258086761295, 'td_error': 1.2949572849490483, 'init_value': -5.117566108703613, 'ave_value': -5.084169065042263} step=6228
2022-04-22 03:36.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:36.28 [info     ] CQL_20220422032937: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0004151260232649787, 'time_algorithm_update': 0.06135575206293536, 'temp_loss': 3.1964692538873307, 'temp': 0.632900049231645, 'alpha_loss': -34.67658599677114, 'alpha': 1.9941139028251516, 'critic_loss': 2107.8019666726877, 'actor_loss': 3.822595070552275, 'time_step': 0.06186480811565598, 'td_error': 1.293121773208869, 'init_value': -4.940649032592773, 'ave_value': -4.909223575601085} step=6574
2022-04-22 03:36.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:36.50 [info     ] CQL_20220422032937: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00043871981560150323, 'time_algorithm_update': 0.061124690695305094, 'temp_loss': 3.120582501323237, 'temp': 0.6178527495075512, 'alpha_loss': -36.02289679422544, 'alpha': 2.071674525393227, 'critic_loss': 2257.346266200777, 'actor_loss': 3.77488933064345, 'time_step': 0.0616611614392672, 'td_error': 1.2941829682055377, 'init_value': -4.954137325286865, 'ave_value': -4.930730750154917} step=6920
2022-04-22 03:36.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:37.12 [info     ] CQL_20220422032937: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00042703730522552667, 'time_algorithm_update': 0.06067560242779682, 'temp_loss': 3.0466588404826345, 'temp': 0.6031656325552505, 'alpha_loss': -37.42672951097433, 'alpha': 2.1522696541912985, 'critic_loss': 2416.6074536273936, 'actor_loss': 3.7693693548268667, 'time_step': 0.06119771568761396, 'td_error': 1.2948558160005426, 'init_value': -4.9725117683410645, 'ave_value': -4.9513597847444295} step=7266
2022-04-22 03:37.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:37.35 [info     ] CQL_20220422032937: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00041396149321098547, 'time_algorithm_update': 0.0613352039645862, 'temp_loss': 2.973854202066543, 'temp': 0.5888319675288448, 'alpha_loss': -38.8807415824405, 'alpha': 2.23601216043351, 'critic_loss': 2572.8747114060243, 'actor_loss': 3.8073317453351323, 'time_step': 0.06184758754134867, 'td_error': 1.2953600570217254, 'init_value': -4.961658477783203, 'ave_value': -4.944945811547052} step=7612
2022-04-22 03:37.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:37.57 [info     ] CQL_20220422032937: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00043039170303785733, 'time_algorithm_update': 0.061226476134592395, 'temp_loss': 2.9026209329594077, 'temp': 0.5748433656775194, 'alpha_loss': -40.400357869319144, 'alpha': 2.3230267701121425, 'critic_loss': 2733.277401609917, 'actor_loss': 3.8315757602625498, 'time_step': 0.06175180046544599, 'td_error': 1.295943989777599, 'init_value': -4.973935127258301, 'ave_value': -4.956590041553539} step=7958
2022-04-22 03:37.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:38.20 [info     ] CQL_20220422032937: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00041890833419182396, 'time_algorithm_update': 0.06417886439086384, 'temp_loss': 2.834608607898558, 'temp': 0.561184559254288, 'alpha_loss': -41.96537978111664, 'alpha': 2.4134368593293116, 'critic_loss': 2890.857718230672, 'actor_loss': 3.9287170240644773, 'time_step': 0.06469635880751416, 'td_error': 1.2974757528029057, 'init_value': -5.043832778930664, 'ave_value': -5.029218816508497} step=8304
2022-04-22 03:38.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:38.43 [info     ] CQL_20220422032937: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0004278683248971928, 'time_algorithm_update': 0.06399216541665138, 'temp_loss': 2.7672129449127727, 'temp': 0.5478513860978143, 'alpha_loss': -43.59449986364111, 'alpha': 2.5073507576319525, 'critic_loss': 3055.258830693416, 'actor_loss': 4.009464573997983, 'time_step': 0.06451721274094775, 'td_error': 1.2981584414472263, 'init_value': -5.062875270843506, 'ave_value': -5.05084897842154} step=8650
2022-04-22 03:38.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:39.07 [info     ] CQL_20220422032937: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0004727165133966876, 'time_algorithm_update': 0.06387152148119976, 'temp_loss': 2.701579172487204, 'temp': 0.5348356025067368, 'alpha_loss': -45.300616754961844, 'alpha': 2.604940688678984, 'critic_loss': 3223.3255777524387, 'actor_loss': 4.112413181734912, 'time_step': 0.06444298463060677, 'td_error': 1.3005228033812348, 'init_value': -5.196233749389648, 'ave_value': -5.1864051457997915} step=8996
2022-04-22 03:39.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:39.30 [info     ] CQL_20220422032937: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0004455926101331766, 'time_algorithm_update': 0.06388552202654711, 'temp_loss': 2.6366578733300887, 'temp': 0.5221298581602946, 'alpha_loss': -47.058971118375744, 'alpha': 2.706333403642467, 'critic_loss': 3385.8399679371387, 'actor_loss': 4.240926340136225, 'time_step': 0.0644276155901782, 'td_error': 1.3021252285829552, 'init_value': -5.281915187835693, 'ave_value': -5.273829716718157} step=9342
2022-04-22 03:39.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:39.53 [info     ] CQL_20220422032937: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0004133723374736102, 'time_algorithm_update': 0.06265718675073172, 'temp_loss': 2.5742793413945018, 'temp': 0.5097284985415508, 'alpha_loss': -48.892584828283056, 'alpha': 2.8116837671037356, 'critic_loss': 3547.1903605378434, 'actor_loss': 4.380812417565053, 'time_step': 0.06317022701219327, 'td_error': 1.3040446062689657, 'init_value': -5.387065887451172, 'ave_value': -5.379270395864068} step=9688
2022-04-22 03:39.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:40.16 [info     ] CQL_20220422032937: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00042954896915854744, 'time_algorithm_update': 0.06384306146919383, 'temp_loss': 2.5135349441814974, 'temp': 0.49761970814941936, 'alpha_loss': -50.795424819681685, 'alpha': 2.921141654080738, 'critic_loss': 3698.7243976923774, 'actor_loss': 4.530051709599578, 'time_step': 0.06437172021479964, 'td_error': 1.307544143765384, 'init_value': -5.595874786376953, 'ave_value': -5.588270639661027} step=10034
2022-04-22 03:40.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:40.39 [info     ] CQL_20220422032937: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00041482007572416624, 'time_algorithm_update': 0.06386354272765231, 'temp_loss': 2.4536127148336067, 'temp': 0.4858002410286424, 'alpha_loss': -52.77230712581921, 'alpha': 3.034856731491971, 'critic_loss': 3849.9881810535585, 'actor_loss': 4.689906293946194, 'time_step': 0.0643809875311879, 'td_error': 1.3084892698218002, 'init_value': -5.630553245544434, 'ave_value': -5.625521170126425} step=10380
2022-04-22 03:40.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:41.02 [info     ] CQL_20220422032937: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0004201720904752698, 'time_algorithm_update': 0.06285055524351969, 'temp_loss': 2.3950092627133936, 'temp': 0.47426141807109634, 'alpha_loss': -54.83013500367975, 'alpha': 3.152995627739526, 'critic_loss': 3985.663541055139, 'actor_loss': 4.836886618178704, 'time_step': 0.06337160182137021, 'td_error': 1.3125719021330908, 'init_value': -5.876426696777344, 'ave_value': -5.869888000886211} step=10726
2022-04-22 03:41.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:41.25 [info     ] CQL_20220422032937: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00042277126643009956, 'time_algorithm_update': 0.06282352781020148, 'temp_loss': 2.3385965203963264, 'temp': 0.4629960054951596, 'alpha_loss': -56.96309127146109, 'alpha': 3.2757388312003517, 'critic_loss': 4123.280631181133, 'actor_loss': 5.034555275316183, 'time_step': 0.06334581333777808, 'td_error': 1.3142873038662046, 'init_value': -5.959018707275391, 'ave_value': -5.954232728939138} step=11072
2022-04-22 03:41.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:41.48 [info     ] CQL_20220422032937: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00042985560577039775, 'time_algorithm_update': 0.06279424918180256, 'temp_loss': 2.282996852962957, 'temp': 0.451997967383076, 'alpha_loss': -59.18119419516856, 'alpha': 3.4032565417317295, 'critic_loss': 4246.4328472159505, 'actor_loss': 5.184431348921936, 'time_step': 0.06332174959899373, 'td_error': 1.3187473931894165, 'init_value': -6.213627815246582, 'ave_value': -6.208492525045541} step=11418
2022-04-22 03:41.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:42.11 [info     ] CQL_20220422032937: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00043952051614750326, 'time_algorithm_update': 0.06318475331874251, 'temp_loss': 2.228692487485147, 'temp': 0.441261152742226, 'alpha_loss': -61.48364248992391, 'alpha': 3.5357375124286365, 'critic_loss': 4357.044145705383, 'actor_loss': 5.40167760159928, 'time_step': 0.06372554040368582, 'td_error': 1.3222696567917596, 'init_value': -6.404572486877441, 'ave_value': -6.400835586012455} step=11764
2022-04-22 03:42.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:42.33 [info     ] CQL_20220422032937: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0004270896746244045, 'time_algorithm_update': 0.06265159562833048, 'temp_loss': 2.175971483219566, 'temp': 0.43077980050806364, 'alpha_loss': -63.880082169020106, 'alpha': 3.6733806663854964, 'critic_loss': 4467.84668533237, 'actor_loss': 5.6066235682867855, 'time_step': 0.06317837183186084, 'td_error': 1.325280890216816, 'init_value': -6.562437534332275, 'ave_value': -6.558766330051558} step=12110
2022-04-22 03:42.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:42.56 [info     ] CQL_20220422032937: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0004166275090564882, 'time_algorithm_update': 0.06272672022009171, 'temp_loss': 2.1243713363746686, 'temp': 0.42054628240579817, 'alpha_loss': -66.37092495515857, 'alpha': 3.816382338545915, 'critic_loss': 4559.2799149882585, 'actor_loss': 5.820936752881618, 'time_step': 0.06324428630012997, 'td_error': 1.3275705900557575, 'init_value': -6.671037673950195, 'ave_value': -6.667534969722337} step=12456
2022-04-22 03:42.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:43.19 [info     ] CQL_20220422032937: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0004416428549441299, 'time_algorithm_update': 0.06224432225861301, 'temp_loss': 2.0735894069506253, 'temp': 0.4105574872796935, 'alpha_loss': -68.94645873935237, 'alpha': 3.964940978612514, 'critic_loss': 4638.8592324670335, 'actor_loss': 6.025782152407431, 'time_step': 0.06278801997962026, 'td_error': 1.3325986127505731, 'init_value': -6.938459873199463, 'ave_value': -6.9354498764190025} step=12802
2022-04-22 03:43.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:43.41 [info     ] CQL_20220422032937: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0004477693855417946, 'time_algorithm_update': 0.061970422033629666, 'temp_loss': 2.0244159298825126, 'temp': 0.40080568986821036, 'alpha_loss': -71.6302845023271, 'alpha': 4.11927616665129, 'critic_loss': 4789.2174714369585, 'actor_loss': 6.272793495586153, 'time_step': 0.0625166307294989, 'td_error': 1.3351026257525938, 'init_value': -7.053134918212891, 'ave_value': -7.050594472726864} step=13148
2022-04-22 03:43.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:44.04 [info     ] CQL_20220422032937: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00042076331342575865, 'time_algorithm_update': 0.062344874949813577, 'temp_loss': 1.9764786782981343, 'temp': 0.39128592205529955, 'alpha_loss': -74.42357986097392, 'alpha': 4.279633214708008, 'critic_loss': 4844.656295158959, 'actor_loss': 6.4521744072092755, 'time_step': 0.06286562040362055, 'td_error': 1.3410741072072125, 'init_value': -7.360439300537109, 'ave_value': -7.357680253543917} step=13494
2022-04-22 03:44.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:44.27 [info     ] CQL_20220422032937: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0004224363779056968, 'time_algorithm_update': 0.06194814092162027, 'temp_loss': 1.9299927505454577, 'temp': 0.3819898807072226, 'alpha_loss': -77.31138006662357, 'alpha': 4.44623618043227, 'critic_loss': 4961.442245924404, 'actor_loss': 6.726070758235248, 'time_step': 0.062470574599470016, 'td_error': 1.3441604484010292, 'init_value': -7.5010528564453125, 'ave_value': -7.499501807410438} step=13840
2022-04-22 03:44.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:44.49 [info     ] CQL_20220422032937: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0004536960855384783, 'time_algorithm_update': 0.06225974504658253, 'temp_loss': 1.8831000090334458, 'temp': 0.3729173035463157, 'alpha_loss': -80.33255346662047, 'alpha': 4.619311843993347, 'critic_loss': 4958.223553784321, 'actor_loss': 6.877047392674264, 'time_step': 0.06281437212332136, 'td_error': 1.3486202149531956, 'init_value': -7.7179789543151855, 'ave_value': -7.716305273570626} step=14186
2022-04-22 03:44.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:45.12 [info     ] CQL_20220422032937: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0004102632489507598, 'time_algorithm_update': 0.06219503231820343, 'temp_loss': 1.8387203895287707, 'temp': 0.36406076204225507, 'alpha_loss': -83.45405717530002, 'alpha': 4.799133875466495, 'critic_loss': 4943.691355446171, 'actor_loss': 7.102002735082814, 'time_step': 0.0627020184015263, 'td_error': 1.351055274028579, 'init_value': -7.820895671844482, 'ave_value': -7.819911075734934} step=14532
2022-04-22 03:45.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:45.33 [info     ] CQL_20220422032937: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.000421038941840905, 'time_algorithm_update': 0.05954975337651423, 'temp_loss': 1.794979425868547, 'temp': 0.35541278916287283, 'alpha_loss': -86.70133394450811, 'alpha': 4.985960140393648, 'critic_loss': 4835.95071746297, 'actor_loss': 7.312459271767236, 'time_step': 0.060069512769666024, 'td_error': 1.3567891224007607, 'init_value': -8.093478202819824, 'ave_value': -8.092163591127138} step=14878
2022-04-22 03:45.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:45.55 [info     ] CQL_20220422032937: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0004563255806189741, 'time_algorithm_update': 0.059291126411085186, 'temp_loss': 1.7525160229964063, 'temp': 0.34697129461117565, 'alpha_loss': -90.08122837750209, 'alpha': 5.180053967271926, 'critic_loss': 4880.126061235549, 'actor_loss': 7.610322384475973, 'time_step': 0.05984820175722155, 'td_error': 1.3626878964869074, 'init_value': -8.363472938537598, 'ave_value': -8.36243916742606} step=15224
2022-04-22 03:45.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:46.17 [info     ] CQL_20220422032937: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0004391601319946995, 'time_algorithm_update': 0.06063574518082459, 'temp_loss': 1.711027886137108, 'temp': 0.3387295332430415, 'alpha_loss': -93.57395097148212, 'alpha': 5.3816983727361425, 'critic_loss': 4928.609861870033, 'actor_loss': 7.853450500896211, 'time_step': 0.06117297597014146, 'td_error': 1.3668604098756438, 'init_value': -8.542170524597168, 'ave_value': -8.541002776397807} step=15570
2022-04-22 03:46.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:46.39 [info     ] CQL_20220422032937: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.000408958148405042, 'time_algorithm_update': 0.0602386128695714, 'temp_loss': 1.6701580534091574, 'temp': 0.3306833913043744, 'alpha_loss': -97.22746792831862, 'alpha': 5.59117015110964, 'critic_loss': 4672.674564780527, 'actor_loss': 7.973221190402962, 'time_step': 0.060749838117919217, 'td_error': 1.3706747626492974, 'init_value': -8.704277038574219, 'ave_value': -8.70390066053699} step=15916
2022-04-22 03:46.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:47.01 [info     ] CQL_20220422032937: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00040715898392517443, 'time_algorithm_update': 0.06024657301820083, 'temp_loss': 1.6307503581047058, 'temp': 0.32282813875316885, 'alpha_loss': -101.01172128578142, 'alpha': 5.808813344536489, 'critic_loss': 4450.0826493632585, 'actor_loss': 8.211947645066102, 'time_step': 0.06075411931627748, 'td_error': 1.3764888778259474, 'init_value': -8.959332466125488, 'ave_value': -8.958418418265678} step=16262
2022-04-22 03:47.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:47.23 [info     ] CQL_20220422032937: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00047172080574697155, 'time_algorithm_update': 0.060659520198844075, 'temp_loss': 1.5917882888303327, 'temp': 0.3151589847369001, 'alpha_loss': -104.9353251154023, 'alpha': 6.034912383625273, 'critic_loss': 4311.112369603505, 'actor_loss': 8.483799937143491, 'time_step': 0.0612240540498943, 'td_error': 1.3825881971711484, 'init_value': -9.215768814086914, 'ave_value': -9.214584749419862} step=16608
2022-04-22 03:47.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:47.45 [info     ] CQL_20220422032937: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00042523745167462123, 'time_algorithm_update': 0.06087007205610331, 'temp_loss': 1.5542479195346721, 'temp': 0.30767192136001037, 'alpha_loss': -109.02186180952657, 'alpha': 6.269826583090545, 'critic_loss': 4172.194525040643, 'actor_loss': 8.750663363175585, 'time_step': 0.06139301771373418, 'td_error': 1.3921747410612249, 'init_value': -9.62574577331543, 'ave_value': -9.62411240179316} step=16954
2022-04-22 03:47.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:48.07 [info     ] CQL_20220422032937: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00041867680632310105, 'time_algorithm_update': 0.06019085680129211, 'temp_loss': 1.517048587688821, 'temp': 0.30036316545023395, 'alpha_loss': -113.25816473106428, 'alpha': 6.513880859220648, 'critic_loss': 4305.547864263457, 'actor_loss': 9.168705628786473, 'time_step': 0.06070411205291748, 'td_error': 1.3974944832433882, 'init_value': -9.828285217285156, 'ave_value': -9.827326941184943} step=17300
2022-04-22 03:48.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422032937/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 03:48.09 [info     ] FQE_20220422034807: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00017206547623973783, 'time_algorithm_update': 0.009116478558987548, 'loss': 0.006821257890975576, 'time_step': 0.009364661523851297, 'init_value': -0.3399529755115509, 'ave_value': -0.2946371183112577, 'soft_opc': nan} step=177




2022-04-22 03:48.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.11 [info     ] FQE_20220422034807: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00017340035088318217, 'time_algorithm_update': 0.009098369522956805, 'loss': 0.004450615796676211, 'time_step': 0.009343104173908125, 'init_value': -0.41713854670524597, 'ave_value': -0.3293669098624596, 'soft_opc': nan} step=354




2022-04-22 03:48.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.13 [info     ] FQE_20220422034807: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00016635959431276484, 'time_algorithm_update': 0.008829533043554274, 'loss': 0.0037521105496433832, 'time_step': 0.009064530248695848, 'init_value': -0.49509596824645996, 'ave_value': -0.3740531927382982, 'soft_opc': nan} step=531




2022-04-22 03:48.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.14 [info     ] FQE_20220422034807: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.0001680393003474521, 'time_algorithm_update': 0.009227908937271032, 'loss': 0.003313639077573678, 'time_step': 0.009464945496812378, 'init_value': -0.5245413780212402, 'ave_value': -0.3851643452773223, 'soft_opc': nan} step=708




2022-04-22 03:48.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.16 [info     ] FQE_20220422034807: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00016664381081101586, 'time_algorithm_update': 0.009224479481325312, 'loss': 0.00310134581661136, 'time_step': 0.009464148074220129, 'init_value': -0.5477592945098877, 'ave_value': -0.3870530451024259, 'soft_opc': nan} step=885




2022-04-22 03:48.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.18 [info     ] FQE_20220422034807: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.0001715226361980546, 'time_algorithm_update': 0.00870702495682711, 'loss': 0.002830849554965872, 'time_step': 0.008950112229686672, 'init_value': -0.6000843644142151, 'ave_value': -0.4038754439479238, 'soft_opc': nan} step=1062




2022-04-22 03:48.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.20 [info     ] FQE_20220422034807: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00017042079214322364, 'time_algorithm_update': 0.008891795314637954, 'loss': 0.0025867004924858945, 'time_step': 0.009137728793472893, 'init_value': -0.6473038792610168, 'ave_value': -0.4237858332939692, 'soft_opc': nan} step=1239




2022-04-22 03:48.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.21 [info     ] FQE_20220422034807: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00016813224318337305, 'time_algorithm_update': 0.009068413642840197, 'loss': 0.0022587440829029528, 'time_step': 0.009310741209040928, 'init_value': -0.7108884453773499, 'ave_value': -0.46044469400569127, 'soft_opc': nan} step=1416




2022-04-22 03:48.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.23 [info     ] FQE_20220422034807: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016520252335543013, 'time_algorithm_update': 0.009181022644042969, 'loss': 0.002107112913124137, 'time_step': 0.009418884913126627, 'init_value': -0.7438684105873108, 'ave_value': -0.4783296999183145, 'soft_opc': nan} step=1593




2022-04-22 03:48.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.25 [info     ] FQE_20220422034807: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001689215837898901, 'time_algorithm_update': 0.008970450546781895, 'loss': 0.00208025398410299, 'time_step': 0.009210283473386602, 'init_value': -0.8119921088218689, 'ave_value': -0.5087392603536626, 'soft_opc': nan} step=1770




2022-04-22 03:48.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.27 [info     ] FQE_20220422034807: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.0001704733250504833, 'time_algorithm_update': 0.009007517227345267, 'loss': 0.0021357364070570192, 'time_step': 0.00925236502609684, 'init_value': -0.913779079914093, 'ave_value': -0.5760054390351693, 'soft_opc': nan} step=1947




2022-04-22 03:48.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.28 [info     ] FQE_20220422034807: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.000167152975912148, 'time_algorithm_update': 0.009159371004266254, 'loss': 0.0023691841055202365, 'time_step': 0.00940251350402832, 'init_value': -1.0260649919509888, 'ave_value': -0.6574689470253907, 'soft_opc': nan} step=2124




2022-04-22 03:48.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.30 [info     ] FQE_20220422034807: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.000171188580787788, 'time_algorithm_update': 0.008868771084284379, 'loss': 0.002426082392665163, 'time_step': 0.009113204007768362, 'init_value': -1.0724519491195679, 'ave_value': -0.6792443355819484, 'soft_opc': nan} step=2301




2022-04-22 03:48.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.32 [info     ] FQE_20220422034807: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016801774838549942, 'time_algorithm_update': 0.009221137580225023, 'loss': 0.0028309742341312647, 'time_step': 0.0094637736088812, 'init_value': -1.180579662322998, 'ave_value': -0.7540253195139739, 'soft_opc': nan} step=2478




2022-04-22 03:48.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.34 [info     ] FQE_20220422034807: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.0001710902499613789, 'time_algorithm_update': 0.00875248477957343, 'loss': 0.0028368115963119654, 'time_step': 0.008997042973836264, 'init_value': -1.2570234537124634, 'ave_value': -0.7903030306637824, 'soft_opc': nan} step=2655




2022-04-22 03:48.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.35 [info     ] FQE_20220422034807: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016809183325471177, 'time_algorithm_update': 0.009049116554906813, 'loss': 0.0031027870356647226, 'time_step': 0.009293109010168388, 'init_value': -1.3392611742019653, 'ave_value': -0.8279265282121865, 'soft_opc': nan} step=2832




2022-04-22 03:48.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.37 [info     ] FQE_20220422034807: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00017379098019357456, 'time_algorithm_update': 0.009001981067118671, 'loss': 0.003479041041257807, 'time_step': 0.009251684792297708, 'init_value': -1.4346729516983032, 'ave_value': -0.8939848140463815, 'soft_opc': nan} step=3009




2022-04-22 03:48.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.39 [info     ] FQE_20220422034807: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.000169454994848219, 'time_algorithm_update': 0.009215412840331342, 'loss': 0.0036833344353297145, 'time_step': 0.009459205939944855, 'init_value': -1.4966325759887695, 'ave_value': -0.9269825515804349, 'soft_opc': nan} step=3186




2022-04-22 03:48.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.41 [info     ] FQE_20220422034807: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.00017105118703033965, 'time_algorithm_update': 0.00914005371136854, 'loss': 0.004231666379192539, 'time_step': 0.009384510880809719, 'init_value': -1.5470284223556519, 'ave_value': -0.9504161068716565, 'soft_opc': nan} step=3363




2022-04-22 03:48.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.42 [info     ] FQE_20220422034807: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00017964503185897223, 'time_algorithm_update': 0.008933212797520525, 'loss': 0.0045695975768158875, 'time_step': 0.009185450225226623, 'init_value': -1.5837537050247192, 'ave_value': -0.9520875572173803, 'soft_opc': nan} step=3540




2022-04-22 03:48.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.44 [info     ] FQE_20220422034807: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00016030888099454892, 'time_algorithm_update': 0.00895871011550817, 'loss': 0.004938288167905652, 'time_step': 0.00918866012055995, 'init_value': -1.6683391332626343, 'ave_value': -0.9941842756024352, 'soft_opc': nan} step=3717




2022-04-22 03:48.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.46 [info     ] FQE_20220422034807: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00016375045991886807, 'time_algorithm_update': 0.009139862437706208, 'loss': 0.005380233982906777, 'time_step': 0.009373798208721614, 'init_value': -1.74883234500885, 'ave_value': -1.067269586568122, 'soft_opc': nan} step=3894




2022-04-22 03:48.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.48 [info     ] FQE_20220422034807: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.0001584931282000353, 'time_algorithm_update': 0.008329473646347132, 'loss': 0.0061045778385168835, 'time_step': 0.008559174456838833, 'init_value': -1.7939066886901855, 'ave_value': -1.0708986229724713, 'soft_opc': nan} step=4071




2022-04-22 03:48.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.49 [info     ] FQE_20220422034807: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00016736310754118667, 'time_algorithm_update': 0.009259791023987161, 'loss': 0.006437979524463184, 'time_step': 0.009500402515217408, 'init_value': -1.9325624704360962, 'ave_value': -1.17812787314972, 'soft_opc': nan} step=4248




2022-04-22 03:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.51 [info     ] FQE_20220422034807: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.0001736886083742993, 'time_algorithm_update': 0.009345456031756212, 'loss': 0.006502159263433541, 'time_step': 0.00959789685610324, 'init_value': -2.016751527786255, 'ave_value': -1.2162753195018023, 'soft_opc': nan} step=4425




2022-04-22 03:48.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.53 [info     ] FQE_20220422034807: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00017149030825512558, 'time_algorithm_update': 0.009215113806859248, 'loss': 0.007517985059053632, 'time_step': 0.00946335065162788, 'init_value': -2.0722787380218506, 'ave_value': -1.25081030898266, 'soft_opc': nan} step=4602




2022-04-22 03:48.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.55 [info     ] FQE_20220422034807: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.0001669792132189045, 'time_algorithm_update': 0.008962703963457528, 'loss': 0.008039113331130984, 'time_step': 0.009206350240330239, 'init_value': -2.1619069576263428, 'ave_value': -1.3096955608856213, 'soft_opc': nan} step=4779




2022-04-22 03:48.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.56 [info     ] FQE_20220422034807: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00016903338459251963, 'time_algorithm_update': 0.00919761496075129, 'loss': 0.008493817048798457, 'time_step': 0.00944399025480626, 'init_value': -2.175603151321411, 'ave_value': -1.3007703275114924, 'soft_opc': nan} step=4956




2022-04-22 03:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:48.58 [info     ] FQE_20220422034807: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00018656455864340572, 'time_algorithm_update': 0.009176541182954433, 'loss': 0.008811973224263796, 'time_step': 0.009440742643539515, 'init_value': -2.2320444583892822, 'ave_value': -1.3117228519719641, 'soft_opc': nan} step=5133




2022-04-22 03:48.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.00 [info     ] FQE_20220422034807: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00016966377947963564, 'time_algorithm_update': 0.00905623004934882, 'loss': 0.009390271550615312, 'time_step': 0.009300156501726916, 'init_value': -2.3302547931671143, 'ave_value': -1.4047205514736003, 'soft_opc': nan} step=5310




2022-04-22 03:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.02 [info     ] FQE_20220422034807: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.000173710160336252, 'time_algorithm_update': 0.008823249299647444, 'loss': 0.009798306209498248, 'time_step': 0.00907615753216932, 'init_value': -2.3575053215026855, 'ave_value': -1.4202549190671594, 'soft_opc': nan} step=5487




2022-04-22 03:49.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.04 [info     ] FQE_20220422034807: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00017765012838072696, 'time_algorithm_update': 0.009233990631534555, 'loss': 0.010265589557853817, 'time_step': 0.009491544658854857, 'init_value': -2.4653451442718506, 'ave_value': -1.520909071827794, 'soft_opc': nan} step=5664




2022-04-22 03:49.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.05 [info     ] FQE_20220422034807: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00017148087927177128, 'time_algorithm_update': 0.008976044627906238, 'loss': 0.011043150024948968, 'time_step': 0.009227388996188924, 'init_value': -2.50109601020813, 'ave_value': -1.5260029867873177, 'soft_opc': nan} step=5841




2022-04-22 03:49.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.07 [info     ] FQE_20220422034807: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00017091918126337945, 'time_algorithm_update': 0.009264924431924766, 'loss': 0.011398965938445601, 'time_step': 0.009511899139921544, 'init_value': -2.6174542903900146, 'ave_value': -1.616354804300331, 'soft_opc': nan} step=6018




2022-04-22 03:49.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.09 [info     ] FQE_20220422034807: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.0001712451546879138, 'time_algorithm_update': 0.009271476228358382, 'loss': 0.012145116939051551, 'time_step': 0.009516736208382299, 'init_value': -2.6440303325653076, 'ave_value': -1.64240127071604, 'soft_opc': nan} step=6195




2022-04-22 03:49.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.11 [info     ] FQE_20220422034807: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00017316866729219082, 'time_algorithm_update': 0.00929190344729666, 'loss': 0.012867984428704522, 'time_step': 0.009542094785614876, 'init_value': -2.725393533706665, 'ave_value': -1.6760820390404882, 'soft_opc': nan} step=6372




2022-04-22 03:49.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.12 [info     ] FQE_20220422034807: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.0001767247410143836, 'time_algorithm_update': 0.00886827808315471, 'loss': 0.013244230922878493, 'time_step': 0.009114168458065744, 'init_value': -2.7887632846832275, 'ave_value': -1.7291701239687545, 'soft_opc': nan} step=6549




2022-04-22 03:49.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.14 [info     ] FQE_20220422034807: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00017107947398040254, 'time_algorithm_update': 0.00918255956832972, 'loss': 0.013741245300911557, 'time_step': 0.009428392022343005, 'init_value': -2.8253402709960938, 'ave_value': -1.769942646261092, 'soft_opc': nan} step=6726




2022-04-22 03:49.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.16 [info     ] FQE_20220422034807: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00017786026000976562, 'time_algorithm_update': 0.008966282936139295, 'loss': 0.014077687929358101, 'time_step': 0.009216785430908203, 'init_value': -2.845386266708374, 'ave_value': -1.800986078641078, 'soft_opc': nan} step=6903




2022-04-22 03:49.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.18 [info     ] FQE_20220422034807: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00017460726075253243, 'time_algorithm_update': 0.00888714951983953, 'loss': 0.014405037137340376, 'time_step': 0.009137114562557242, 'init_value': -2.854738235473633, 'ave_value': -1.7992793527361868, 'soft_opc': nan} step=7080




2022-04-22 03:49.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.19 [info     ] FQE_20220422034807: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00017333434799970206, 'time_algorithm_update': 0.009302345372862736, 'loss': 0.014783547719609056, 'time_step': 0.009551335189302089, 'init_value': -2.905954599380493, 'ave_value': -1.8289473809458472, 'soft_opc': nan} step=7257




2022-04-22 03:49.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.21 [info     ] FQE_20220422034807: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00016880439499677237, 'time_algorithm_update': 0.009155142778730661, 'loss': 0.015095374472623449, 'time_step': 0.009397446098974195, 'init_value': -2.9181134700775146, 'ave_value': -1.814651598318203, 'soft_opc': nan} step=7434




2022-04-22 03:49.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.23 [info     ] FQE_20220422034807: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.0001701419636354608, 'time_algorithm_update': 0.009161159817108327, 'loss': 0.015215803140650718, 'time_step': 0.009410353030188608, 'init_value': -2.947681427001953, 'ave_value': -1.8512128371376175, 'soft_opc': nan} step=7611




2022-04-22 03:49.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.25 [info     ] FQE_20220422034807: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00017522014467056187, 'time_algorithm_update': 0.00908020526002356, 'loss': 0.015409778690460207, 'time_step': 0.009331559057289598, 'init_value': -2.9673547744750977, 'ave_value': -1.8715103446631818, 'soft_opc': nan} step=7788




2022-04-22 03:49.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.27 [info     ] FQE_20220422034807: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00017549897317832472, 'time_algorithm_update': 0.009335998761451851, 'loss': 0.015596008464288871, 'time_step': 0.009583471858568783, 'init_value': -2.907949447631836, 'ave_value': -1.8012141880240884, 'soft_opc': nan} step=7965




2022-04-22 03:49.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.28 [info     ] FQE_20220422034807: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00016839086672680527, 'time_algorithm_update': 0.009163635598737641, 'loss': 0.015548848899247094, 'time_step': 0.009407074437976557, 'init_value': -2.9035279750823975, 'ave_value': -1.7981024496727163, 'soft_opc': nan} step=8142




2022-04-22 03:49.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.30 [info     ] FQE_20220422034807: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.0001708477903894112, 'time_algorithm_update': 0.008947975891458113, 'loss': 0.015717793075173125, 'time_step': 0.00919336787724899, 'init_value': -2.9164583683013916, 'ave_value': -1.8124607928746097, 'soft_opc': nan} step=8319




2022-04-22 03:49.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.32 [info     ] FQE_20220422034807: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00017268105415301133, 'time_algorithm_update': 0.008096051081425725, 'loss': 0.01539568351068676, 'time_step': 0.008350220103721833, 'init_value': -2.9085769653320312, 'ave_value': -1.7818306761281983, 'soft_opc': nan} step=8496




2022-04-22 03:49.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.33 [info     ] FQE_20220422034807: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00017057165587689243, 'time_algorithm_update': 0.008320230548664674, 'loss': 0.01643082390751132, 'time_step': 0.008566046838706496, 'init_value': -2.9496266841888428, 'ave_value': -1.818581662271116, 'soft_opc': nan} step=8673




2022-04-22 03:49.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 03:49.35 [info     ] FQE_20220422034807: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016758401515120166, 'time_algorithm_update': 0.007131854019596078, 'loss': 0.017092780382398746, 'time_step': 0.007375880149798205, 'init_value': -3.0015857219696045, 'ave_value': -1.8642806437459436, 'soft_opc': nan} step=8850




2022-04-22 03:49.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034807/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 03:49.35 [info     ] Directory is created at d3rlpy_logs/FQE_20220422034935
2022-04-22 03:49.35 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 03:49.35 [debug    ] Building models...
2022-04-22 03:49.35 [debug    ] Models have been built.
2022-04-22 03:49.35 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422034935/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 03:49.38 [info     ] FQE_20220422034935: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00017380714416503906, 'time_algorithm_update': 0.0081847822162467, 'loss': 0.022771645804316225, 'time_step': 0.008435012253237442, 'init_value': -1.1578174829483032, 'ave_value': -1.1245292813007743, 'soft_opc': nan} step=355




2022-04-22 03:49.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:49.41 [info     ] FQE_20220422034935: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.0001739287040602993, 'time_algorithm_update': 0.00818437589725978, 'loss': 0.02224396272549327, 'time_step': 0.008431977285465724, 'init_value': -2.364125967025757, 'ave_value': -2.312262678560603, 'soft_opc': nan} step=710




2022-04-22 03:49.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:49.45 [info     ] FQE_20220422034935: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.0001742047323307521, 'time_algorithm_update': 0.008404668619934942, 'loss': 0.023449456020140313, 'time_step': 0.008658025634120887, 'init_value': -3.1511104106903076, 'ave_value': -3.008598833455389, 'soft_opc': nan} step=1065




2022-04-22 03:49.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:49.48 [info     ] FQE_20220422034935: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00017502341471927266, 'time_algorithm_update': 0.008181739860857037, 'loss': 0.02735031847382935, 'time_step': 0.008433581070161202, 'init_value': -4.316803932189941, 'ave_value': -4.166828207711916, 'soft_opc': nan} step=1420




2022-04-22 03:49.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:49.51 [info     ] FQE_20220422034935: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00017741431652660102, 'time_algorithm_update': 0.008356971472082003, 'loss': 0.03186350924185884, 'time_step': 0.0086139611794915, 'init_value': -5.083210468292236, 'ave_value': -4.911665251172187, 'soft_opc': nan} step=1775




2022-04-22 03:49.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:49.54 [info     ] FQE_20220422034935: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00017555465160960882, 'time_algorithm_update': 0.008189911909506355, 'loss': 0.04025861389252921, 'time_step': 0.008445000984299351, 'init_value': -6.057980537414551, 'ave_value': -5.928622514830286, 'soft_opc': nan} step=2130




2022-04-22 03:49.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:49.58 [info     ] FQE_20220422034935: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00017452172830071248, 'time_algorithm_update': 0.008170461654663086, 'loss': 0.04543597853257203, 'time_step': 0.00842064467953964, 'init_value': -6.614217281341553, 'ave_value': -6.581649772019307, 'soft_opc': nan} step=2485




2022-04-22 03:49.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.01 [info     ] FQE_20220422034935: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.000177750117342237, 'time_algorithm_update': 0.008317905748394173, 'loss': 0.05398365351778101, 'time_step': 0.008575135217586034, 'init_value': -7.391324520111084, 'ave_value': -7.603668255695505, 'soft_opc': nan} step=2840




2022-04-22 03:50.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.04 [info     ] FQE_20220422034935: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.0001726593769771952, 'time_algorithm_update': 0.008128769968596983, 'loss': 0.062154099636409485, 'time_step': 0.008377053032458668, 'init_value': -7.679406642913818, 'ave_value': -8.224633459679715, 'soft_opc': nan} step=3195




2022-04-22 03:50.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.07 [info     ] FQE_20220422034935: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.000177680942374216, 'time_algorithm_update': 0.00851095696570168, 'loss': 0.07210818661875289, 'time_step': 0.008767019862860022, 'init_value': -7.862187385559082, 'ave_value': -8.927270494372689, 'soft_opc': nan} step=3550




2022-04-22 03:50.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.11 [info     ] FQE_20220422034935: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00017588776601871973, 'time_algorithm_update': 0.008924519176214514, 'loss': 0.07751960407575251, 'time_step': 0.00917672036399304, 'init_value': -8.304490089416504, 'ave_value': -9.993389817554519, 'soft_opc': nan} step=3905




2022-04-22 03:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.14 [info     ] FQE_20220422034935: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00018202217531875826, 'time_algorithm_update': 0.008722111876581756, 'loss': 0.08561618731317805, 'time_step': 0.008984357538357588, 'init_value': -8.074822425842285, 'ave_value': -10.489737262167372, 'soft_opc': nan} step=4260




2022-04-22 03:50.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.18 [info     ] FQE_20220422034935: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00017724775932204557, 'time_algorithm_update': 0.008988702129310286, 'loss': 0.08907576728559716, 'time_step': 0.009244499072222643, 'init_value': -8.065949440002441, 'ave_value': -11.326216115577187, 'soft_opc': nan} step=4615




2022-04-22 03:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.21 [info     ] FQE_20220422034935: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001832491914990922, 'time_algorithm_update': 0.008712019047267, 'loss': 0.09123249898179316, 'time_step': 0.008973347301214513, 'init_value': -7.9894232749938965, 'ave_value': -11.929664795325069, 'soft_opc': nan} step=4970




2022-04-22 03:50.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.25 [info     ] FQE_20220422034935: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00017516713746836487, 'time_algorithm_update': 0.00905407381729341, 'loss': 0.09515016173655298, 'time_step': 0.009306749827425244, 'init_value': -8.175302505493164, 'ave_value': -12.96395846599164, 'soft_opc': nan} step=5325




2022-04-22 03:50.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.28 [info     ] FQE_20220422034935: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00017706374047507702, 'time_algorithm_update': 0.00872070487116424, 'loss': 0.09608247447496569, 'time_step': 0.008974061885350187, 'init_value': -8.091314315795898, 'ave_value': -13.456582485463475, 'soft_opc': nan} step=5680




2022-04-22 03:50.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.31 [info     ] FQE_20220422034935: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.0001739837753940636, 'time_algorithm_update': 0.008920503670061138, 'loss': 0.09883350403848248, 'time_step': 0.009171148085258377, 'init_value': -8.218107223510742, 'ave_value': -14.299762167955151, 'soft_opc': nan} step=6035




2022-04-22 03:50.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.35 [info     ] FQE_20220422034935: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.0001819536719523685, 'time_algorithm_update': 0.008909066294280576, 'loss': 0.10052540809125968, 'time_step': 0.009165592596564494, 'init_value': -8.208704948425293, 'ave_value': -15.012453936303137, 'soft_opc': nan} step=6390




2022-04-22 03:50.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.38 [info     ] FQE_20220422034935: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00017855536769813214, 'time_algorithm_update': 0.008673542318209796, 'loss': 0.10691479746724518, 'time_step': 0.008927814725419165, 'init_value': -8.543391227722168, 'ave_value': -16.00141142068222, 'soft_opc': nan} step=6745




2022-04-22 03:50.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.42 [info     ] FQE_20220422034935: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.0001778300379363584, 'time_algorithm_update': 0.009109947043405453, 'loss': 0.11115104894770284, 'time_step': 0.009364359815355758, 'init_value': -8.634568214416504, 'ave_value': -16.54683442158742, 'soft_opc': nan} step=7100




2022-04-22 03:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.45 [info     ] FQE_20220422034935: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00017680651705029985, 'time_algorithm_update': 0.00871806413355008, 'loss': 0.11914767436945523, 'time_step': 0.008974472905548526, 'init_value': -8.91274356842041, 'ave_value': -17.400501179940605, 'soft_opc': nan} step=7455




2022-04-22 03:50.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.49 [info     ] FQE_20220422034935: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.0001793646476638149, 'time_algorithm_update': 0.008543946037829762, 'loss': 0.1255213860002622, 'time_step': 0.008797127092388314, 'init_value': -9.242742538452148, 'ave_value': -18.114392385335503, 'soft_opc': nan} step=7810




2022-04-22 03:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.52 [info     ] FQE_20220422034935: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001806843448692644, 'time_algorithm_update': 0.008615261400249643, 'loss': 0.13679627330685165, 'time_step': 0.008871277285293794, 'init_value': -9.343711853027344, 'ave_value': -18.72262117233669, 'soft_opc': nan} step=8165




2022-04-22 03:50.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.55 [info     ] FQE_20220422034935: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00017722022365516341, 'time_algorithm_update': 0.008906125350737235, 'loss': 0.14792071080596095, 'time_step': 0.009159576389151559, 'init_value': -9.859169006347656, 'ave_value': -19.641686258444913, 'soft_opc': nan} step=8520




2022-04-22 03:50.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:50.59 [info     ] FQE_20220422034935: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00017519131512709068, 'time_algorithm_update': 0.008761737044428435, 'loss': 0.16035664754749185, 'time_step': 0.009014478871520137, 'init_value': -10.07055950164795, 'ave_value': -20.10474437752899, 'soft_opc': nan} step=8875




2022-04-22 03:50.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.02 [info     ] FQE_20220422034935: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00017907317255584288, 'time_algorithm_update': 0.008728776179568869, 'loss': 0.16737492442865606, 'time_step': 0.008984534169586611, 'init_value': -10.25927448272705, 'ave_value': -20.592504061449755, 'soft_opc': nan} step=9230




2022-04-22 03:51.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.05 [info     ] FQE_20220422034935: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00017324098398987676, 'time_algorithm_update': 0.008512961024969396, 'loss': 0.17861528386375014, 'time_step': 0.008765178331187073, 'init_value': -10.53074836730957, 'ave_value': -21.153565058284745, 'soft_opc': nan} step=9585




2022-04-22 03:51.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.09 [info     ] FQE_20220422034935: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00018161988594162632, 'time_algorithm_update': 0.008463308844767826, 'loss': 0.18942775062324715, 'time_step': 0.008722500733926263, 'init_value': -10.943269729614258, 'ave_value': -21.8893250459563, 'soft_opc': nan} step=9940




2022-04-22 03:51.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.12 [info     ] FQE_20220422034935: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00017094209160603267, 'time_algorithm_update': 0.008622314560581261, 'loss': 0.19848101082402217, 'time_step': 0.008869236287936358, 'init_value': -11.471036911010742, 'ave_value': -22.584410735876556, 'soft_opc': nan} step=10295




2022-04-22 03:51.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.15 [info     ] FQE_20220422034935: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.0001736479745784276, 'time_algorithm_update': 0.008304362229897942, 'loss': 0.215625282172376, 'time_step': 0.008555556015229561, 'init_value': -11.896880149841309, 'ave_value': -23.203011182836583, 'soft_opc': nan} step=10650




2022-04-22 03:51.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.19 [info     ] FQE_20220422034935: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00017455396517901354, 'time_algorithm_update': 0.008626129929448518, 'loss': 0.2265959440061534, 'time_step': 0.008879695140140158, 'init_value': -12.382940292358398, 'ave_value': -23.967917028510588, 'soft_opc': nan} step=11005




2022-04-22 03:51.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.22 [info     ] FQE_20220422034935: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00017941971899757922, 'time_algorithm_update': 0.008270989001636773, 'loss': 0.23552713414103213, 'time_step': 0.008526932353704749, 'init_value': -12.785055160522461, 'ave_value': -24.491786820778056, 'soft_opc': nan} step=11360




2022-04-22 03:51.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.25 [info     ] FQE_20220422034935: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017063315485564757, 'time_algorithm_update': 0.008590906438693193, 'loss': 0.24277449531454434, 'time_step': 0.008836387580549214, 'init_value': -12.784724235534668, 'ave_value': -24.62551093864011, 'soft_opc': nan} step=11715




2022-04-22 03:51.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.29 [info     ] FQE_20220422034935: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00017206568113515075, 'time_algorithm_update': 0.008274764074406154, 'loss': 0.2530285397413331, 'time_step': 0.008522531348215023, 'init_value': -13.245298385620117, 'ave_value': -25.082452370377887, 'soft_opc': nan} step=12070




2022-04-22 03:51.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.32 [info     ] FQE_20220422034935: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00017071038904324382, 'time_algorithm_update': 0.008665893447231239, 'loss': 0.2716813078386263, 'time_step': 0.008911642558138136, 'init_value': -14.046113967895508, 'ave_value': -25.95052440454908, 'soft_opc': nan} step=12425




2022-04-22 03:51.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.35 [info     ] FQE_20220422034935: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00017589918324645136, 'time_algorithm_update': 0.008419360577220649, 'loss': 0.28208173518659363, 'time_step': 0.008675017826993701, 'init_value': -14.364416122436523, 'ave_value': -26.256021298742354, 'soft_opc': nan} step=12780




2022-04-22 03:51.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.39 [info     ] FQE_20220422034935: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.0001731227821027729, 'time_algorithm_update': 0.008661260067577094, 'loss': 0.2894297607872688, 'time_step': 0.008909118679207816, 'init_value': -14.663634300231934, 'ave_value': -26.605159796649733, 'soft_opc': nan} step=13135




2022-04-22 03:51.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.42 [info     ] FQE_20220422034935: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00018143855350118287, 'time_algorithm_update': 0.008568589116486025, 'loss': 0.3053038780984115, 'time_step': 0.008825922012329101, 'init_value': -14.626690864562988, 'ave_value': -26.571829247290562, 'soft_opc': nan} step=13490




2022-04-22 03:51.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.45 [info     ] FQE_20220422034935: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00017601805673518652, 'time_algorithm_update': 0.008442446211693991, 'loss': 0.31740687798026584, 'time_step': 0.008693370013169839, 'init_value': -15.152083396911621, 'ave_value': -27.008890456060954, 'soft_opc': nan} step=13845




2022-04-22 03:51.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.49 [info     ] FQE_20220422034935: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00017627393695670115, 'time_algorithm_update': 0.008641832647189289, 'loss': 0.332155834064937, 'time_step': 0.008894716853826819, 'init_value': -15.471463203430176, 'ave_value': -27.225954796847535, 'soft_opc': nan} step=14200




2022-04-22 03:51.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.52 [info     ] FQE_20220422034935: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00017128057882819376, 'time_algorithm_update': 0.008267230047306545, 'loss': 0.34586125465346057, 'time_step': 0.00851353860237229, 'init_value': -16.240161895751953, 'ave_value': -27.952919219666313, 'soft_opc': nan} step=14555




2022-04-22 03:51.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.55 [info     ] FQE_20220422034935: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00017280242812465615, 'time_algorithm_update': 0.008550796374468737, 'loss': 0.35818623599766847, 'time_step': 0.0088012480399978, 'init_value': -16.395973205566406, 'ave_value': -27.921158653965634, 'soft_opc': nan} step=14910




2022-04-22 03:51.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:51.58 [info     ] FQE_20220422034935: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00017361305129360145, 'time_algorithm_update': 0.008423335787276148, 'loss': 0.37761698455457954, 'time_step': 0.008675160878141162, 'init_value': -16.813074111938477, 'ave_value': -28.220011240742537, 'soft_opc': nan} step=15265




2022-04-22 03:51.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:52.02 [info     ] FQE_20220422034935: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00018868513510260783, 'time_algorithm_update': 0.0085363744010388, 'loss': 0.38435077569119525, 'time_step': 0.008802919656458035, 'init_value': -17.768497467041016, 'ave_value': -29.066133956620746, 'soft_opc': nan} step=15620




2022-04-22 03:52.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:52.05 [info     ] FQE_20220422034935: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.0001731778534365372, 'time_algorithm_update': 0.00843502702847333, 'loss': 0.39291994841113476, 'time_step': 0.008684651280792667, 'init_value': -17.770423889160156, 'ave_value': -29.031453711062937, 'soft_opc': nan} step=15975




2022-04-22 03:52.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:52.08 [info     ] FQE_20220422034935: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00017460903651277785, 'time_algorithm_update': 0.00851647954591563, 'loss': 0.40568428360660314, 'time_step': 0.008769653212856238, 'init_value': -18.07345199584961, 'ave_value': -29.33937771667078, 'soft_opc': nan} step=16330




2022-04-22 03:52.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:52.12 [info     ] FQE_20220422034935: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00017094209160603267, 'time_algorithm_update': 0.008462801113934585, 'loss': 0.4080585926797398, 'time_step': 0.00870818621675733, 'init_value': -18.037370681762695, 'ave_value': -29.190952947486476, 'soft_opc': nan} step=16685




2022-04-22 03:52.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:52.15 [info     ] FQE_20220422034935: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00017767221155300946, 'time_algorithm_update': 0.008512723949593558, 'loss': 0.39706092893447675, 'time_step': 0.008765011773982518, 'init_value': -18.497652053833008, 'ave_value': -29.430564135995763, 'soft_opc': nan} step=17040




2022-04-22 03:52.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:52.18 [info     ] FQE_20220422034935: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00017191121275995819, 'time_algorithm_update': 0.008532853865287673, 'loss': 0.4170547129953621, 'time_step': 0.008783304187613474, 'init_value': -18.600784301757812, 'ave_value': -29.446151659832346, 'soft_opc': nan} step=17395




2022-04-22 03:52.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 03:52.22 [info     ] FQE_20220422034935: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00018637684029592595, 'time_algorithm_update': 0.00823192126314405, 'loss': 0.4184667576970139, 'time_step': 0.008494992994926346, 'init_value': -18.557832717895508, 'ave_value': -29.295508358438656, 'soft_opc': nan} step=17750




2022-04-22 03:52.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422034935/model_17750.pt
search iteration:  17
using hyper params:  [0.008807960479928626, 0.006935999022483508, 7.263444950832548e-05, 1]
2022-04-22 03:52.22 [debug    ] RoundIterator is selected.
2022-04-22 03:52.22 [info     ] Directory is created at d3rlpy_logs/CQL_20220422035222
2022-04-22 03:52.22 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 03:52.22 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 03:52.22 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422035222/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.008807960479928626, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:52.43 [info     ] CQL_20220422035222: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.000341525656639496, 'time_algorithm_update': 0.05773258484856931, 'temp_loss': 4.93979410008888, 'temp': 0.9872440437360995, 'alpha_loss': -17.693356745504918, 'alpha': 1.0176954393441966, 'critic_loss': 25.83045746136263, 'actor_loss': -1.8999199845715065, 'time_step': 0.05816813769368078, 'td_error': 1.2202687793100606, 'init_value': 0.13904054462909698, 'ave_value': 0.19484001797075914} step=346
2022-04-22 03:52.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:53.04 [info     ] CQL_20220422035222: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00034786993368512634, 'time_algorithm_update': 0.058529272244844825, 'temp_loss': 4.859195649968407, 'temp': 0.9628003401563346, 'alpha_loss': -18.34954235870714, 'alpha': 1.0541308825415683, 'critic_loss': 31.07860565185547, 'actor_loss': -1.7354834813603086, 'time_step': 0.05897942926153282, 'td_error': 1.2104287856025568, 'init_value': 0.1860874593257904, 'ave_value': 0.42810534950135554} step=692
2022-04-22 03:53.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:53.26 [info     ] CQL_20220422035222: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00036734514842832707, 'time_algorithm_update': 0.05949110860769459, 'temp_loss': 4.7409129969646475, 'temp': 0.9393337399973346, 'alpha_loss': -19.023628394727762, 'alpha': 1.0924146444122227, 'critic_loss': 41.29911865939984, 'actor_loss': -1.3648524434235745, 'time_step': 0.05996027985060146, 'td_error': 1.1928859468965571, 'init_value': -0.13453148305416107, 'ave_value': 0.20319147807421872} step=1038
2022-04-22 03:53.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:53.48 [info     ] CQL_20220422035222: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003548495342276689, 'time_algorithm_update': 0.06272353326654159, 'temp_loss': 4.627178156306978, 'temp': 0.9166684400484052, 'alpha_loss': -19.72312877632979, 'alpha': 1.1325888172050431, 'critic_loss': 54.14844810618141, 'actor_loss': -0.88779970150807, 'time_step': 0.06318310092639372, 'td_error': 1.2065156610568333, 'init_value': -0.7685523629188538, 'ave_value': -0.3124264167864609} step=1384
2022-04-22 03:53.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:54.11 [info     ] CQL_20220422035222: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00036028492657435424, 'time_algorithm_update': 0.0626456799534704, 'temp_loss': 4.518429360637775, 'temp': 0.8947264376403279, 'alpha_loss': -20.462773052943234, 'alpha': 1.1747020913686366, 'critic_loss': 68.9445817649709, 'actor_loss': -0.4288242920113437, 'time_step': 0.06310780820129924, 'td_error': 1.2037434335751571, 'init_value': -1.0098607540130615, 'ave_value': -0.4987297609618005} step=1730
2022-04-22 03:54.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:54.34 [info     ] CQL_20220422035222: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003463277927023827, 'time_algorithm_update': 0.06308543682098389, 'temp_loss': 4.411434126727154, 'temp': 0.8734510015545552, 'alpha_loss': -21.22699686281943, 'alpha': 1.2187872701297606, 'critic_loss': 86.6615019120233, 'actor_loss': -0.02915470287031521, 'time_step': 0.06353661021745274, 'td_error': 1.215526493616672, 'init_value': -1.583669662475586, 'ave_value': -1.0406546083161932} step=2076
2022-04-22 03:54.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:54.57 [info     ] CQL_20220422035222: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003658746708335215, 'time_algorithm_update': 0.06283311761183545, 'temp_loss': 4.3060730289172575, 'temp': 0.8528017465434322, 'alpha_loss': -22.027458438983544, 'alpha': 1.2648676072241942, 'critic_loss': 108.30604815620907, 'actor_loss': 0.2526083053601098, 'time_step': 0.06329815167223098, 'td_error': 1.2094342063920152, 'init_value': -1.5810816287994385, 'ave_value': -1.074704703680143} step=2422
2022-04-22 03:54.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:55.20 [info     ] CQL_20220422035222: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00035296561401014384, 'time_algorithm_update': 0.06284394843040864, 'temp_loss': 4.204978890501695, 'temp': 0.8327338290696888, 'alpha_loss': -22.866815137036273, 'alpha': 1.3129957828218537, 'critic_loss': 138.33558941438707, 'actor_loss': 0.19009953529460927, 'time_step': 0.0632989110285147, 'td_error': 1.208414511928704, 'init_value': -1.0962449312210083, 'ave_value': -0.6908627063332716} step=2768
2022-04-22 03:55.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:55.43 [info     ] CQL_20220422035222: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003968187839309604, 'time_algorithm_update': 0.06287676819487115, 'temp_loss': 4.107366218732271, 'temp': 0.8132078065348498, 'alpha_loss': -23.722362463184865, 'alpha': 1.3631920507877548, 'critic_loss': 185.23380380972273, 'actor_loss': -0.38678301629109707, 'time_step': 0.0633799761016934, 'td_error': 1.2057718514644231, 'init_value': -0.5882652401924133, 'ave_value': -0.2839486255160775} step=3114
2022-04-22 03:55.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:56.03 [info     ] CQL_20220422035222: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00035749487794203567, 'time_algorithm_update': 0.05620438928549, 'temp_loss': 4.010290295402439, 'temp': 0.7941989176879728, 'alpha_loss': -24.628611476435136, 'alpha': 1.4155032893136748, 'critic_loss': 249.41484330017443, 'actor_loss': -1.1756425153657881, 'time_step': 0.05666274211310238, 'td_error': 1.2154048499037895, 'init_value': 0.11539731174707413, 'ave_value': 0.3106856848007698} step=3460
2022-04-22 03:56.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:56.23 [info     ] CQL_20220422035222: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00034980897958568065, 'time_algorithm_update': 0.05632972717285156, 'temp_loss': 3.9173427358528095, 'temp': 0.775680000210084, 'alpha_loss': -25.566638880382385, 'alpha': 1.4699964151217069, 'critic_loss': 318.3589008309249, 'actor_loss': -1.8799356215262, 'time_step': 0.05678345151030259, 'td_error': 1.2229232550220939, 'init_value': 0.7080075740814209, 'ave_value': 0.8165484355517958} step=3806
2022-04-22 03:56.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:56.44 [info     ] CQL_20220422035222: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003679329260236266, 'time_algorithm_update': 0.055779698956219446, 'temp_loss': 3.8260610076044337, 'temp': 0.7576287294054307, 'alpha_loss': -26.552201011966417, 'alpha': 1.5267304430118185, 'critic_loss': 384.0623844565684, 'actor_loss': -2.4974681070085207, 'time_step': 0.05624851532754181, 'td_error': 1.228639802536656, 'init_value': 1.305728554725647, 'ave_value': 1.3609181519762041} step=4152
2022-04-22 03:56.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:57.03 [info     ] CQL_20220422035222: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00034358804625582835, 'time_algorithm_update': 0.054155914080625325, 'temp_loss': 3.7363394795125617, 'temp': 0.74002405001938, 'alpha_loss': -27.57833662198458, 'alpha': 1.5857779538700347, 'critic_loss': 450.06775951936754, 'actor_loss': -3.0982932520739603, 'time_step': 0.05460307639458276, 'td_error': 1.231074958942549, 'init_value': 1.8959362506866455, 'ave_value': 1.930703426701811} step=4498
2022-04-22 03:57.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:57.23 [info     ] CQL_20220422035222: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00034726906374010736, 'time_algorithm_update': 0.054058101824942355, 'temp_loss': 3.651712124747348, 'temp': 0.7228478661851386, 'alpha_loss': -28.646955980730883, 'alpha': 1.6472074103493222, 'critic_loss': 524.6714908842406, 'actor_loss': -3.6238638534711276, 'time_step': 0.05450688896840707, 'td_error': 1.232849170784892, 'init_value': 2.422262191772461, 'ave_value': 2.4493321442841465} step=4844
2022-04-22 03:57.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:57.43 [info     ] CQL_20220422035222: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.000368100370285828, 'time_algorithm_update': 0.054770405581920825, 'temp_loss': 3.5662837889842214, 'temp': 0.7060819245831815, 'alpha_loss': -29.75454092852642, 'alpha': 1.7110942681400763, 'critic_loss': 611.0688688245123, 'actor_loss': -4.097936788046291, 'time_step': 0.05524583427892255, 'td_error': 1.2351119779107065, 'init_value': 3.0040552616119385, 'ave_value': 3.0116778672730655} step=5190
2022-04-22 03:57.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:58.03 [info     ] CQL_20220422035222: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00035779324570143155, 'time_algorithm_update': 0.055163322156564346, 'temp_loss': 3.483964208233563, 'temp': 0.6897198695667907, 'alpha_loss': -30.909805000172874, 'alpha': 1.7775179144274982, 'critic_loss': 706.7400239271925, 'actor_loss': -4.584117970714679, 'time_step': 0.05562533119510364, 'td_error': 1.2363564513877212, 'init_value': 3.495189905166626, 'ave_value': 3.5035937790172027} step=5536
2022-04-22 03:58.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:58.23 [info     ] CQL_20220422035222: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003438168178403998, 'time_algorithm_update': 0.05402983337468495, 'temp_loss': 3.4026970491243924, 'temp': 0.67374788732887, 'alpha_loss': -32.112095634372245, 'alpha': 1.8465738355079828, 'critic_loss': 821.3064120077673, 'actor_loss': -4.99936238051839, 'time_step': 0.05447696743672983, 'td_error': 1.236280315540379, 'init_value': 3.8355014324188232, 'ave_value': 3.850503150859451} step=5882
2022-04-22 03:58.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:58.42 [info     ] CQL_20220422035222: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0003562373232979306, 'time_algorithm_update': 0.05401412117687953, 'temp_loss': 3.32360597497466, 'temp': 0.6581538834668308, 'alpha_loss': -33.35913110192801, 'alpha': 1.9183456067404996, 'critic_loss': 948.1507748289605, 'actor_loss': -5.330930957904441, 'time_step': 0.05447405128809758, 'td_error': 1.2376559009628623, 'init_value': 4.1720757484436035, 'ave_value': 4.178896697769237} step=6228
2022-04-22 03:58.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:59.02 [info     ] CQL_20220422035222: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003659718298498606, 'time_algorithm_update': 0.05431380506195774, 'temp_loss': 3.2475547687166686, 'temp': 0.642923504803222, 'alpha_loss': -34.65993666235422, 'alpha': 1.9929372726837336, 'critic_loss': 1086.3701921584288, 'actor_loss': -5.59263403842904, 'time_step': 0.054779588142571424, 'td_error': 1.2403103953263077, 'init_value': 4.541250705718994, 'ave_value': 4.544896126743062} step=6574
2022-04-22 03:59.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:59.22 [info     ] CQL_20220422035222: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003564378429699495, 'time_algorithm_update': 0.05422238738550616, 'temp_loss': 3.1718675821502775, 'temp': 0.6280504229096319, 'alpha_loss': -36.00111412312943, 'alpha': 2.070449547271508, 'critic_loss': 1250.787640345579, 'actor_loss': -5.86139568014641, 'time_step': 0.054677898484158376, 'td_error': 1.239466981758123, 'init_value': 4.7286696434021, 'ave_value': 4.739719020817279} step=6920
2022-04-22 03:59.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 03:59.41 [info     ] CQL_20220422035222: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00036342984679117366, 'time_algorithm_update': 0.054143767825440864, 'temp_loss': 3.0985537016322846, 'temp': 0.613524573386749, 'alpha_loss': -37.40670706908827, 'alpha': 2.150996226795836, 'critic_loss': 1421.6816275712383, 'actor_loss': -6.076869334788681, 'time_step': 0.05460686559621998, 'td_error': 1.242091850763361, 'init_value': 5.055273056030273, 'ave_value': 5.060324525516595} step=7266
2022-04-22 03:59.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:00.01 [info     ] CQL_20220422035222: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003386722134716938, 'time_algorithm_update': 0.053819955428900744, 'temp_loss': 3.027062207977207, 'temp': 0.5993369740213272, 'alpha_loss': -38.856646620469284, 'alpha': 2.2346865538227765, 'critic_loss': 1597.0063942264271, 'actor_loss': -6.287427802995451, 'time_step': 0.05425525331772821, 'td_error': 1.2411772291708074, 'init_value': 5.095413684844971, 'ave_value': 5.10403654167244} step=7612
2022-04-22 04:00.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:00.20 [info     ] CQL_20220422035222: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00035998035717561757, 'time_algorithm_update': 0.05321178546530663, 'temp_loss': 2.956315508467614, 'temp': 0.5854790415722511, 'alpha_loss': -40.36640106598077, 'alpha': 2.321638389129859, 'critic_loss': 1805.2051927864206, 'actor_loss': -6.348714860188479, 'time_step': 0.053667792006035074, 'td_error': 1.2436487749837, 'init_value': 5.351158142089844, 'ave_value': 5.355793016767434} step=7958
2022-04-22 04:00.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:00.40 [info     ] CQL_20220422035222: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00033708390472941314, 'time_algorithm_update': 0.05349661780230572, 'temp_loss': 2.8880696861730146, 'temp': 0.5719429345833773, 'alpha_loss': -41.93988954400741, 'alpha': 2.4119748779804033, 'critic_loss': 2003.0268794594472, 'actor_loss': -6.5746290242740875, 'time_step': 0.05393209760588718, 'td_error': 1.245608586781469, 'init_value': 5.5956339836120605, 'ave_value': 5.597437732781545} step=8304
2022-04-22 04:00.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:01.00 [info     ] CQL_20220422035222: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003539013724795656, 'time_algorithm_update': 0.055026570496531584, 'temp_loss': 2.8213434605240133, 'temp': 0.5587214979133165, 'alpha_loss': -43.5716429252845, 'alpha': 2.5058501945065625, 'critic_loss': 2221.435355655031, 'actor_loss': -6.751998317034947, 'time_step': 0.055480303791906105, 'td_error': 1.24472989641213, 'init_value': 5.675100326538086, 'ave_value': 5.682522372421691} step=8650
2022-04-22 04:01.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:01.20 [info     ] CQL_20220422035222: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003543031008946413, 'time_algorithm_update': 0.05611112627679902, 'temp_loss': 2.7566027069367425, 'temp': 0.54580487302273, 'alpha_loss': -45.272218075790846, 'alpha': 2.6033793529334095, 'critic_loss': 2440.44397113778, 'actor_loss': -6.873800470649852, 'time_step': 0.056564759656873055, 'td_error': 1.2475697520209923, 'init_value': 5.900946617126465, 'ave_value': 5.903068991756484} step=8996
2022-04-22 04:01.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:01.40 [info     ] CQL_20220422035222: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0003672659052589725, 'time_algorithm_update': 0.056223335293676124, 'temp_loss': 2.6929381451854817, 'temp': 0.5331858937450916, 'alpha_loss': -47.032362711911944, 'alpha': 2.7047101762253427, 'critic_loss': 2728.2602722520774, 'actor_loss': -6.931053259469181, 'time_step': 0.05668857469724093, 'td_error': 1.249121769556067, 'init_value': 5.944683074951172, 'ave_value': 5.943175398427313} step=9342
2022-04-22 04:01.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:02.01 [info     ] CQL_20220422035222: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00037333179760530507, 'time_algorithm_update': 0.0560753655571469, 'temp_loss': 2.6305798505772056, 'temp': 0.5208584065037656, 'alpha_loss': -48.861629541209666, 'alpha': 2.809998656283913, 'critic_loss': 3054.362897398844, 'actor_loss': -6.8245041356610425, 'time_step': 0.05654818267491511, 'td_error': 1.2438951069002055, 'init_value': 5.624977111816406, 'ave_value': 5.633520871013643} step=9688
2022-04-22 04:02.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:02.21 [info     ] CQL_20220422035222: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003470271998058165, 'time_algorithm_update': 0.0558655386026195, 'temp_loss': 2.569275720271072, 'temp': 0.5088194851585895, 'alpha_loss': -50.76730026950726, 'alpha': 2.91938970130303, 'critic_loss': 3318.9499645784413, 'actor_loss': -6.6895172003376695, 'time_step': 0.056309261763026947, 'td_error': 1.243848707159489, 'init_value': 5.53766393661499, 'ave_value': 5.542920925019193} step=10034
2022-04-22 04:02.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:02.41 [info     ] CQL_20220422035222: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00033861915500177814, 'time_algorithm_update': 0.05582702159881592, 'temp_loss': 2.510380547170694, 'temp': 0.49705722066708385, 'alpha_loss': -52.74517834393275, 'alpha': 3.033041167121402, 'critic_loss': 3611.8423543059066, 'actor_loss': -6.614031341034553, 'time_step': 0.05626610179857022, 'td_error': 1.2482102684186596, 'init_value': 5.755995750427246, 'ave_value': 5.754311064850837} step=10380
2022-04-22 04:02.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:03.02 [info     ] CQL_20220422035222: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003434653916110882, 'time_algorithm_update': 0.057170393839047824, 'temp_loss': 2.4523585145873144, 'temp': 0.4855655731493338, 'alpha_loss': -54.79156166694068, 'alpha': 3.151114202648229, 'critic_loss': 3740.62268207528, 'actor_loss': -6.64952997802999, 'time_step': 0.057614305115848606, 'td_error': 1.2504166474205343, 'init_value': 5.894497871398926, 'ave_value': 5.886947424505603} step=10726
2022-04-22 04:03.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:03.23 [info     ] CQL_20220422035222: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00036526622110708603, 'time_algorithm_update': 0.05675924444474237, 'temp_loss': 2.3953120839389075, 'temp': 0.47434216255397466, 'alpha_loss': -56.92863376154376, 'alpha': 3.27378206032549, 'critic_loss': 3523.1049353097906, 'actor_loss': -6.725976264545683, 'time_step': 0.05722548575759623, 'td_error': 1.2481593477073774, 'init_value': 5.822338581085205, 'ave_value': 5.821883015612281} step=11072
2022-04-22 04:03.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:03.43 [info     ] CQL_20220422035222: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00035356166045789775, 'time_algorithm_update': 0.056831942817379286, 'temp_loss': 2.3403509338467106, 'temp': 0.4633768954545776, 'alpha_loss': -59.13992213651624, 'alpha': 3.4012236712295887, 'critic_loss': 3208.2811117006863, 'actor_loss': -6.8111591256422805, 'time_step': 0.05728670971931061, 'td_error': 1.2501258344294877, 'init_value': 5.972747325897217, 'ave_value': 5.968062031059935} step=11418
2022-04-22 04:03.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:04.04 [info     ] CQL_20220422035222: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003399711123780708, 'time_algorithm_update': 0.05617224205436045, 'temp_loss': 2.2863876089195294, 'temp': 0.4526650679077027, 'alpha_loss': -61.44848453102773, 'alpha': 3.5336365851363696, 'critic_loss': 3061.450788023844, 'actor_loss': -6.828952880264017, 'time_step': 0.05661095980274884, 'td_error': 1.2492270291216643, 'init_value': 5.988162040710449, 'ave_value': 5.987469748401597} step=11764
2022-04-22 04:04.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:04.24 [info     ] CQL_20220422035222: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00035305726045817997, 'time_algorithm_update': 0.05634435477284338, 'temp_loss': 2.23260026653378, 'temp': 0.4422019454785165, 'alpha_loss': -63.8376162755007, 'alpha': 3.671196676403112, 'critic_loss': 2721.938360842666, 'actor_loss': -7.050614562337798, 'time_step': 0.05679754094581384, 'td_error': 1.2518075511626083, 'init_value': 6.289805889129639, 'ave_value': 6.28902177247689} step=12110
2022-04-22 04:04.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:04.44 [info     ] CQL_20220422035222: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00034041763041060786, 'time_algorithm_update': 0.05616765835381657, 'temp_loss': 2.181456893854748, 'temp': 0.4319810954304789, 'alpha_loss': -66.31458297905894, 'alpha': 3.81409781585539, 'critic_loss': 2500.873464595376, 'actor_loss': -7.14167247066608, 'time_step': 0.05660876097706701, 'td_error': 1.2532079672088592, 'init_value': 6.403402805328369, 'ave_value': 6.397339863209659} step=12456
2022-04-22 04:04.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:05.05 [info     ] CQL_20220422035222: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00035692501619372064, 'time_algorithm_update': 0.05583425753378455, 'temp_loss': 2.1307879920639743, 'temp': 0.4219967117371587, 'alpha_loss': -68.90745319520806, 'alpha': 3.962572461607828, 'critic_loss': 2441.6290452549224, 'actor_loss': -7.229523949540419, 'time_step': 0.056286474183804724, 'td_error': 1.2519632968211745, 'init_value': 6.457136631011963, 'ave_value': 6.458516632634466} step=12802
2022-04-22 04:05.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:05.24 [info     ] CQL_20220422035222: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00034052374735043915, 'time_algorithm_update': 0.05313571133365521, 'temp_loss': 2.0819019458197445, 'temp': 0.41224202117478914, 'alpha_loss': -71.58200807516286, 'alpha': 4.1168325195422755, 'critic_loss': 2270.8223495924403, 'actor_loss': -7.4384847045633835, 'time_step': 0.053577011031222484, 'td_error': 1.2546220201873053, 'init_value': 6.741097927093506, 'ave_value': 6.739641506652688} step=13148
2022-04-22 04:05.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:05.43 [info     ] CQL_20220422035222: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00035643370854372236, 'time_algorithm_update': 0.05232657586908065, 'temp_loss': 2.0337540338494184, 'temp': 0.40271342403626853, 'alpha_loss': -74.36666426906696, 'alpha': 4.277085251890855, 'critic_loss': 1979.224400514812, 'actor_loss': -7.708707670255892, 'time_step': 0.052782519704344645, 'td_error': 1.2562472835552243, 'init_value': 6.993800163269043, 'ave_value': 6.9938885900860095} step=13494
2022-04-22 04:05.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:06.02 [info     ] CQL_20220422035222: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00034595362712882156, 'time_algorithm_update': 0.052441134618196876, 'temp_loss': 1.98673870970059, 'temp': 0.39340325888526234, 'alpha_loss': -77.27065788665948, 'alpha': 4.443592983863257, 'critic_loss': 1748.2615673969246, 'actor_loss': -7.97017984169756, 'time_step': 0.05288709450319323, 'td_error': 1.2593239077854423, 'init_value': 7.3020501136779785, 'ave_value': 7.300149024047236} step=13840
2022-04-22 04:06.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:06.21 [info     ] CQL_20220422035222: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.000337264441341334, 'time_algorithm_update': 0.05231683516088938, 'temp_loss': 1.9407999722254758, 'temp': 0.3843094417986842, 'alpha_loss': -80.27227183435694, 'alpha': 4.61658024925717, 'critic_loss': 1827.2825557289784, 'actor_loss': -8.000866840340498, 'time_step': 0.05274997212294209, 'td_error': 1.2590600162852834, 'init_value': 7.312525749206543, 'ave_value': 7.314631900972753} step=14186
2022-04-22 04:06.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:06.40 [info     ] CQL_20220422035222: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003384689375155234, 'time_algorithm_update': 0.052387772267953506, 'temp_loss': 1.8958785678609946, 'temp': 0.3754252061850763, 'alpha_loss': -83.40374758064402, 'alpha': 4.796303177155512, 'critic_loss': 1914.6882278354183, 'actor_loss': -8.155457606894432, 'time_step': 0.05282225567481421, 'td_error': 1.260503573438798, 'init_value': 7.507181167602539, 'ave_value': 7.5083434625167085} step=14532
2022-04-22 04:06.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:06.59 [info     ] CQL_20220422035222: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003390828998102618, 'time_algorithm_update': 0.05173970509126696, 'temp_loss': 1.8524242763574412, 'temp': 0.36674695492135306, 'alpha_loss': -86.64579351789, 'alpha': 4.983015837696936, 'critic_loss': 2005.866276559113, 'actor_loss': -8.305893534180745, 'time_step': 0.05217489755222563, 'td_error': 1.261680991647245, 'init_value': 7.67826509475708, 'ave_value': 7.680655023553124} step=14878
2022-04-22 04:06.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:07.17 [info     ] CQL_20220422035222: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003301325560994231, 'time_algorithm_update': 0.05031194783359594, 'temp_loss': 1.809309181794955, 'temp': 0.3582689927492528, 'alpha_loss': -90.01598091345991, 'alpha': 5.17700586980478, 'critic_loss': 1960.1049871720331, 'actor_loss': -8.539210154141994, 'time_step': 0.050741974328983724, 'td_error': 1.2637870152894481, 'init_value': 7.843043327331543, 'ave_value': 7.843924292708642} step=15224
2022-04-22 04:07.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:07.36 [info     ] CQL_20220422035222: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00034559806647328284, 'time_algorithm_update': 0.05076425888634831, 'temp_loss': 1.7673338203760929, 'temp': 0.3499872041920017, 'alpha_loss': -93.51856924068032, 'alpha': 5.378544001220968, 'critic_loss': 2055.1639573642974, 'actor_loss': -8.696404517730537, 'time_step': 0.05120847335440575, 'td_error': 1.2695801335628303, 'init_value': 8.20556354522705, 'ave_value': 8.204125915109518} step=15570
2022-04-22 04:07.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:07.54 [info     ] CQL_20220422035222: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00034074356101151834, 'time_algorithm_update': 0.05105874304137478, 'temp_loss': 1.7264567403435018, 'temp': 0.3418982248602575, 'alpha_loss': -97.16425843872776, 'alpha': 5.587919457110367, 'critic_loss': 1964.2867442224756, 'actor_loss': -8.937464578992369, 'time_step': 0.051502969223639866, 'td_error': 1.2686039686780333, 'init_value': 8.332902908325195, 'ave_value': 8.334900692098227} step=15916
2022-04-22 04:07.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:08.12 [info     ] CQL_20220422035222: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003360564998119553, 'time_algorithm_update': 0.05034447887729358, 'temp_loss': 1.686557947900254, 'temp': 0.3339957884281357, 'alpha_loss': -100.95346508136375, 'alpha': 5.805468205082623, 'critic_loss': 2031.2186582708634, 'actor_loss': -9.026313478547024, 'time_step': 0.05077848889235127, 'td_error': 1.2708012871200138, 'init_value': 8.493465423583984, 'ave_value': 8.491273656847806} step=16262
2022-04-22 04:08.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:08.31 [info     ] CQL_20220422035222: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003371169801392307, 'time_algorithm_update': 0.050833060562266094, 'temp_loss': 1.6476158389466347, 'temp': 0.3262750548606663, 'alpha_loss': -104.89440013907549, 'alpha': 6.0314879128009595, 'critic_loss': 2139.2157557493, 'actor_loss': -9.145545953960088, 'time_step': 0.051265939122679606, 'td_error': 1.2731743543669864, 'init_value': 8.630212783813477, 'ave_value': 8.63037986520231} step=16608
2022-04-22 04:08.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:08.50 [info     ] CQL_20220422035222: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003520270992565706, 'time_algorithm_update': 0.05395389292281487, 'temp_loss': 1.60953171094718, 'temp': 0.31873310241051495, 'alpha_loss': -108.95484525206462, 'alpha': 6.266295635631319, 'critic_loss': 2143.740765345579, 'actor_loss': -9.358230858179875, 'time_step': 0.0544047893127265, 'td_error': 1.2745564554959556, 'init_value': 8.823769569396973, 'ave_value': 8.82815700786947} step=16954
2022-04-22 04:08.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:09.10 [info     ] CQL_20220422035222: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00037485257738587485, 'time_algorithm_update': 0.053770095626742856, 'temp_loss': 1.5725253338069585, 'temp': 0.3113649151573291, 'alpha_loss': -113.19879216541445, 'alpha': 6.510230096089358, 'critic_loss': 2187.734464259506, 'actor_loss': -9.46376411901044, 'time_step': 0.05424338958166927, 'td_error': 1.275713811342974, 'init_value': 8.912424087524414, 'ave_value': 8.913662340246484} step=17300
2022-04-22 04:09.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422035222/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 04:09.11 [info     ] FQE_20220422040910: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00017508253993758237, 'time_algorithm_update': 0.006955537451318948, 'loss': 0.007375357882961272, 'time_step': 0.007210445691304034, 'init_value': -0.19812482595443726, 'ave_value': -0.14740226051124097, 'soft_opc': nan} step=166




2022-04-22 04:09.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.13 [info     ] FQE_20220422040910: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016763554998191007, 'time_algorithm_update': 0.007096098130007824, 'loss': 0.004501609395381557, 'time_step': 0.007334644535937941, 'init_value': -0.2463313639163971, 'ave_value': -0.17761980303991082, 'soft_opc': nan} step=332




2022-04-22 04:09.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.14 [info     ] FQE_20220422040910: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00017150051622505648, 'time_algorithm_update': 0.007013509072453143, 'loss': 0.0036989185364400766, 'time_step': 0.007260540881788874, 'init_value': -0.2513432800769806, 'ave_value': -0.18647062034673384, 'soft_opc': nan} step=498




2022-04-22 04:09.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.15 [info     ] FQE_20220422040910: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017153785889407238, 'time_algorithm_update': 0.007094205143940018, 'loss': 0.003338276895312929, 'time_step': 0.007339822240622647, 'init_value': -0.29621684551239014, 'ave_value': -0.22950742510860567, 'soft_opc': nan} step=664




2022-04-22 04:09.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.17 [info     ] FQE_20220422040910: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00017143875719553017, 'time_algorithm_update': 0.006981685937169087, 'loss': 0.003021927415084722, 'time_step': 0.007227496928479298, 'init_value': -0.3037787675857544, 'ave_value': -0.24911082329580914, 'soft_opc': nan} step=830




2022-04-22 04:09.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.18 [info     ] FQE_20220422040910: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00017561395484280874, 'time_algorithm_update': 0.007109109177646867, 'loss': 0.0026361277279253288, 'time_step': 0.007359204522098403, 'init_value': -0.3152815103530884, 'ave_value': -0.26570408651689154, 'soft_opc': nan} step=996




2022-04-22 04:09.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.19 [info     ] FQE_20220422040910: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.000172438391719956, 'time_algorithm_update': 0.00633425166807979, 'loss': 0.002359063219396869, 'time_step': 0.006576308284897402, 'init_value': -0.32975488901138306, 'ave_value': -0.29736179651961, 'soft_opc': nan} step=1162




2022-04-22 04:09.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.21 [info     ] FQE_20220422040910: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00018450438258159593, 'time_algorithm_update': 0.007138579724782921, 'loss': 0.002049934766283371, 'time_step': 0.007394282214612846, 'init_value': -0.3490125834941864, 'ave_value': -0.34699214506466447, 'soft_opc': nan} step=1328




2022-04-22 04:09.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.22 [info     ] FQE_20220422040910: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00017536835498120412, 'time_algorithm_update': 0.007020270968058023, 'loss': 0.0019330963448913072, 'time_step': 0.007267697747931423, 'init_value': -0.36436980962753296, 'ave_value': -0.36827007427618647, 'soft_opc': nan} step=1494




2022-04-22 04:09.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.23 [info     ] FQE_20220422040910: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001733044543898249, 'time_algorithm_update': 0.00693779681102339, 'loss': 0.0018943494542765554, 'time_step': 0.007187949605734952, 'init_value': -0.35492250323295593, 'ave_value': -0.3788091399077628, 'soft_opc': nan} step=1660




2022-04-22 04:09.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.25 [info     ] FQE_20220422040910: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00018068394028996848, 'time_algorithm_update': 0.007170960127589214, 'loss': 0.0018737867060089372, 'time_step': 0.00742354019578681, 'init_value': -0.39264723658561707, 'ave_value': -0.4389093782769533, 'soft_opc': nan} step=1826




2022-04-22 04:09.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.26 [info     ] FQE_20220422040910: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00017197160835725716, 'time_algorithm_update': 0.007024736289518425, 'loss': 0.0019602181143772573, 'time_step': 0.0072725465498774886, 'init_value': -0.402605801820755, 'ave_value': -0.4594447945840321, 'soft_opc': nan} step=1992




2022-04-22 04:09.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.27 [info     ] FQE_20220422040910: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016946821327669075, 'time_algorithm_update': 0.007110986364893167, 'loss': 0.002092040035556665, 'time_step': 0.0073532110237213505, 'init_value': -0.4184129536151886, 'ave_value': -0.490110674217293, 'soft_opc': nan} step=2158




2022-04-22 04:09.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.29 [info     ] FQE_20220422040910: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00017293390021266708, 'time_algorithm_update': 0.007052028035543051, 'loss': 0.0022341883180281483, 'time_step': 0.007297873497009277, 'init_value': -0.4226585924625397, 'ave_value': -0.5020704203704791, 'soft_opc': nan} step=2324




2022-04-22 04:09.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.30 [info     ] FQE_20220422040910: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016998957438641284, 'time_algorithm_update': 0.007115002138068877, 'loss': 0.0025583962458943254, 'time_step': 0.007359224629689412, 'init_value': -0.48101773858070374, 'ave_value': -0.5636786948969201, 'soft_opc': nan} step=2490




2022-04-22 04:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.31 [info     ] FQE_20220422040910: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00017949902867696373, 'time_algorithm_update': 0.006522111145846815, 'loss': 0.002969993502273314, 'time_step': 0.006774388163922781, 'init_value': -0.5541859269142151, 'ave_value': -0.6484230413798008, 'soft_opc': nan} step=2656




2022-04-22 04:09.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.32 [info     ] FQE_20220422040910: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001690244100180017, 'time_algorithm_update': 0.0071219134043498215, 'loss': 0.003135521868131606, 'time_step': 0.007366365697010454, 'init_value': -0.6202306747436523, 'ave_value': -0.7248406550041526, 'soft_opc': nan} step=2822




2022-04-22 04:09.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.34 [info     ] FQE_20220422040910: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00017064737986369305, 'time_algorithm_update': 0.0070622024765933855, 'loss': 0.0032889250049112275, 'time_step': 0.007312330854944436, 'init_value': -0.6315693855285645, 'ave_value': -0.7381638411650653, 'soft_opc': nan} step=2988




2022-04-22 04:09.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.35 [info     ] FQE_20220422040910: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00017458559518837067, 'time_algorithm_update': 0.007191570408372994, 'loss': 0.00352268473435255, 'time_step': 0.007442260363015784, 'init_value': -0.6403658390045166, 'ave_value': -0.7553051505573429, 'soft_opc': nan} step=3154




2022-04-22 04:09.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.36 [info     ] FQE_20220422040910: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00017408721418265836, 'time_algorithm_update': 0.007083966071347156, 'loss': 0.003921511708913057, 'time_step': 0.00733302156609225, 'init_value': -0.673961341381073, 'ave_value': -0.7795094169789573, 'soft_opc': nan} step=3320




2022-04-22 04:09.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.38 [info     ] FQE_20220422040910: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00019761165940617942, 'time_algorithm_update': 0.0069903580539197804, 'loss': 0.0040027517052185, 'time_step': 0.007264269403664462, 'init_value': -0.6810914278030396, 'ave_value': -0.7919944381555291, 'soft_opc': nan} step=3486




2022-04-22 04:09.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.39 [info     ] FQE_20220422040910: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00018489791686276356, 'time_algorithm_update': 0.007162371313715556, 'loss': 0.004444280385771901, 'time_step': 0.0074225103998758705, 'init_value': -0.7105628252029419, 'ave_value': -0.8065942923152434, 'soft_opc': nan} step=3652




2022-04-22 04:09.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.40 [info     ] FQE_20220422040910: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00019848777587155262, 'time_algorithm_update': 0.007010184138654226, 'loss': 0.004634876428515628, 'time_step': 0.0072835123682596595, 'init_value': -0.7544462084770203, 'ave_value': -0.8577735177899903, 'soft_opc': nan} step=3818




2022-04-22 04:09.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.42 [info     ] FQE_20220422040910: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00017243408295045416, 'time_algorithm_update': 0.007111576666314918, 'loss': 0.0048405745430324925, 'time_step': 0.007360795894301081, 'init_value': -0.7670107483863831, 'ave_value': -0.8659758350107114, 'soft_opc': nan} step=3984




2022-04-22 04:09.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.43 [info     ] FQE_20220422040910: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001751486077366105, 'time_algorithm_update': 0.0064900553370096595, 'loss': 0.0050688674569996746, 'time_step': 0.006739093596676746, 'init_value': -0.7473356127738953, 'ave_value': -0.8527190955636839, 'soft_opc': nan} step=4150




2022-04-22 04:09.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.44 [info     ] FQE_20220422040910: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00017775110451571913, 'time_algorithm_update': 0.0072760553245084835, 'loss': 0.005161685729364258, 'time_step': 0.007534106093716909, 'init_value': -0.8294351696968079, 'ave_value': -0.9370354894729046, 'soft_opc': nan} step=4316




2022-04-22 04:09.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.46 [info     ] FQE_20220422040910: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00017754859234913285, 'time_algorithm_update': 0.00725099982985531, 'loss': 0.005319454702822189, 'time_step': 0.007502736815487046, 'init_value': -0.9049839973449707, 'ave_value': -1.0091165550946686, 'soft_opc': nan} step=4482




2022-04-22 04:09.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.47 [info     ] FQE_20220422040910: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00017604195927999107, 'time_algorithm_update': 0.0072206761463578925, 'loss': 0.005540647610064024, 'time_step': 0.007476168942738728, 'init_value': -0.9060974717140198, 'ave_value': -1.0121649782118798, 'soft_opc': nan} step=4648




2022-04-22 04:09.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.48 [info     ] FQE_20220422040910: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001746373004223927, 'time_algorithm_update': 0.007261414125741246, 'loss': 0.005843606060062232, 'time_step': 0.007509872137782085, 'init_value': -0.9850537776947021, 'ave_value': -1.0934223462097548, 'soft_opc': nan} step=4814




2022-04-22 04:09.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.50 [info     ] FQE_20220422040910: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00017751412219311818, 'time_algorithm_update': 0.007167626576251294, 'loss': 0.005999978358930269, 'time_step': 0.0074213412870843725, 'init_value': -1.0273455381393433, 'ave_value': -1.124453004121126, 'soft_opc': nan} step=4980




2022-04-22 04:09.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.51 [info     ] FQE_20220422040910: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001863241195678711, 'time_algorithm_update': 0.007052500563931753, 'loss': 0.0060254073972148955, 'time_step': 0.007312075201287327, 'init_value': -1.037787914276123, 'ave_value': -1.1486205055000813, 'soft_opc': nan} step=5146




2022-04-22 04:09.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.52 [info     ] FQE_20220422040910: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00017216693924134037, 'time_algorithm_update': 0.007049975625003676, 'loss': 0.006186345750634301, 'time_step': 0.007295974765915468, 'init_value': -1.1040990352630615, 'ave_value': -1.1929982416751284, 'soft_opc': nan} step=5312




2022-04-22 04:09.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.54 [info     ] FQE_20220422040910: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001746502267308982, 'time_algorithm_update': 0.007263857198048787, 'loss': 0.006538454021736755, 'time_step': 0.007512593843850745, 'init_value': -1.1388707160949707, 'ave_value': -1.2277114114556293, 'soft_opc': nan} step=5478




2022-04-22 04:09.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.55 [info     ] FQE_20220422040910: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00017317375504826926, 'time_algorithm_update': 0.006488604717944042, 'loss': 0.006916521279266138, 'time_step': 0.00673830509185791, 'init_value': -1.2353636026382446, 'ave_value': -1.3123636748406802, 'soft_opc': nan} step=5644




2022-04-22 04:09.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.56 [info     ] FQE_20220422040910: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001754516578582396, 'time_algorithm_update': 0.007136533059269549, 'loss': 0.00681761460005676, 'time_step': 0.007391988512981369, 'init_value': -1.2690010070800781, 'ave_value': -1.3310478296181238, 'soft_opc': nan} step=5810




2022-04-22 04:09.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.58 [info     ] FQE_20220422040910: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00017296837036868176, 'time_algorithm_update': 0.007064167275486222, 'loss': 0.007156464013345371, 'time_step': 0.007315233529332173, 'init_value': -1.2802766561508179, 'ave_value': -1.3350043007298498, 'soft_opc': nan} step=5976




2022-04-22 04:09.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:09.59 [info     ] FQE_20220422040910: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00017230338360889848, 'time_algorithm_update': 0.007155681230935706, 'loss': 0.007353000692267206, 'time_step': 0.007402033690946648, 'init_value': -1.3346104621887207, 'ave_value': -1.3817015047013121, 'soft_opc': nan} step=6142




2022-04-22 04:09.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.00 [info     ] FQE_20220422040910: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00017435004912227033, 'time_algorithm_update': 0.007140379354178187, 'loss': 0.007222935706128069, 'time_step': 0.007392596049481128, 'init_value': -1.3357737064361572, 'ave_value': -1.3809730530456379, 'soft_opc': nan} step=6308




2022-04-22 04:10.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.02 [info     ] FQE_20220422040910: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00017578774187938277, 'time_algorithm_update': 0.007149317178381495, 'loss': 0.007742146711729197, 'time_step': 0.007403477128729762, 'init_value': -1.3600904941558838, 'ave_value': -1.3965773594758666, 'soft_opc': nan} step=6474




2022-04-22 04:10.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.03 [info     ] FQE_20220422040910: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00017945306846894413, 'time_algorithm_update': 0.0072415320270032765, 'loss': 0.00787677535617511, 'time_step': 0.007496631289102945, 'init_value': -1.422982096672058, 'ave_value': -1.4368569174798222, 'soft_opc': nan} step=6640




2022-04-22 04:10.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.04 [info     ] FQE_20220422040910: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001757561442363693, 'time_algorithm_update': 0.007166468953511801, 'loss': 0.008462651359921613, 'time_step': 0.007418527660599674, 'init_value': -1.4699232578277588, 'ave_value': -1.4813223725704996, 'soft_opc': nan} step=6806




2022-04-22 04:10.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.06 [info     ] FQE_20220422040910: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00018051158950989506, 'time_algorithm_update': 0.007053620844002229, 'loss': 0.008796740951492025, 'time_step': 0.007305284580552435, 'init_value': -1.5321762561798096, 'ave_value': -1.5284560839653956, 'soft_opc': nan} step=6972




2022-04-22 04:10.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.07 [info     ] FQE_20220422040910: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00017179638506418252, 'time_algorithm_update': 0.006560259554759565, 'loss': 0.008640775691257813, 'time_step': 0.006810591881533703, 'init_value': -1.5154547691345215, 'ave_value': -1.503305798362907, 'soft_opc': nan} step=7138




2022-04-22 04:10.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.08 [info     ] FQE_20220422040910: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001715780740760895, 'time_algorithm_update': 0.007184682122196059, 'loss': 0.00895175537135149, 'time_step': 0.007432045706783433, 'init_value': -1.576069712638855, 'ave_value': -1.5527754805730405, 'soft_opc': nan} step=7304




2022-04-22 04:10.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.10 [info     ] FQE_20220422040910: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00017316944627876742, 'time_algorithm_update': 0.0069995170616241826, 'loss': 0.008870881185474166, 'time_step': 0.0072518055697521535, 'init_value': -1.5810458660125732, 'ave_value': -1.5455729894625307, 'soft_opc': nan} step=7470




2022-04-22 04:10.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.11 [info     ] FQE_20220422040910: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00017442329820380154, 'time_algorithm_update': 0.007111187440803252, 'loss': 0.00880992588277132, 'time_step': 0.0073645416512546765, 'init_value': -1.6140787601470947, 'ave_value': -1.557981352105334, 'soft_opc': nan} step=7636




2022-04-22 04:10.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.12 [info     ] FQE_20220422040910: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00017441611692129848, 'time_algorithm_update': 0.007165893014655055, 'loss': 0.009312567894917868, 'time_step': 0.007412824286035745, 'init_value': -1.6697998046875, 'ave_value': -1.6115694422409073, 'soft_opc': nan} step=7802




2022-04-22 04:10.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.14 [info     ] FQE_20220422040910: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00018199236996202585, 'time_algorithm_update': 0.007084942725767572, 'loss': 0.00958163522030311, 'time_step': 0.007340852036533585, 'init_value': -1.674020767211914, 'ave_value': -1.6048714529488002, 'soft_opc': nan} step=7968




2022-04-22 04:10.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.15 [info     ] FQE_20220422040910: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00017986814659762094, 'time_algorithm_update': 0.007058640560471868, 'loss': 0.009298758848124263, 'time_step': 0.007314772490995476, 'init_value': -1.7126047611236572, 'ave_value': -1.6309104320282737, 'soft_opc': nan} step=8134




2022-04-22 04:10.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:10.16 [info     ] FQE_20220422040910: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001766796571662627, 'time_algorithm_update': 0.00722902797790895, 'loss': 0.00998199938905863, 'time_step': 0.007483692054288933, 'init_value': -1.7711482048034668, 'ave_value': -1.6781816352964254, 'soft_opc': nan} step=8300




2022-04-22 04:10.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422040910/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 04:10.17 [info     ] Directory is created at d3rlpy_logs/FQE_20220422041017
2022-04-22 04:10.17 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 04:10.17 [debug    ] Building models...
2022-04-22 04:10.17 [debug    ] Models have been built.
2022-04-22 04:10.17 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422041017/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 04:10.19 [info     ] FQE_20220422041017: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.0001700097738310348, 'time_algorithm_update': 0.006769172674001649, 'loss': 0.024447412961054335, 'time_step': 0.0070152871830518855, 'init_value': -1.4410362243652344, 'ave_value': -1.4130597839618588, 'soft_opc': nan} step=344




2022-04-22 04:10.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.22 [info     ] FQE_20220422041017: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00017249237659365633, 'time_algorithm_update': 0.007071269806041274, 'loss': 0.022727694340743298, 'time_step': 0.007322583780732266, 'init_value': -2.248032808303833, 'ave_value': -2.169527592100539, 'soft_opc': nan} step=688




2022-04-22 04:10.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.25 [info     ] FQE_20220422041017: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017578310744706974, 'time_algorithm_update': 0.007063993880915088, 'loss': 0.025907119730660736, 'time_step': 0.007319189088289128, 'init_value': -3.401041269302368, 'ave_value': -3.2296131080872303, 'soft_opc': nan} step=1032




2022-04-22 04:10.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.27 [info     ] FQE_20220422041017: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017395130423612372, 'time_algorithm_update': 0.007151257160098054, 'loss': 0.02906647081714297, 'time_step': 0.007404558187307313, 'init_value': -4.2221808433532715, 'ave_value': -3.956752765930451, 'soft_opc': nan} step=1376




2022-04-22 04:10.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.30 [info     ] FQE_20220422041017: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017623291459194449, 'time_algorithm_update': 0.007068402545396672, 'loss': 0.03749278632837326, 'time_step': 0.007324042015297468, 'init_value': -5.17606258392334, 'ave_value': -4.821406894908832, 'soft_opc': nan} step=1720




2022-04-22 04:10.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.33 [info     ] FQE_20220422041017: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017321317694907965, 'time_algorithm_update': 0.006939330073290093, 'loss': 0.04748051352582352, 'time_step': 0.007190375134002331, 'init_value': -5.8679304122924805, 'ave_value': -5.488498498546379, 'soft_opc': nan} step=2064




2022-04-22 04:10.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.35 [info     ] FQE_20220422041017: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016363346299459768, 'time_algorithm_update': 0.006724076215610947, 'loss': 0.06012863677189967, 'time_step': 0.00696116893790489, 'init_value': -6.735328674316406, 'ave_value': -6.359633446850202, 'soft_opc': nan} step=2408




2022-04-22 04:10.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.38 [info     ] FQE_20220422041017: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00015955123790474825, 'time_algorithm_update': 0.007725628309471663, 'loss': 0.07510317448894818, 'time_step': 0.007958102364872778, 'init_value': -7.247005939483643, 'ave_value': -6.943923680778794, 'soft_opc': nan} step=2752




2022-04-22 04:10.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.41 [info     ] FQE_20220422041017: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016076135080914165, 'time_algorithm_update': 0.007682829402213873, 'loss': 0.09110686496755671, 'time_step': 0.007913839678431666, 'init_value': -7.804527282714844, 'ave_value': -7.61610618560231, 'soft_opc': nan} step=3096




2022-04-22 04:10.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.44 [info     ] FQE_20220422041017: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.000176663315573404, 'time_algorithm_update': 0.0074206944121870885, 'loss': 0.11159106358604202, 'time_step': 0.007675214562305185, 'init_value': -8.240859985351562, 'ave_value': -8.210350489210304, 'soft_opc': nan} step=3440




2022-04-22 04:10.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.47 [info     ] FQE_20220422041017: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001737530841383823, 'time_algorithm_update': 0.007897328498751618, 'loss': 0.12566150114654975, 'time_step': 0.008149255846821985, 'init_value': -8.522822380065918, 'ave_value': -8.723699119562065, 'soft_opc': nan} step=3784




2022-04-22 04:10.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.50 [info     ] FQE_20220422041017: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017768144607543945, 'time_algorithm_update': 0.007550935412562171, 'loss': 0.14675298301762965, 'time_step': 0.007807012214217075, 'init_value': -9.22671127319336, 'ave_value': -9.723827329873284, 'soft_opc': nan} step=4128




2022-04-22 04:10.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.53 [info     ] FQE_20220422041017: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00018116554548574048, 'time_algorithm_update': 0.007790056772010271, 'loss': 0.1698235621253505, 'time_step': 0.008049560147662496, 'init_value': -9.275171279907227, 'ave_value': -9.927063943578249, 'soft_opc': nan} step=4472




2022-04-22 04:10.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.56 [info     ] FQE_20220422041017: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001743921013765557, 'time_algorithm_update': 0.007379156212474025, 'loss': 0.18944915597933495, 'time_step': 0.007633355467818504, 'init_value': -9.782976150512695, 'ave_value': -10.709517239295954, 'soft_opc': nan} step=4816




2022-04-22 04:10.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:10.59 [info     ] FQE_20220422041017: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017467487690060637, 'time_algorithm_update': 0.007875442504882812, 'loss': 0.21294462035388448, 'time_step': 0.008127729560053625, 'init_value': -9.872894287109375, 'ave_value': -11.046744106282052, 'soft_opc': nan} step=5160




2022-04-22 04:10.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.02 [info     ] FQE_20220422041017: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001771761927493783, 'time_algorithm_update': 0.007873859516409941, 'loss': 0.23232273245771784, 'time_step': 0.00813085811082707, 'init_value': -10.49268627166748, 'ave_value': -11.804392575840088, 'soft_opc': nan} step=5504




2022-04-22 04:11.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.05 [info     ] FQE_20220422041017: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001726850520732791, 'time_algorithm_update': 0.007527523262556209, 'loss': 0.26215450496542764, 'time_step': 0.007777104544085126, 'init_value': -10.874835968017578, 'ave_value': -12.227597416663828, 'soft_opc': nan} step=5848




2022-04-22 04:11.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.08 [info     ] FQE_20220422041017: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00018720224846241086, 'time_algorithm_update': 0.007631062768226446, 'loss': 0.28244324963604817, 'time_step': 0.007898854654888774, 'init_value': -11.523551940917969, 'ave_value': -12.953957128977857, 'soft_opc': nan} step=6192




2022-04-22 04:11.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.10 [info     ] FQE_20220422041017: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017591201981832815, 'time_algorithm_update': 0.007506316484406937, 'loss': 0.30228068689859017, 'time_step': 0.007761218520097955, 'init_value': -12.026317596435547, 'ave_value': -13.322681001402639, 'soft_opc': nan} step=6536




2022-04-22 04:11.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.13 [info     ] FQE_20220422041017: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001795714677766312, 'time_algorithm_update': 0.007826931947885557, 'loss': 0.33190044096324506, 'time_step': 0.008084874513537385, 'init_value': -12.970441818237305, 'ave_value': -14.171088649961796, 'soft_opc': nan} step=6880




2022-04-22 04:11.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.16 [info     ] FQE_20220422041017: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017586904902790868, 'time_algorithm_update': 0.007765391538309497, 'loss': 0.3539442851662982, 'time_step': 0.008021458636882693, 'init_value': -13.571313858032227, 'ave_value': -14.53249173466586, 'soft_opc': nan} step=7224




2022-04-22 04:11.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.19 [info     ] FQE_20220422041017: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001748918100844982, 'time_algorithm_update': 0.007822797048923582, 'loss': 0.3736179093938581, 'time_step': 0.008077292248260143, 'init_value': -14.387331008911133, 'ave_value': -15.20965108961404, 'soft_opc': nan} step=7568




2022-04-22 04:11.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.22 [info     ] FQE_20220422041017: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017411001892976983, 'time_algorithm_update': 0.007597343866215195, 'loss': 0.3885992156926456, 'time_step': 0.007850022510040638, 'init_value': -15.112081527709961, 'ave_value': -15.688046111037266, 'soft_opc': nan} step=7912




2022-04-22 04:11.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.25 [info     ] FQE_20220422041017: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017212435256603153, 'time_algorithm_update': 0.007453803406205288, 'loss': 0.41252750471819105, 'time_step': 0.007705142331677814, 'init_value': -15.688788414001465, 'ave_value': -16.045950146161253, 'soft_opc': nan} step=8256




2022-04-22 04:11.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.28 [info     ] FQE_20220422041017: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017321248387181483, 'time_algorithm_update': 0.007775345513986987, 'loss': 0.4334402026955125, 'time_step': 0.008024444413739581, 'init_value': -16.591724395751953, 'ave_value': -16.558675848348653, 'soft_opc': nan} step=8600




2022-04-22 04:11.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.31 [info     ] FQE_20220422041017: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017829967099566792, 'time_algorithm_update': 0.007505159738451936, 'loss': 0.4757937477403429, 'time_step': 0.007758124623187753, 'init_value': -17.607044219970703, 'ave_value': -17.24426562630714, 'soft_opc': nan} step=8944




2022-04-22 04:11.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.34 [info     ] FQE_20220422041017: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017848472262537755, 'time_algorithm_update': 0.00754390137140141, 'loss': 0.4904719962207817, 'time_step': 0.007801314426022906, 'init_value': -18.255413055419922, 'ave_value': -17.690147424918003, 'soft_opc': nan} step=9288




2022-04-22 04:11.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.37 [info     ] FQE_20220422041017: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00018253645231557447, 'time_algorithm_update': 0.007618304601935453, 'loss': 0.5010316738328182, 'time_step': 0.007878299369368442, 'init_value': -18.82674789428711, 'ave_value': -17.93571674596216, 'soft_opc': nan} step=9632




2022-04-22 04:11.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.40 [info     ] FQE_20220422041017: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017353129941363667, 'time_algorithm_update': 0.007552479588708212, 'loss': 0.5123594089432858, 'time_step': 0.0078036112840785534, 'init_value': -19.651731491088867, 'ave_value': -18.42578806478765, 'soft_opc': nan} step=9976




2022-04-22 04:11.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.43 [info     ] FQE_20220422041017: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001774042151695074, 'time_algorithm_update': 0.007836996122848156, 'loss': 0.5314358334966697, 'time_step': 0.008090190416158632, 'init_value': -20.159996032714844, 'ave_value': -18.4943076674516, 'soft_opc': nan} step=10320




2022-04-22 04:11.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.45 [info     ] FQE_20220422041017: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017027522242346474, 'time_algorithm_update': 0.0072359442710876465, 'loss': 0.5313288052133176, 'time_step': 0.007481401049813559, 'init_value': -21.01789093017578, 'ave_value': -19.23118449319909, 'soft_opc': nan} step=10664




2022-04-22 04:11.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.48 [info     ] FQE_20220422041017: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017434497212254725, 'time_algorithm_update': 0.007883509924245436, 'loss': 0.5536210802762748, 'time_step': 0.008133760718412177, 'init_value': -21.76310157775879, 'ave_value': -19.75124526789311, 'soft_opc': nan} step=11008




2022-04-22 04:11.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.51 [info     ] FQE_20220422041017: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017345783322356468, 'time_algorithm_update': 0.007515822039094082, 'loss': 0.5594756217662592, 'time_step': 0.007768037707306618, 'init_value': -22.017780303955078, 'ave_value': -19.696057332780313, 'soft_opc': nan} step=11352




2022-04-22 04:11.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.54 [info     ] FQE_20220422041017: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017619895380596782, 'time_algorithm_update': 0.007783459369526353, 'loss': 0.5767918756274983, 'time_step': 0.00803854645684708, 'init_value': -22.8060302734375, 'ave_value': -20.361473923042283, 'soft_opc': nan} step=11696




2022-04-22 04:11.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:11.57 [info     ] FQE_20220422041017: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017306139302808186, 'time_algorithm_update': 0.007715864930042001, 'loss': 0.6078531651616876, 'time_step': 0.007968127727508545, 'init_value': -23.716108322143555, 'ave_value': -21.100673190928863, 'soft_opc': nan} step=12040




2022-04-22 04:11.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.00 [info     ] FQE_20220422041017: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017732451128405193, 'time_algorithm_update': 0.007327442252358725, 'loss': 0.6184905559896643, 'time_step': 0.007583763017210849, 'init_value': -23.833492279052734, 'ave_value': -21.0973131233734, 'soft_opc': nan} step=12384




2022-04-22 04:12.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.03 [info     ] FQE_20220422041017: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017346684322800746, 'time_algorithm_update': 0.007808081632436708, 'loss': 0.630416122411356, 'time_step': 0.008061744445978208, 'init_value': -24.15065574645996, 'ave_value': -21.57375573247067, 'soft_opc': nan} step=12728




2022-04-22 04:12.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.06 [info     ] FQE_20220422041017: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017457091531088186, 'time_algorithm_update': 0.007503612789996835, 'loss': 0.6454793099303145, 'time_step': 0.007755617762720862, 'init_value': -24.664806365966797, 'ave_value': -22.044630647970884, 'soft_opc': nan} step=13072




2022-04-22 04:12.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.09 [info     ] FQE_20220422041017: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00017645054085310116, 'time_algorithm_update': 0.00788820136425107, 'loss': 0.6595654882339024, 'time_step': 0.008143217757690785, 'init_value': -24.82486343383789, 'ave_value': -22.226821346243693, 'soft_opc': nan} step=13416




2022-04-22 04:12.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.12 [info     ] FQE_20220422041017: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017510527788206588, 'time_algorithm_update': 0.007324784301048101, 'loss': 0.6581547899756494, 'time_step': 0.00757870355317759, 'init_value': -25.092060089111328, 'ave_value': -22.39134623305956, 'soft_opc': nan} step=13760




2022-04-22 04:12.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.15 [info     ] FQE_20220422041017: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001827076423999875, 'time_algorithm_update': 0.0077903561813886775, 'loss': 0.6719321867418584, 'time_step': 0.00805112927459007, 'init_value': -25.244491577148438, 'ave_value': -22.922062051979204, 'soft_opc': nan} step=14104




2022-04-22 04:12.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.18 [info     ] FQE_20220422041017: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017787065616873808, 'time_algorithm_update': 0.00790132200995157, 'loss': 0.6835620912755731, 'time_step': 0.008158379515936209, 'init_value': -25.288496017456055, 'ave_value': -23.282665345230484, 'soft_opc': nan} step=14448




2022-04-22 04:12.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.21 [info     ] FQE_20220422041017: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017635212388149527, 'time_algorithm_update': 0.007534424925959388, 'loss': 0.6926044851790594, 'time_step': 0.0077873780283817026, 'init_value': -25.436832427978516, 'ave_value': -23.628087543612263, 'soft_opc': nan} step=14792




2022-04-22 04:12.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.24 [info     ] FQE_20220422041017: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00017756223678588867, 'time_algorithm_update': 0.0074909239314323245, 'loss': 0.701815668336484, 'time_step': 0.007744062778561614, 'init_value': -25.53179168701172, 'ave_value': -24.088641465592893, 'soft_opc': nan} step=15136




2022-04-22 04:12.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.26 [info     ] FQE_20220422041017: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017710134040477664, 'time_algorithm_update': 0.007535461076470309, 'loss': 0.6999650130178346, 'time_step': 0.0077910395555718, 'init_value': -25.65756607055664, 'ave_value': -24.395125252267697, 'soft_opc': nan} step=15480




2022-04-22 04:12.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.29 [info     ] FQE_20220422041017: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017767728761185046, 'time_algorithm_update': 0.00777979992156805, 'loss': 0.7025271571013904, 'time_step': 0.008034245219341544, 'init_value': -25.33705711364746, 'ave_value': -24.370790328696234, 'soft_opc': nan} step=15824




2022-04-22 04:12.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.32 [info     ] FQE_20220422041017: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001762218253557072, 'time_algorithm_update': 0.007649426543435385, 'loss': 0.6887578069838847, 'time_step': 0.007904581552328066, 'init_value': -25.2496395111084, 'ave_value': -24.524088446162473, 'soft_opc': nan} step=16168




2022-04-22 04:12.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.35 [info     ] FQE_20220422041017: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017510943634565487, 'time_algorithm_update': 0.007734152466751808, 'loss': 0.6755777219321232, 'time_step': 0.00799058204473451, 'init_value': -24.54136848449707, 'ave_value': -24.201233621341018, 'soft_opc': nan} step=16512




2022-04-22 04:12.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.38 [info     ] FQE_20220422041017: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016365633454433706, 'time_algorithm_update': 0.007698001556618269, 'loss': 0.6575145973704842, 'time_step': 0.007935668839964756, 'init_value': -24.141822814941406, 'ave_value': -24.163680414843732, 'soft_opc': nan} step=16856




2022-04-22 04:12.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:12.41 [info     ] FQE_20220422041017: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016079046005426452, 'time_algorithm_update': 0.007464799077011818, 'loss': 0.6396922856504315, 'time_step': 0.007696721442910128, 'init_value': -23.74529266357422, 'ave_value': -23.91888610834071, 'soft_opc': nan} step=17200




2022-04-22 04:12.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422041017/model_17200.pt
search iteration:  18
using hyper params:  [0.008015326902538738, 0.00488678244826896, 5.7138426138602196e-05, 7]
2022-04-22 04:12.41 [debug    ] RoundIterator is selected.
2022-04-22 04:12.41 [info     ] Directory is created at d3rlpy_logs/CQL_20220422041241
2022-04-22 04:12.41 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 04:12.41 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 04:12.41 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422041241/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.008015326902538738, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:13.01 [info     ] CQL_20220422041241: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0004306252981196938, 'time_algorithm_update': 0.055055889780121735, 'temp_loss': 4.939957598730318, 'temp': 0.9898744234115402, 'alpha_loss': -17.767622865004345, 'alpha': 1.0177289399108447, 'critic_loss': 150.98134037524977, 'actor_loss': 5.105793385814897, 'time_step': 0.05558752944703736, 'td_error': 1.4113715791410215, 'init_value': -10.517354011535645, 'ave_value': -9.664046247167121} step=346
2022-04-22 04:13.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:13.21 [info     ] CQL_20220422041241: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00043406031724345476, 'time_algorithm_update': 0.05558261223611115, 'temp_loss': 4.895504159045357, 'temp': 0.9704608836270481, 'alpha_loss': -18.401468442354588, 'alpha': 1.0541529428063101, 'critic_loss': 212.77923733926232, 'actor_loss': 12.692839366163133, 'time_step': 0.05612417039154582, 'td_error': 1.5983194292716654, 'init_value': -15.725613594055176, 'ave_value': -14.638682020620578} step=692
2022-04-22 04:13.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:13.42 [info     ] CQL_20220422041241: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0004885231139342909, 'time_algorithm_update': 0.05598787765282427, 'temp_loss': 4.803171488591012, 'temp': 0.951707838070875, 'alpha_loss': -19.079436208471396, 'alpha': 1.0924042408176928, 'critic_loss': 413.98416702182305, 'actor_loss': 17.825220300972116, 'time_step': 0.05658628623609598, 'td_error': 1.7864189892550697, 'init_value': -19.800262451171875, 'ave_value': -18.701595090610148} step=1038
2022-04-22 04:13.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:14.02 [info     ] CQL_20220422041241: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00045900193253004483, 'time_algorithm_update': 0.05577426356387276, 'temp_loss': 4.7135039244084, 'temp': 0.9334701044711075, 'alpha_loss': -19.782933025690863, 'alpha': 1.1325852953629687, 'critic_loss': 702.4231966051752, 'actor_loss': 19.407948703435114, 'time_step': 0.05633731384497846, 'td_error': 1.7007697624865818, 'init_value': -19.429189682006836, 'ave_value': -18.53828310760744} step=1384
2022-04-22 04:14.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:14.22 [info     ] CQL_20220422041241: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0004408125243435016, 'time_algorithm_update': 0.056043071553886284, 'temp_loss': 4.623430446393228, 'temp': 0.9157022456557764, 'alpha_loss': -20.476051181727062, 'alpha': 1.1746482218621095, 'critic_loss': 1080.1679609883038, 'actor_loss': 16.278724882644035, 'time_step': 0.05658779185631372, 'td_error': 1.4459944996424698, 'init_value': -13.860668182373047, 'ave_value': -13.387568504248485} step=1730
2022-04-22 04:14.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:14.43 [info     ] CQL_20220422041241: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00045857264127345445, 'time_algorithm_update': 0.05540765020888665, 'temp_loss': 4.5364838321773995, 'temp': 0.8983661048329634, 'alpha_loss': -21.203977435999523, 'alpha': 1.2186282501055326, 'critic_loss': 1561.0405908485368, 'actor_loss': 9.260404654320954, 'time_step': 0.055974179609662535, 'td_error': 1.3288350043200536, 'init_value': -8.254294395446777, 'ave_value': -8.100502066094483} step=2076
2022-04-22 04:14.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:15.03 [info     ] CQL_20220422041241: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00043405893910137904, 'time_algorithm_update': 0.055336661421494675, 'temp_loss': 4.45129223917261, 'temp': 0.8814381574619712, 'alpha_loss': -21.994447322250103, 'alpha': 1.2645961831759855, 'critic_loss': 2076.4217053010975, 'actor_loss': 5.853361205558556, 'time_step': 0.055874779045237286, 'td_error': 1.321043599292728, 'init_value': -7.1960930824279785, 'ave_value': -7.126297912760446} step=2422
2022-04-22 04:15.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:15.23 [info     ] CQL_20220422041241: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0004463512773458668, 'time_algorithm_update': 0.05543078576898299, 'temp_loss': 4.366788439667983, 'temp': 0.8648900060639905, 'alpha_loss': -22.825717854362004, 'alpha': 1.3126286719575784, 'critic_loss': 2528.9221261967123, 'actor_loss': 5.602235455044432, 'time_step': 0.05597763461184639, 'td_error': 1.3287461426409266, 'init_value': -7.400091171264648, 'ave_value': -7.344747211515989} step=2768
2022-04-22 04:15.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:15.43 [info     ] CQL_20220422041241: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0004390168052188234, 'time_algorithm_update': 0.05510538368555852, 'temp_loss': 4.285738833377816, 'temp': 0.8487019244301526, 'alpha_loss': -23.70102771582631, 'alpha': 1.3627688591879916, 'critic_loss': 2956.4980750993495, 'actor_loss': 5.811512186348094, 'time_step': 0.05564208251203416, 'td_error': 1.332617330632755, 'init_value': -7.311737060546875, 'ave_value': -7.277445969868746} step=3114
2022-04-22 04:15.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:16.03 [info     ] CQL_20220422041241: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.000456274589362172, 'time_algorithm_update': 0.0553839468542551, 'temp_loss': 4.20574728739744, 'temp': 0.8328559632935276, 'alpha_loss': -24.607893866610667, 'alpha': 1.4150550031248545, 'critic_loss': 3371.27676585644, 'actor_loss': 6.171829118894014, 'time_step': 0.055947219016235, 'td_error': 1.3435101287734998, 'init_value': -7.763072967529297, 'ave_value': -7.738382558881417} step=3460
2022-04-22 04:16.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:16.23 [info     ] CQL_20220422041241: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00046472122214433084, 'time_algorithm_update': 0.055278378414969916, 'temp_loss': 4.1277224617886406, 'temp': 0.8173325215116402, 'alpha_loss': -25.55637150141545, 'alpha': 1.4695352026492874, 'critic_loss': 3779.0704564441835, 'actor_loss': 6.633987027096611, 'time_step': 0.05584453571738535, 'td_error': 1.3581445501301443, 'init_value': -8.448660850524902, 'ave_value': -8.425041096534928} step=3806
2022-04-22 04:16.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:16.43 [info     ] CQL_20220422041241: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0004579324942792771, 'time_algorithm_update': 0.05478033785186062, 'temp_loss': 4.051283285796987, 'temp': 0.8021238189556695, 'alpha_loss': -26.541507891836883, 'alpha': 1.526265245641587, 'critic_loss': 4204.050049533734, 'actor_loss': 7.152454934368244, 'time_step': 0.055344740779413655, 'td_error': 1.3713515766153366, 'init_value': -8.979423522949219, 'ave_value': -8.96185010292339} step=4152
2022-04-22 04:16.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:17.02 [info     ] CQL_20220422041241: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00045683687132907054, 'time_algorithm_update': 0.052242288699728905, 'temp_loss': 3.9753814983919176, 'temp': 0.7872159939280824, 'alpha_loss': -27.569226639808257, 'alpha': 1.585308417074942, 'critic_loss': 4617.202656475794, 'actor_loss': 7.699637578401951, 'time_step': 0.052800854506520174, 'td_error': 1.3839974191064228, 'init_value': -9.454757690429688, 'ave_value': -9.439885604771577} step=4498
2022-04-22 04:17.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:17.21 [info     ] CQL_20220422041241: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0004525322445555229, 'time_algorithm_update': 0.05245580011709577, 'temp_loss': 3.9022941189694267, 'temp': 0.7725984716001962, 'alpha_loss': -28.637177009803022, 'alpha': 1.646729832095218, 'critic_loss': 5030.47000174991, 'actor_loss': 8.314975847398614, 'time_step': 0.053008007865420655, 'td_error': 1.398792005518445, 'init_value': -9.991218566894531, 'ave_value': -9.982330785853538} step=4844
2022-04-22 04:17.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:17.40 [info     ] CQL_20220422041241: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0004341223336368627, 'time_algorithm_update': 0.05220367936040625, 'temp_loss': 3.8288638577984937, 'temp': 0.758263763492507, 'alpha_loss': -29.746322383770362, 'alpha': 1.7106069633037368, 'critic_loss': 5437.253084921424, 'actor_loss': 8.997704544508387, 'time_step': 0.052737661179779585, 'td_error': 1.4176762880624565, 'init_value': -10.696029663085938, 'ave_value': -10.688645234069535} step=5190
2022-04-22 04:17.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:17.59 [info     ] CQL_20220422041241: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0004696811554748888, 'time_algorithm_update': 0.05236875597452153, 'temp_loss': 3.7577609533519416, 'temp': 0.7442047818203192, 'alpha_loss': -30.89748825227594, 'alpha': 1.7770149459728617, 'critic_loss': 5839.502085779443, 'actor_loss': 9.775624605961617, 'time_step': 0.052936318292783176, 'td_error': 1.444464923488127, 'init_value': -11.701560020446777, 'ave_value': -11.68947374932627} step=5536
2022-04-22 04:17.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:18.19 [info     ] CQL_20220422041241: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0004318649369168144, 'time_algorithm_update': 0.05264341624485964, 'temp_loss': 3.688285738746555, 'temp': 0.7304133688438834, 'alpha_loss': -32.102816515575256, 'alpha': 1.8460486772432492, 'critic_loss': 6223.394432464776, 'actor_loss': 10.535948687206114, 'time_step': 0.05317147618773355, 'td_error': 1.46807441366953, 'init_value': -12.475235939025879, 'ave_value': -12.465265032938726} step=5882
2022-04-22 04:18.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:18.38 [info     ] CQL_20220422041241: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0004639584205054134, 'time_algorithm_update': 0.05302185405885553, 'temp_loss': 3.621014591586383, 'temp': 0.7168789144196263, 'alpha_loss': -33.34659149743229, 'alpha': 1.9178052255183975, 'critic_loss': 6586.942282616059, 'actor_loss': 11.361791087023784, 'time_step': 0.053583491744333606, 'td_error': 1.4902400703892178, 'init_value': -13.150749206542969, 'ave_value': -13.142812283200751} step=6228
2022-04-22 04:18.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:18.57 [info     ] CQL_20220422041241: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00044100959866033123, 'time_algorithm_update': 0.05309263543586511, 'temp_loss': 3.552554780348188, 'temp': 0.7036008504084769, 'alpha_loss': -34.64503741402157, 'alpha': 1.9923757552411514, 'critic_loss': 6917.591794052565, 'actor_loss': 12.219062592941901, 'time_step': 0.05363013427381571, 'td_error': 1.522069913094307, 'init_value': -14.138664245605469, 'ave_value': -14.129500233997039} step=6574
2022-04-22 04:18.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:19.17 [info     ] CQL_20220422041241: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0004469259625914469, 'time_algorithm_update': 0.053647497485827846, 'temp_loss': 3.4875145041184616, 'temp': 0.6905693404936377, 'alpha_loss': -35.998609432595316, 'alpha': 2.0698763125204627, 'critic_loss': 7260.3165925307085, 'actor_loss': 13.17765251887327, 'time_step': 0.05419488242595871, 'td_error': 1.550779054932395, 'init_value': -14.934870719909668, 'ave_value': -14.929063017959558} step=6920
2022-04-22 04:19.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:19.35 [info     ] CQL_20220422041241: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.000450257621059528, 'time_algorithm_update': 0.05088539344037889, 'temp_loss': 3.422551299106179, 'temp': 0.6777832826782513, 'alpha_loss': -37.39317868624119, 'alpha': 2.1504043960846917, 'critic_loss': 7613.446692670701, 'actor_loss': 14.076349115096075, 'time_step': 0.05143144640619355, 'td_error': 1.58349006538717, 'init_value': -15.8272123336792, 'ave_value': -15.82178758663722} step=7266
2022-04-22 04:19.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:19.53 [info     ] CQL_20220422041241: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.000444019460953729, 'time_algorithm_update': 0.04959683059956986, 'temp_loss': 3.3589645445002296, 'temp': 0.6652344682313114, 'alpha_loss': -38.854489376090164, 'alpha': 2.234082175817104, 'critic_loss': 7885.029692015896, 'actor_loss': 15.041678059307825, 'time_step': 0.050138591341889666, 'td_error': 1.6128540566721985, 'init_value': -16.560998916625977, 'ave_value': -16.559796712521752} step=7612
2022-04-22 04:19.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:20.11 [info     ] CQL_20220422041241: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0004442406527568839, 'time_algorithm_update': 0.04968721880389087, 'temp_loss': 3.2967467190902355, 'temp': 0.6529205939328739, 'alpha_loss': -40.36378232040846, 'alpha': 2.32102004434332, 'critic_loss': 8123.186156520954, 'actor_loss': 15.994863364048776, 'time_step': 0.05022875766533648, 'td_error': 1.65091523296477, 'init_value': -17.51464080810547, 'ave_value': -17.514326739141644} step=7958
2022-04-22 04:20.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:20.29 [info     ] CQL_20220422041241: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.000431163462600267, 'time_algorithm_update': 0.04961563603726426, 'temp_loss': 3.2357085398855925, 'temp': 0.640835256934855, 'alpha_loss': -41.93601688759865, 'alpha': 2.411351180490042, 'critic_loss': 8333.679844145141, 'actor_loss': 16.93583360572771, 'time_step': 0.05014719577193949, 'td_error': 1.7022891984128097, 'init_value': -18.79706382751465, 'ave_value': -18.789392773016417} step=8304
2022-04-22 04:20.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:20.48 [info     ] CQL_20220422041241: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00043728792598481814, 'time_algorithm_update': 0.050452700240074554, 'temp_loss': 3.17622858664893, 'temp': 0.6289717043410836, 'alpha_loss': -43.564417447657945, 'alpha': 2.505200508012937, 'critic_loss': 8266.029816203034, 'actor_loss': 17.865997529443288, 'time_step': 0.050987242963272714, 'td_error': 1.7288099192423345, 'init_value': -19.3328914642334, 'ave_value': -19.334601440538215} step=8650
2022-04-22 04:20.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:21.06 [info     ] CQL_20220422041241: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0004390967374592158, 'time_algorithm_update': 0.05128615508878851, 'temp_loss': 3.118072471866718, 'temp': 0.6173277736743751, 'alpha_loss': -45.25313988172939, 'alpha': 2.602698404664938, 'critic_loss': 8332.15158592621, 'actor_loss': 18.900429505144242, 'time_step': 0.05182244736335181, 'td_error': 1.7851377581151848, 'init_value': -20.628582000732422, 'ave_value': -20.6234440043392} step=8996
2022-04-22 04:21.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:21.25 [info     ] CQL_20220422041241: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00044324149975197855, 'time_algorithm_update': 0.051663681261801306, 'temp_loss': 3.0602692400099913, 'temp': 0.6058989306750325, 'alpha_loss': -47.02164228803161, 'alpha': 2.7039884921443256, 'critic_loss': 7734.478271484375, 'actor_loss': 19.72638498427551, 'time_step': 0.05220943310357243, 'td_error': 1.8290447956722704, 'init_value': -21.543128967285156, 'ave_value': -21.538217762331648} step=9342
2022-04-22 04:21.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:21.44 [info     ] CQL_20220422041241: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0004353585270787939, 'time_algorithm_update': 0.05165251831098788, 'temp_loss': 3.0021095971840657, 'temp': 0.5946856177266623, 'alpha_loss': -48.84695016717635, 'alpha': 2.809238604038437, 'critic_loss': 7648.212739624729, 'actor_loss': 20.925163351731495, 'time_step': 0.052186163863694734, 'td_error': 1.8814223303756301, 'init_value': -22.588119506835938, 'ave_value': -22.5841537853619} step=9688
2022-04-22 04:21.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:22.03 [info     ] CQL_20220422041241: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00043968864948074255, 'time_algorithm_update': 0.05184897384202549, 'temp_loss': 2.9478226452204535, 'temp': 0.5836803668496237, 'alpha_loss': -50.741226130138244, 'alpha': 2.9185755356198793, 'critic_loss': 7829.622591051752, 'actor_loss': 22.019993495389905, 'time_step': 0.052389745078334916, 'td_error': 1.9373997755872499, 'init_value': -23.676433563232422, 'ave_value': -23.67218529152723} step=10034
2022-04-22 04:22.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:22.22 [info     ] CQL_20220422041241: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00043347667407438243, 'time_algorithm_update': 0.051906747625053276, 'temp_loss': 2.893513360464504, 'temp': 0.5728747381295772, 'alpha_loss': -52.72238547264496, 'alpha': 3.0321640954541333, 'critic_loss': 7999.399033033779, 'actor_loss': 23.07715379571639, 'time_step': 0.05244084176300578, 'td_error': 1.9933575462780713, 'init_value': -24.70444107055664, 'ave_value': -24.70073981793812} step=10380
2022-04-22 04:22.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:22.40 [info     ] CQL_20220422041241: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00045629043799604296, 'time_algorithm_update': 0.051598014170034776, 'temp_loss': 2.8391778916981867, 'temp': 0.5622712313439805, 'alpha_loss': -54.774549054272605, 'alpha': 3.1501872532629553, 'critic_loss': 8103.25592711344, 'actor_loss': 24.097097099171897, 'time_step': 0.05215238215606337, 'td_error': 2.0507786188858526, 'init_value': -25.743183135986328, 'ave_value': -25.737800507886877} step=10726
2022-04-22 04:22.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:23.00 [info     ] CQL_20220422041241: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00044703139046024034, 'time_algorithm_update': 0.05394716069877492, 'temp_loss': 2.7856876312652763, 'temp': 0.5518677524059494, 'alpha_loss': -56.91384544813564, 'alpha': 3.2728171693107297, 'critic_loss': 8279.789052621478, 'actor_loss': 25.127317759342965, 'time_step': 0.05449097831814275, 'td_error': 2.107560117637207, 'init_value': -26.71059799194336, 'ave_value': -26.70642664128426} step=11072
2022-04-22 04:23.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:23.20 [info     ] CQL_20220422041241: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00043487066478398493, 'time_algorithm_update': 0.05540081806954621, 'temp_loss': 2.735248114332298, 'temp': 0.5416540506947247, 'alpha_loss': -59.11120395991154, 'alpha': 3.400209887868407, 'critic_loss': 8454.534827436326, 'actor_loss': 26.096027313629328, 'time_step': 0.05593046907744656, 'td_error': 2.1651985550853197, 'init_value': -27.671695709228516, 'ave_value': -27.668051765156108} step=11418
2022-04-22 04:23.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:23.41 [info     ] CQL_20220422041241: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0004663777489193602, 'time_algorithm_update': 0.056648546560651304, 'temp_loss': 2.68481123309604, 'temp': 0.5316277773394061, 'alpha_loss': -61.42819001082051, 'alpha': 3.5325372983954546, 'critic_loss': 8542.24613890896, 'actor_loss': 27.04514630268075, 'time_step': 0.05721425665596317, 'td_error': 2.2244536824840253, 'init_value': -28.637231826782227, 'ave_value': -28.631149967568017} step=11764
2022-04-22 04:23.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:24.01 [info     ] CQL_20220422041241: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0004656004767886476, 'time_algorithm_update': 0.05628307568544597, 'temp_loss': 2.634938780972034, 'temp': 0.5217879107921799, 'alpha_loss': -63.81920979891209, 'alpha': 3.670061343667135, 'critic_loss': 8218.700960192376, 'actor_loss': 27.842056230313517, 'time_step': 0.056851217512450467, 'td_error': 2.269408670689161, 'init_value': -29.326303482055664, 'ave_value': -29.32381056997662} step=12110
2022-04-22 04:24.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:24.22 [info     ] CQL_20220422041241: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00045403648663118397, 'time_algorithm_update': 0.05627236407616235, 'temp_loss': 2.5859430098120186, 'temp': 0.5121297831135678, 'alpha_loss': -66.30743059808809, 'alpha': 3.8129242555254454, 'critic_loss': 8312.838802271495, 'actor_loss': 28.868626792995915, 'time_step': 0.05682733293213596, 'td_error': 2.342929840643532, 'init_value': -30.484420776367188, 'ave_value': -30.475971889269754} step=12456
2022-04-22 04:24.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:24.42 [info     ] CQL_20220422041241: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00046361733034166986, 'time_algorithm_update': 0.0558915503452279, 'temp_loss': 2.5379319501060973, 'temp': 0.5026517823941445, 'alpha_loss': -68.88476569115082, 'alpha': 3.9613537829735375, 'critic_loss': 8392.430489071532, 'actor_loss': 29.723178527258725, 'time_step': 0.056458110754200486, 'td_error': 2.3904554327378746, 'init_value': -31.1657772064209, 'ave_value': -31.16243274565724} step=12802
2022-04-22 04:24.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:25.02 [info     ] CQL_20220422041241: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00045179700575812015, 'time_algorithm_update': 0.05567711006010199, 'temp_loss': 2.4917788946559662, 'temp': 0.49334770191267047, 'alpha_loss': -71.57441998079334, 'alpha': 4.115554208700368, 'critic_loss': 8319.602845296695, 'actor_loss': 30.61054396767148, 'time_step': 0.05623121964449138, 'td_error': 2.4356597562871594, 'init_value': -31.77996253967285, 'ave_value': -31.78317849659818} step=13148
2022-04-22 04:25.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:25.22 [info     ] CQL_20220422041241: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00043716044784281293, 'time_algorithm_update': 0.05545451048481671, 'temp_loss': 2.4451616837110133, 'temp': 0.48421569099660555, 'alpha_loss': -74.356325381064, 'alpha': 4.275768550145144, 'critic_loss': 7123.962148324603, 'actor_loss': 31.095785063815256, 'time_step': 0.055997788561561894, 'td_error': 2.471205078224832, 'init_value': -32.2804069519043, 'ave_value': -32.28511283371674} step=13494
2022-04-22 04:25.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:25.42 [info     ] CQL_20220422041241: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00043631220139520016, 'time_algorithm_update': 0.05454125776456271, 'temp_loss': 2.4003032907585187, 'temp': 0.4752530268678775, 'alpha_loss': -77.24437468865014, 'alpha': 4.442213135647636, 'critic_loss': 5890.213354915552, 'actor_loss': 31.698932427202347, 'time_step': 0.055080690824916595, 'td_error': 2.5174188719427715, 'init_value': -32.92867660522461, 'ave_value': -32.93511194561035} step=13840
2022-04-22 04:25.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:26.03 [info     ] CQL_20220422041241: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00044673922434018524, 'time_algorithm_update': 0.05596748528452967, 'temp_loss': 2.3559625934314177, 'temp': 0.4664554083450681, 'alpha_loss': -80.2524899609516, 'alpha': 4.615141452392402, 'critic_loss': 5061.545494829299, 'actor_loss': 32.44189088192979, 'time_step': 0.05651957175635189, 'td_error': 2.5799657362647475, 'init_value': -33.84737014770508, 'ave_value': -33.84452202508889} step=14186
2022-04-22 04:26.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:26.23 [info     ] CQL_20220422041241: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00044632922707265514, 'time_algorithm_update': 0.055907131619536116, 'temp_loss': 2.312061074841229, 'temp': 0.45782243180481685, 'alpha_loss': -83.37137103218564, 'alpha': 4.794795551741054, 'critic_loss': 5019.371342124277, 'actor_loss': 33.59753836923941, 'time_step': 0.0564609862476415, 'td_error': 2.670841486716788, 'init_value': -35.08346176147461, 'ave_value': -35.07683015590937} step=14532
2022-04-22 04:26.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:26.43 [info     ] CQL_20220422041241: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00044691631559691677, 'time_algorithm_update': 0.05527722559912356, 'temp_loss': 2.268878748651185, 'temp': 0.4493494579902274, 'alpha_loss': -86.62566657975918, 'alpha': 4.981447586434425, 'critic_loss': 5342.441713895412, 'actor_loss': 34.65131175311315, 'time_step': 0.05582840042996269, 'td_error': 2.741058433496822, 'init_value': -35.99799728393555, 'ave_value': -35.99227339092054} step=14878
2022-04-22 04:26.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:27.02 [info     ] CQL_20220422041241: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0004418406183319974, 'time_algorithm_update': 0.05286545078189387, 'temp_loss': 2.2267752251873127, 'temp': 0.44103310166755855, 'alpha_loss': -89.99434203219552, 'alpha': 5.1753781269051435, 'critic_loss': 5601.833673907153, 'actor_loss': 35.578801502382134, 'time_step': 0.0534087116318631, 'td_error': 2.807602995969437, 'init_value': -36.82439041137695, 'ave_value': -36.82220327233522} step=15224
2022-04-22 04:27.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:27.22 [info     ] CQL_20220422041241: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00043973826259546887, 'time_algorithm_update': 0.053237699359827646, 'temp_loss': 2.185918341482306, 'temp': 0.43287173585395594, 'alpha_loss': -93.5013215390244, 'alpha': 5.376846324501699, 'critic_loss': 5906.808887283237, 'actor_loss': 36.544347112578464, 'time_step': 0.05377288291909102, 'td_error': 2.8908772446285105, 'init_value': -37.87404251098633, 'ave_value': -37.86891521437578} step=15570
2022-04-22 04:27.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:27.41 [info     ] CQL_20220422041241: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00043678008062991106, 'time_algorithm_update': 0.05316075562052644, 'temp_loss': 2.145543899839324, 'temp': 0.424859449525789, 'alpha_loss': -97.13069024940447, 'alpha': 5.586149853778023, 'critic_loss': 6204.942089279263, 'actor_loss': 37.50407344619663, 'time_step': 0.05369892354645481, 'td_error': 2.969611476413519, 'init_value': -38.83378982543945, 'ave_value': -38.82831408616885} step=15916
2022-04-22 04:27.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:28.00 [info     ] CQL_20220422041241: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00044449560904089426, 'time_algorithm_update': 0.053043244891084, 'temp_loss': 2.1059021432964786, 'temp': 0.4169948751354493, 'alpha_loss': -100.9178361396569, 'alpha': 5.803596631639954, 'critic_loss': 6485.9226935061415, 'actor_loss': 38.427607255174934, 'time_step': 0.05358805615088843, 'td_error': 3.044412797638419, 'init_value': -39.72043228149414, 'ave_value': -39.71620096249171} step=16262
2022-04-22 04:28.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:28.19 [info     ] CQL_20220422041241: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00043367650467536354, 'time_algorithm_update': 0.05248215363893895, 'temp_loss': 2.066631971067087, 'temp': 0.40927782620308717, 'alpha_loss': -104.84972017762288, 'alpha': 6.029501411956169, 'critic_loss': 6639.081010939758, 'actor_loss': 39.25974042153772, 'time_step': 0.053008528114054244, 'td_error': 3.10581520083456, 'init_value': -40.42805862426758, 'ave_value': -40.424194503610536} step=16608
2022-04-22 04:28.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:28.39 [info     ] CQL_20220422041241: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0004320454735287352, 'time_algorithm_update': 0.0525920797634676, 'temp_loss': 2.028992939546618, 'temp': 0.4017016626679139, 'alpha_loss': -108.93640963603995, 'alpha': 6.264234953533018, 'critic_loss': 6831.435666828486, 'actor_loss': 39.995810900120375, 'time_step': 0.0531187946396756, 'td_error': 3.1743070980288906, 'init_value': -41.23227310180664, 'ave_value': -41.225802196830074} step=16954
2022-04-22 04:28.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:28.58 [info     ] CQL_20220422041241: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0004576685800717745, 'time_algorithm_update': 0.05305525815555815, 'temp_loss': 1.990829461571798, 'temp': 0.39426674560315345, 'alpha_loss': -113.16593399488858, 'alpha': 6.508070619120074, 'critic_loss': 6169.721109555636, 'actor_loss': 40.25767174208095, 'time_step': 0.05361416298530005, 'td_error': 3.1592046134157, 'init_value': -40.99235916137695, 'ave_value': -40.998519398602} step=17300
2022-04-22 04:28.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422041241/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910049e-

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 04:28.59 [info     ] FQE_20220422042858: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016134905527873212, 'time_algorithm_update': 0.007012690406247794, 'loss': 0.006933696424947625, 'time_step': 0.007245391248220421, 'init_value': -0.35047709941864014, 'ave_value': -0.29462405765378796, 'soft_opc': nan} step=166




2022-04-22 04:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.01 [info     ] FQE_20220422042858: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016255694699574667, 'time_algorithm_update': 0.007116385253078966, 'loss': 0.004368801005128546, 'time_step': 0.0073583872921495555, 'init_value': -0.3725886344909668, 'ave_value': -0.28764382064476746, 'soft_opc': nan} step=332




2022-04-22 04:29.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.02 [info     ] FQE_20220422042858: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016628259635833372, 'time_algorithm_update': 0.007165666086127959, 'loss': 0.003733881266720324, 'time_step': 0.007406442998403527, 'init_value': -0.4248984754085541, 'ave_value': -0.3251412329171692, 'soft_opc': nan} step=498




2022-04-22 04:29.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.03 [info     ] FQE_20220422042858: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016794003636003976, 'time_algorithm_update': 0.007149466549057558, 'loss': 0.0034772753533177884, 'time_step': 0.007394372698772384, 'init_value': -0.44448554515838623, 'ave_value': -0.3337944636440223, 'soft_opc': nan} step=664




2022-04-22 04:29.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.05 [info     ] FQE_20220422042858: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001637763287647661, 'time_algorithm_update': 0.007107071129672499, 'loss': 0.0032098324638398655, 'time_step': 0.007345551467803587, 'init_value': -0.44503480195999146, 'ave_value': -0.35262039742759754, 'soft_opc': nan} step=830




2022-04-22 04:29.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.06 [info     ] FQE_20220422042858: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001616262527833502, 'time_algorithm_update': 0.006447690079011113, 'loss': 0.002884566307724285, 'time_step': 0.006687572203486799, 'init_value': -0.4678700864315033, 'ave_value': -0.3775021450610848, 'soft_opc': nan} step=996




2022-04-22 04:29.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.07 [info     ] FQE_20220422042858: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016654399504144508, 'time_algorithm_update': 0.007076474557439965, 'loss': 0.0026464513194444307, 'time_step': 0.007315265126975186, 'init_value': -0.4769958555698395, 'ave_value': -0.4108744618692645, 'soft_opc': nan} step=1162




2022-04-22 04:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.09 [info     ] FQE_20220422042858: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016601688890572055, 'time_algorithm_update': 0.006975837500698595, 'loss': 0.002295248504273638, 'time_step': 0.0072192556886787876, 'init_value': -0.46475088596343994, 'ave_value': -0.4209332847770572, 'soft_opc': nan} step=1328




2022-04-22 04:29.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.10 [info     ] FQE_20220422042858: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001656721873455737, 'time_algorithm_update': 0.007167632321277297, 'loss': 0.0020715511040705964, 'time_step': 0.007405125951192465, 'init_value': -0.4759620130062103, 'ave_value': -0.4462575876074298, 'soft_opc': nan} step=1494




2022-04-22 04:29.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.11 [info     ] FQE_20220422042858: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001661691320947854, 'time_algorithm_update': 0.007064007851014654, 'loss': 0.0019860325667990304, 'time_step': 0.007306486727243446, 'init_value': -0.48857027292251587, 'ave_value': -0.46571017771220957, 'soft_opc': nan} step=1660




2022-04-22 04:29.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.13 [info     ] FQE_20220422042858: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016238459621567325, 'time_algorithm_update': 0.00714822705969753, 'loss': 0.0018936925874854695, 'time_step': 0.007380058966487287, 'init_value': -0.5160857439041138, 'ave_value': -0.5009996318593957, 'soft_opc': nan} step=1826




2022-04-22 04:29.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.14 [info     ] FQE_20220422042858: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001663400466183582, 'time_algorithm_update': 0.007158830941441548, 'loss': 0.0017929731812804982, 'time_step': 0.007402427225227815, 'init_value': -0.5538798570632935, 'ave_value': -0.5393373236311851, 'soft_opc': nan} step=1992




2022-04-22 04:29.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.15 [info     ] FQE_20220422042858: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00018160458070686064, 'time_algorithm_update': 0.007092554885220815, 'loss': 0.0019342258862915438, 'time_step': 0.0073471370949802625, 'init_value': -0.616386890411377, 'ave_value': -0.5905044736080542, 'soft_opc': nan} step=2158




2022-04-22 04:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.17 [info     ] FQE_20220422042858: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016205425722053252, 'time_algorithm_update': 0.006898385932646602, 'loss': 0.0020402129408160045, 'time_step': 0.0071272447884800924, 'init_value': -0.6543468236923218, 'ave_value': -0.6170035873006963, 'soft_opc': nan} step=2324




2022-04-22 04:29.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.18 [info     ] FQE_20220422042858: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016287435968238186, 'time_algorithm_update': 0.006517362881855792, 'loss': 0.002125152930756189, 'time_step': 0.006754882364387971, 'init_value': -0.660787045955658, 'ave_value': -0.6318612346366921, 'soft_opc': nan} step=2490




2022-04-22 04:29.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.19 [info     ] FQE_20220422042858: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016798599656805936, 'time_algorithm_update': 0.0071694736021110815, 'loss': 0.002627404327420181, 'time_step': 0.007413225001599415, 'init_value': -0.7348154783248901, 'ave_value': -0.6880185038128214, 'soft_opc': nan} step=2656




2022-04-22 04:29.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.20 [info     ] FQE_20220422042858: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016528727060340973, 'time_algorithm_update': 0.007012161863855569, 'loss': 0.0029909846101851055, 'time_step': 0.007251551352351545, 'init_value': -0.8421727418899536, 'ave_value': -0.7905820815102407, 'soft_opc': nan} step=2822




2022-04-22 04:29.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.22 [info     ] FQE_20220422042858: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00017039028995008353, 'time_algorithm_update': 0.007170319557189941, 'loss': 0.0034752277222936077, 'time_step': 0.007419752787394696, 'init_value': -0.8843843340873718, 'ave_value': -0.8116982618514617, 'soft_opc': nan} step=2988




2022-04-22 04:29.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.23 [info     ] FQE_20220422042858: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001701303275234728, 'time_algorithm_update': 0.007052861064313406, 'loss': 0.0037232529449470184, 'time_step': 0.007300366838294339, 'init_value': -0.9205195903778076, 'ave_value': -0.842733755830239, 'soft_opc': nan} step=3154




2022-04-22 04:29.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.24 [info     ] FQE_20220422042858: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016559032072503883, 'time_algorithm_update': 0.0071586427918399675, 'loss': 0.004248947076077281, 'time_step': 0.00740241429891931, 'init_value': -0.9757214784622192, 'ave_value': -0.8948886490175249, 'soft_opc': nan} step=3320




2022-04-22 04:29.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.26 [info     ] FQE_20220422042858: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001802501908267837, 'time_algorithm_update': 0.007167896592473409, 'loss': 0.004679556073956135, 'time_step': 0.007426855075790222, 'init_value': -0.9745168685913086, 'ave_value': -0.8853441125956607, 'soft_opc': nan} step=3486




2022-04-22 04:29.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.27 [info     ] FQE_20220422042858: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016218352030558758, 'time_algorithm_update': 0.0071006553718842656, 'loss': 0.005283004650039632, 'time_step': 0.007338308426271002, 'init_value': -0.9985966682434082, 'ave_value': -0.9041547855630786, 'soft_opc': nan} step=3652




2022-04-22 04:29.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.28 [info     ] FQE_20220422042858: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016390128308031932, 'time_algorithm_update': 0.007013088249298464, 'loss': 0.005940394398387443, 'time_step': 0.007252506462924452, 'init_value': -1.0275205373764038, 'ave_value': -0.9425377520942338, 'soft_opc': nan} step=3818




2022-04-22 04:29.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.30 [info     ] FQE_20220422042858: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016602694270122483, 'time_algorithm_update': 0.0065117729715554115, 'loss': 0.006268411967692986, 'time_step': 0.006752289921404368, 'init_value': -1.0212316513061523, 'ave_value': -0.9410297428509472, 'soft_opc': nan} step=3984




2022-04-22 04:29.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.31 [info     ] FQE_20220422042858: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016788689486951712, 'time_algorithm_update': 0.0070107126810464515, 'loss': 0.007153797220068434, 'time_step': 0.007257088121161403, 'init_value': -1.0806360244750977, 'ave_value': -1.015975705887451, 'soft_opc': nan} step=4150




2022-04-22 04:29.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.32 [info     ] FQE_20220422042858: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016672209084752095, 'time_algorithm_update': 0.007128220006644008, 'loss': 0.007536030859940304, 'time_step': 0.007372258657432464, 'init_value': -1.1391654014587402, 'ave_value': -1.0839581860254544, 'soft_opc': nan} step=4316




2022-04-22 04:29.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.34 [info     ] FQE_20220422042858: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016670054700001176, 'time_algorithm_update': 0.007119565124971321, 'loss': 0.008535750188194328, 'time_step': 0.007360614925982004, 'init_value': -1.1673386096954346, 'ave_value': -1.112674618108819, 'soft_opc': nan} step=4482




2022-04-22 04:29.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.35 [info     ] FQE_20220422042858: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016742011150681828, 'time_algorithm_update': 0.007008171943296869, 'loss': 0.008913904851714867, 'time_step': 0.007251055843858834, 'init_value': -1.1758265495300293, 'ave_value': -1.124766197049705, 'soft_opc': nan} step=4648




2022-04-22 04:29.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.36 [info     ] FQE_20220422042858: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016436663018651754, 'time_algorithm_update': 0.007258591881717544, 'loss': 0.009690606388044599, 'time_step': 0.007496980299432594, 'init_value': -1.1823958158493042, 'ave_value': -1.1174887227347574, 'soft_opc': nan} step=4814




2022-04-22 04:29.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.38 [info     ] FQE_20220422042858: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016692747552710842, 'time_algorithm_update': 0.007072779069463891, 'loss': 0.010150094389640558, 'time_step': 0.007316507488848215, 'init_value': -1.2958126068115234, 'ave_value': -1.2236007128825395, 'soft_opc': nan} step=4980




2022-04-22 04:29.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.39 [info     ] FQE_20220422042858: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001998206219041204, 'time_algorithm_update': 0.00703866797757436, 'loss': 0.010446009176351264, 'time_step': 0.007315727601568383, 'init_value': -1.2914860248565674, 'ave_value': -1.2249852124635163, 'soft_opc': nan} step=5146




2022-04-22 04:29.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.40 [info     ] FQE_20220422042858: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016921543213258307, 'time_algorithm_update': 0.006976761013628489, 'loss': 0.011218133504174268, 'time_step': 0.007220577044659351, 'init_value': -1.3163807392120361, 'ave_value': -1.2397262669025777, 'soft_opc': nan} step=5312




2022-04-22 04:29.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.41 [info     ] FQE_20220422042858: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001599846116031509, 'time_algorithm_update': 0.006464882069323437, 'loss': 0.011669483915229428, 'time_step': 0.006696461194969085, 'init_value': -1.3213757276535034, 'ave_value': -1.2367179101937122, 'soft_opc': nan} step=5478




2022-04-22 04:29.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.43 [info     ] FQE_20220422042858: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016517093382686018, 'time_algorithm_update': 0.007071549633899367, 'loss': 0.012824511601511076, 'time_step': 0.00731543891401176, 'init_value': -1.2940046787261963, 'ave_value': -1.1905229628237115, 'soft_opc': nan} step=5644




2022-04-22 04:29.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.44 [info     ] FQE_20220422042858: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00017816618264439595, 'time_algorithm_update': 0.007066364747932158, 'loss': 0.012951104608588818, 'time_step': 0.007317102099039468, 'init_value': -1.3385634422302246, 'ave_value': -1.2340711264443156, 'soft_opc': nan} step=5810




2022-04-22 04:29.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.45 [info     ] FQE_20220422042858: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001628427620393684, 'time_algorithm_update': 0.006964169352887625, 'loss': 0.013369030042492548, 'time_step': 0.007201362805194165, 'init_value': -1.3941757678985596, 'ave_value': -1.2759440920189828, 'soft_opc': nan} step=5976




2022-04-22 04:29.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.47 [info     ] FQE_20220422042858: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001667048557695136, 'time_algorithm_update': 0.0070217187146106395, 'loss': 0.013840322668877351, 'time_step': 0.00726264499756227, 'init_value': -1.443547010421753, 'ave_value': -1.3244381412335142, 'soft_opc': nan} step=6142




2022-04-22 04:29.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.48 [info     ] FQE_20220422042858: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00016852746526879002, 'time_algorithm_update': 0.007134601294276226, 'loss': 0.014516615146821567, 'time_step': 0.0073800690202827915, 'init_value': -1.4413979053497314, 'ave_value': -1.3332377798888866, 'soft_opc': nan} step=6308




2022-04-22 04:29.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.49 [info     ] FQE_20220422042858: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001662696700498282, 'time_algorithm_update': 0.007070937788630107, 'loss': 0.01546012852740676, 'time_step': 0.007316747343683818, 'init_value': -1.482351303100586, 'ave_value': -1.3639070957387285, 'soft_opc': nan} step=6474




2022-04-22 04:29.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.51 [info     ] FQE_20220422042858: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001660614128572395, 'time_algorithm_update': 0.007071334195424275, 'loss': 0.015826887257904914, 'time_step': 0.007314906062850033, 'init_value': -1.558903455734253, 'ave_value': -1.4396503093165791, 'soft_opc': nan} step=6640




2022-04-22 04:29.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.52 [info     ] FQE_20220422042858: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001664362758038992, 'time_algorithm_update': 0.006906459130436541, 'loss': 0.016365348621263136, 'time_step': 0.007151609443756471, 'init_value': -1.6232719421386719, 'ave_value': -1.494813913323388, 'soft_opc': nan} step=6806




2022-04-22 04:29.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.53 [info     ] FQE_20220422042858: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016814254852662603, 'time_algorithm_update': 0.006511974047465497, 'loss': 0.01778660034017458, 'time_step': 0.0067529419818556456, 'init_value': -1.6192114353179932, 'ave_value': -1.4791193818253976, 'soft_opc': nan} step=6972




2022-04-22 04:29.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.55 [info     ] FQE_20220422042858: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016500720058579044, 'time_algorithm_update': 0.006989395762064371, 'loss': 0.016138695825258807, 'time_step': 0.0072276721517723725, 'init_value': -1.5755603313446045, 'ave_value': -1.4222712751024882, 'soft_opc': nan} step=7138




2022-04-22 04:29.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.56 [info     ] FQE_20220422042858: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016638026180037534, 'time_algorithm_update': 0.007061201405812459, 'loss': 0.019018886104178425, 'time_step': 0.007299635783735528, 'init_value': -1.6498441696166992, 'ave_value': -1.5118322172697196, 'soft_opc': nan} step=7304




2022-04-22 04:29.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.57 [info     ] FQE_20220422042858: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016333252550607705, 'time_algorithm_update': 0.006855549582515855, 'loss': 0.019066021510495256, 'time_step': 0.007090328687644866, 'init_value': -1.6587151288986206, 'ave_value': -1.5210492284565762, 'soft_opc': nan} step=7470




2022-04-22 04:29.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:29.58 [info     ] FQE_20220422042858: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001641167215554111, 'time_algorithm_update': 0.007176226880176958, 'loss': 0.019834627424007708, 'time_step': 0.007416002721671599, 'init_value': -1.6902456283569336, 'ave_value': -1.527629067664882, 'soft_opc': nan} step=7636




2022-04-22 04:29.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:30.00 [info     ] FQE_20220422042858: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016775619552796147, 'time_algorithm_update': 0.006959563278290163, 'loss': 0.02029719630239464, 'time_step': 0.007204098873827831, 'init_value': -1.6250755786895752, 'ave_value': -1.4607748347661784, 'soft_opc': nan} step=7802




2022-04-22 04:30.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:30.01 [info     ] FQE_20220422042858: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016758815351738986, 'time_algorithm_update': 0.00708627557180014, 'loss': 0.02155051476824243, 'time_step': 0.00732745894466538, 'init_value': -1.7272487878799438, 'ave_value': -1.547511746198241, 'soft_opc': nan} step=7968




2022-04-22 04:30.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:30.02 [info     ] FQE_20220422042858: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001676642751119223, 'time_algorithm_update': 0.007169983473168798, 'loss': 0.022196124241538683, 'time_step': 0.007413229310368917, 'init_value': -1.8188598155975342, 'ave_value': -1.6373363178374396, 'soft_opc': nan} step=8134




2022-04-22 04:30.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 04:30.04 [info     ] FQE_20220422042858: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00017798521432531886, 'time_algorithm_update': 0.0069643618112587066, 'loss': 0.021939047682379562, 'time_step': 0.0072171874793179065, 'init_value': -1.8607397079467773, 'ave_value': -1.6795037634242829, 'soft_opc': nan} step=8300




2022-04-22 04:30.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422042858/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 04:30.04 [debug    ] RoundIterator is selected.
2022-04-22 04:30.04 [info     ] Directory is created at d3rlpy_logs/FQE_20220422043004
2022-04-22 04:30.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 04:30.04 [debug    ] Building models...
2022-04-22 04:30.04 [debug    ] Models have been built.
2022-04-22 04:30.04 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422043004/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 04:30.07 [info     ] FQE_20220422043004: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.0001845709035094355, 'time_algorithm_update': 0.006979311016243948, 'loss': 0.02509277610134491, 'time_step': 0.007238240309164558, 'init_value': -1.153515100479126, 'ave_value': -1.1461234601538153, 'soft_opc': nan} step=355




2022-04-22 04:30.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.10 [info     ] FQE_20220422043004: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00017632900829046545, 'time_algorithm_update': 0.007021576250103158, 'loss': 0.023706786604729337, 'time_step': 0.007274387252162879, 'init_value': -2.2885375022888184, 'ave_value': -2.268722174541984, 'soft_opc': nan} step=710




2022-04-22 04:30.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.13 [info     ] FQE_20220422043004: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00017804159245020907, 'time_algorithm_update': 0.007053793651956908, 'loss': 0.026099597974877122, 'time_step': 0.0073092239003785895, 'init_value': -2.708404541015625, 'ave_value': -2.6430689275494874, 'soft_opc': nan} step=1065




2022-04-22 04:30.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.16 [info     ] FQE_20220422043004: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.00017095283723213303, 'time_algorithm_update': 0.007029777177622621, 'loss': 0.0321561367972426, 'time_step': 0.007275467859187596, 'init_value': -3.6034061908721924, 'ave_value': -3.4943083396903027, 'soft_opc': nan} step=1420




2022-04-22 04:30.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.18 [info     ] FQE_20220422043004: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00017161369323730468, 'time_algorithm_update': 0.006784666088265432, 'loss': 0.03995700699972435, 'time_step': 0.007031136499324315, 'init_value': -3.909248113632202, 'ave_value': -3.7464317389283246, 'soft_opc': nan} step=1775




2022-04-22 04:30.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.21 [info     ] FQE_20220422043004: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00017703687640982613, 'time_algorithm_update': 0.007050235506514429, 'loss': 0.05119453161220316, 'time_step': 0.007303321865242972, 'init_value': -4.511561870574951, 'ave_value': -4.293508407646164, 'soft_opc': nan} step=2130




2022-04-22 04:30.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.24 [info     ] FQE_20220422043004: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00017746401504731515, 'time_algorithm_update': 0.007072539396688972, 'loss': 0.05988156409330771, 'time_step': 0.007327272522617394, 'init_value': -4.8713274002075195, 'ave_value': -4.63679318684447, 'soft_opc': nan} step=2485




2022-04-22 04:30.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.27 [info     ] FQE_20220422043004: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.0001801134834826832, 'time_algorithm_update': 0.007078681865208585, 'loss': 0.07566746508728871, 'time_step': 0.007338128962986906, 'init_value': -5.010265350341797, 'ave_value': -4.858224891486699, 'soft_opc': nan} step=2840




2022-04-22 04:30.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.29 [info     ] FQE_20220422043004: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00017193136080889636, 'time_algorithm_update': 0.006837663516192369, 'loss': 0.09182010324488224, 'time_step': 0.007084262874764456, 'init_value': -5.193562030792236, 'ave_value': -5.1752334286323345, 'soft_opc': nan} step=3195




2022-04-22 04:30.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.32 [info     ] FQE_20220422043004: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00017577963815608495, 'time_algorithm_update': 0.0071444994966748734, 'loss': 0.11097841901363621, 'time_step': 0.007397945833877778, 'init_value': -5.435248374938965, 'ave_value': -5.708682801303582, 'soft_opc': nan} step=3550




2022-04-22 04:30.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.35 [info     ] FQE_20220422043004: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00017316173499738667, 'time_algorithm_update': 0.007107605061060946, 'loss': 0.12443460156275353, 'time_step': 0.007356626215115399, 'init_value': -5.898820400238037, 'ave_value': -6.401244184389193, 'soft_opc': nan} step=3905




2022-04-22 04:30.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.38 [info     ] FQE_20220422043004: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00017462448335029708, 'time_algorithm_update': 0.006809164772570973, 'loss': 0.14291976282084493, 'time_step': 0.007059093260429275, 'init_value': -5.686187744140625, 'ave_value': -6.484401281463747, 'soft_opc': nan} step=4260




2022-04-22 04:30.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.41 [info     ] FQE_20220422043004: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.0001774149881282323, 'time_algorithm_update': 0.0076204951380340145, 'loss': 0.1589609993910286, 'time_step': 0.00787264326928367, 'init_value': -6.075778007507324, 'ave_value': -7.083420852355975, 'soft_opc': nan} step=4615




2022-04-22 04:30.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.44 [info     ] FQE_20220422043004: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.0001759730594258913, 'time_algorithm_update': 0.007758831642043423, 'loss': 0.1760569354283138, 'time_step': 0.008007302082760234, 'init_value': -6.214119911193848, 'ave_value': -7.357206934858281, 'soft_opc': nan} step=4970




2022-04-22 04:30.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.47 [info     ] FQE_20220422043004: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.00017326717645349638, 'time_algorithm_update': 0.007555001890155631, 'loss': 0.19379560808113344, 'time_step': 0.0078033702474244885, 'init_value': -6.369584560394287, 'ave_value': -7.7602921141400225, 'soft_opc': nan} step=5325




2022-04-22 04:30.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.50 [info     ] FQE_20220422043004: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00017530011459135673, 'time_algorithm_update': 0.007728021245607188, 'loss': 0.21282771634279002, 'time_step': 0.007978312398346376, 'init_value': -6.5809125900268555, 'ave_value': -8.005974666012914, 'soft_opc': nan} step=5680




2022-04-22 04:30.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.52 [info     ] FQE_20220422043004: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.0001770234443772007, 'time_algorithm_update': 0.0074052971853336815, 'loss': 0.22991457858555753, 'time_step': 0.007657450689396388, 'init_value': -6.960404872894287, 'ave_value': -8.44561993351477, 'soft_opc': nan} step=6035




2022-04-22 04:30.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.55 [info     ] FQE_20220422043004: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00018241036106163348, 'time_algorithm_update': 0.007790517135405205, 'loss': 0.24453255985929093, 'time_step': 0.008051379969422246, 'init_value': -7.361262798309326, 'ave_value': -8.871813069917788, 'soft_opc': nan} step=6390




2022-04-22 04:30.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:30.58 [info     ] FQE_20220422043004: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00017475813207492024, 'time_algorithm_update': 0.007543614884497414, 'loss': 0.2639302859109052, 'time_step': 0.007795684438356211, 'init_value': -7.700345039367676, 'ave_value': -9.309144474376122, 'soft_opc': nan} step=6745




2022-04-22 04:30.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.02 [info     ] FQE_20220422043004: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00017845865706322899, 'time_algorithm_update': 0.007824643229095029, 'loss': 0.2930989471959396, 'time_step': 0.008081639652520838, 'init_value': -7.740589141845703, 'ave_value': -9.310313419801119, 'soft_opc': nan} step=7100




2022-04-22 04:31.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.05 [info     ] FQE_20220422043004: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.00017600798271071743, 'time_algorithm_update': 0.007662221075783313, 'loss': 0.31838691330606667, 'time_step': 0.00791383662693937, 'init_value': -8.260374069213867, 'ave_value': -9.662548729836788, 'soft_opc': nan} step=7455




2022-04-22 04:31.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.08 [info     ] FQE_20220422043004: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00018084687246403225, 'time_algorithm_update': 0.0075860560779840176, 'loss': 0.34358952803615955, 'time_step': 0.007843364124566736, 'init_value': -8.661778450012207, 'ave_value': -9.895482755711775, 'soft_opc': nan} step=7810




2022-04-22 04:31.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.11 [info     ] FQE_20220422043004: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001777581765618123, 'time_algorithm_update': 0.007741774975414007, 'loss': 0.3675981090102397, 'time_step': 0.007999747236010055, 'init_value': -9.571610450744629, 'ave_value': -10.606819276866458, 'soft_opc': nan} step=8165




2022-04-22 04:31.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.13 [info     ] FQE_20220422043004: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00017337328951123735, 'time_algorithm_update': 0.00745834834139112, 'loss': 0.3917384220470845, 'time_step': 0.00770841249277894, 'init_value': -10.20721435546875, 'ave_value': -11.086797445770857, 'soft_opc': nan} step=8520




2022-04-22 04:31.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.17 [info     ] FQE_20220422043004: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00017412682654152455, 'time_algorithm_update': 0.007783775598230496, 'loss': 0.4151550135771993, 'time_step': 0.008036902253056916, 'init_value': -10.824877738952637, 'ave_value': -11.53696977471216, 'soft_opc': nan} step=8875




2022-04-22 04:31.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.19 [info     ] FQE_20220422043004: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00017821419406944598, 'time_algorithm_update': 0.0073547564761739384, 'loss': 0.4344606292761967, 'time_step': 0.007610126952050438, 'init_value': -11.393954277038574, 'ave_value': -11.926185440480172, 'soft_opc': nan} step=9230




2022-04-22 04:31.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.22 [info     ] FQE_20220422043004: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00017450225185340558, 'time_algorithm_update': 0.007754999483135384, 'loss': 0.4452254969731603, 'time_step': 0.008006192596865372, 'init_value': -11.714118957519531, 'ave_value': -12.184283623213435, 'soft_opc': nan} step=9585




2022-04-22 04:31.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.25 [info     ] FQE_20220422043004: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00018581202332402618, 'time_algorithm_update': 0.007508534444889552, 'loss': 0.4586635931172002, 'time_step': 0.007771701544103488, 'init_value': -12.2066068649292, 'ave_value': -12.256675187115254, 'soft_opc': nan} step=9940




2022-04-22 04:31.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.28 [info     ] FQE_20220422043004: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.0001736647646192094, 'time_algorithm_update': 0.007758977379597408, 'loss': 0.48890941486707035, 'time_step': 0.008010308171661807, 'init_value': -12.671060562133789, 'ave_value': -12.62025128775486, 'soft_opc': nan} step=10295




2022-04-22 04:31.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.31 [info     ] FQE_20220422043004: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00017257609837491747, 'time_algorithm_update': 0.007495084279020067, 'loss': 0.5089340483586133, 'time_step': 0.007742103388611699, 'init_value': -13.194574356079102, 'ave_value': -12.756568991537643, 'soft_opc': nan} step=10650




2022-04-22 04:31.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.34 [info     ] FQE_20220422043004: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00017140079552019146, 'time_algorithm_update': 0.007570217025112099, 'loss': 0.5059223037422963, 'time_step': 0.007816364395786339, 'init_value': -13.738067626953125, 'ave_value': -13.095968914379581, 'soft_opc': nan} step=11005




2022-04-22 04:31.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.37 [info     ] FQE_20220422043004: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00017362648332622688, 'time_algorithm_update': 0.007856640345613721, 'loss': 0.5203832590454062, 'time_step': 0.008109355980241803, 'init_value': -14.179898262023926, 'ave_value': -13.189353897036119, 'soft_opc': nan} step=11360




2022-04-22 04:31.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.40 [info     ] FQE_20220422043004: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00017231350213709013, 'time_algorithm_update': 0.00743888666932012, 'loss': 0.5342656344962372, 'time_step': 0.007689118721115757, 'init_value': -15.040154457092285, 'ave_value': -13.566210789347554, 'soft_opc': nan} step=11715




2022-04-22 04:31.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.43 [info     ] FQE_20220422043004: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00017394885210923747, 'time_algorithm_update': 0.007413162312037508, 'loss': 0.5557229895302107, 'time_step': 0.007661673048852195, 'init_value': -15.297286987304688, 'ave_value': -13.44552655458316, 'soft_opc': nan} step=12070




2022-04-22 04:31.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.46 [info     ] FQE_20220422043004: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00017659764894297424, 'time_algorithm_update': 0.00748524934473172, 'loss': 0.5611009202530266, 'time_step': 0.007738560018405108, 'init_value': -16.258563995361328, 'ave_value': -13.90499364412995, 'soft_opc': nan} step=12425




2022-04-22 04:31.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.49 [info     ] FQE_20220422043004: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00017336321548676827, 'time_algorithm_update': 0.007688030726473096, 'loss': 0.5716594246596518, 'time_step': 0.007935920903380488, 'init_value': -16.324434280395508, 'ave_value': -13.762972210663127, 'soft_opc': nan} step=12780




2022-04-22 04:31.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.52 [info     ] FQE_20220422043004: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.0001743121885917556, 'time_algorithm_update': 0.007555553946696537, 'loss': 0.5864238107235918, 'time_step': 0.007806833697036958, 'init_value': -17.19156837463379, 'ave_value': -14.15725951675551, 'soft_opc': nan} step=13135




2022-04-22 04:31.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.55 [info     ] FQE_20220422043004: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.0001754424941371864, 'time_algorithm_update': 0.0076578415615457885, 'loss': 0.590813376208846, 'time_step': 0.007911483334823393, 'init_value': -17.357385635375977, 'ave_value': -13.905117146105074, 'soft_opc': nan} step=13490




2022-04-22 04:31.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:31.58 [info     ] FQE_20220422043004: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00017255460712271677, 'time_algorithm_update': 0.007810885469678422, 'loss': 0.5994779803016236, 'time_step': 0.008057322972257372, 'init_value': -17.86465835571289, 'ave_value': -14.117565329937669, 'soft_opc': nan} step=13845




2022-04-22 04:31.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.01 [info     ] FQE_20220422043004: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00018160242429921325, 'time_algorithm_update': 0.007551221444573201, 'loss': 0.6049942063508739, 'time_step': 0.007807739016035912, 'init_value': -17.891324996948242, 'ave_value': -13.781451466455193, 'soft_opc': nan} step=14200




2022-04-22 04:32.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.04 [info     ] FQE_20220422043004: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.0001731704658185932, 'time_algorithm_update': 0.007735733918740716, 'loss': 0.6158583495713456, 'time_step': 0.00798484976862518, 'init_value': -18.234052658081055, 'ave_value': -13.540466458920001, 'soft_opc': nan} step=14555




2022-04-22 04:32.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.07 [info     ] FQE_20220422043004: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.0001777481025373432, 'time_algorithm_update': 0.007513517057391959, 'loss': 0.6293632547620317, 'time_step': 0.007766658487454266, 'init_value': -18.82090950012207, 'ave_value': -13.814143748890173, 'soft_opc': nan} step=14910




2022-04-22 04:32.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.10 [info     ] FQE_20220422043004: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00017134773899132096, 'time_algorithm_update': 0.007460548508335167, 'loss': 0.6509685454234271, 'time_step': 0.0077069853393124865, 'init_value': -19.037700653076172, 'ave_value': -13.854252160576435, 'soft_opc': nan} step=15265




2022-04-22 04:32.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.13 [info     ] FQE_20220422043004: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00017390855601136113, 'time_algorithm_update': 0.00743672209726253, 'loss': 0.6506979932116581, 'time_step': 0.0076867446093492105, 'init_value': -19.130172729492188, 'ave_value': -13.756364697701223, 'soft_opc': nan} step=15620




2022-04-22 04:32.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.16 [info     ] FQE_20220422043004: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00017617856952506053, 'time_algorithm_update': 0.007788453975193937, 'loss': 0.6566299029796476, 'time_step': 0.008041682041866678, 'init_value': -18.724502563476562, 'ave_value': -13.128836650892785, 'soft_opc': nan} step=15975




2022-04-22 04:32.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.19 [info     ] FQE_20220422043004: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00017870849287006217, 'time_algorithm_update': 0.007788649411268637, 'loss': 0.6506683070947167, 'time_step': 0.008044561198059942, 'init_value': -19.038555145263672, 'ave_value': -13.265205935265106, 'soft_opc': nan} step=16330




2022-04-22 04:32.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.22 [info     ] FQE_20220422043004: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.000174621796943772, 'time_algorithm_update': 0.007301687186872455, 'loss': 0.664999465670594, 'time_step': 0.0075528225428621535, 'init_value': -19.12198829650879, 'ave_value': -13.160822006707178, 'soft_opc': nan} step=16685




2022-04-22 04:32.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.25 [info     ] FQE_20220422043004: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00018884094668106293, 'time_algorithm_update': 0.0077593326568603516, 'loss': 0.6723834789554838, 'time_step': 0.008024842302564164, 'init_value': -19.15937614440918, 'ave_value': -13.165747843180307, 'soft_opc': nan} step=17040




2022-04-22 04:32.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.28 [info     ] FQE_20220422043004: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00017279033929529325, 'time_algorithm_update': 0.007466803805928835, 'loss': 0.6778507359903043, 'time_step': 0.0077155509465177295, 'init_value': -19.576858520507812, 'ave_value': -13.159058847116304, 'soft_opc': nan} step=17395




2022-04-22 04:32.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 04:32.31 [info     ] FQE_20220422043004: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00017387027471837862, 'time_algorithm_update': 0.007774091774309185, 'loss': 0.6924350893675861, 'time_step': 0.008024119659208916, 'init_value': -19.832660675048828, 'ave_value': -13.24088280067097, 'soft_opc': nan} step=17750




2022-04-22 04:32.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422043004/model_17750.pt
most optimal hyper params for cql at this point:  [0.008015326902538738, 0.00488678244826896, 5.7138426138602196e-05, 7]
search iteration:  19
using hyper params:  [0.004363634651377042, 0.008303585900915041, 9.83296609019554e-05, 1]
2022-04-22 04:32.31 [debug    ] RoundIterator is selected.
2022-04-22 04:32.31 [info     ] Directory is created at d3rlpy_logs/CQL_20220422043231
2022-04-22 04:32.31 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 04:32.31 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 04:32.31 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422043231/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True

  minimum = torch.tensor(
  maximum = torch.tensor(


Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:32.51 [info     ] CQL_20220422043231: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00035117471838273063, 'time_algorithm_update': 0.05506995440907561, 'temp_loss': 4.8651862268503, 'temp': 0.982488737802285, 'alpha_loss': -17.686427987379833, 'alpha': 1.0176944853253447, 'critic_loss': 25.300923722327788, 'actor_loss': -1.9226872540278241, 'time_step': 0.055519809612649025, 'td_error': 1.2149344301853522, 'init_value': 0.42698609828948975, 'ave_value': 0.5722622496467622} step=346
2022-04-22 04:32.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:33.11 [info     ] CQL_20220422043231: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00034557877248422256, 'time_algorithm_update': 0.055553155827384466, 'temp_loss': 4.792136208859482, 'temp': 0.9494860802771729, 'alpha_loss': -18.33968556133998, 'alpha': 1.0541342920650636, 'critic_loss': 31.27521051285584, 'actor_loss': -1.9026412564205986, 'time_step': 0.055999230098173106, 'td_error': 1.2061966099397248, 'init_value': 0.2336588352918625, 'ave_value': 0.4864075919412758} step=692
2022-04-22 04:33.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:33.31 [info     ] CQL_20220422043231: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003472483916089714, 'time_algorithm_update': 0.055562628486942, 'temp_loss': 4.634907060964948, 'temp': 0.9184169591851317, 'alpha_loss': -19.013371599891972, 'alpha': 1.0924133455133163, 'critic_loss': 42.11037759284753, 'actor_loss': -1.5535760969784909, 'time_step': 0.05600755614352364, 'td_error': 1.2023592220709476, 'init_value': -0.12181012332439423, 'ave_value': 0.21914588205689442} step=1038
2022-04-22 04:33.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:33.51 [info     ] CQL_20220422043231: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00035682234460907865, 'time_algorithm_update': 0.05371798669671737, 'temp_loss': 4.486920144516609, 'temp': 0.8887993963812127, 'alpha_loss': -19.723465891931788, 'alpha': 1.1325972273170604, 'critic_loss': 55.17436812516582, 'actor_loss': -1.0571683837326964, 'time_step': 0.05417575794837378, 'td_error': 1.205534522068894, 'init_value': -0.7329408526420593, 'ave_value': -0.26282877637388696} step=1384
2022-04-22 04:33.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:34.10 [info     ] CQL_20220422043231: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003512980620985086, 'time_algorithm_update': 0.051638295195695294, 'temp_loss': 4.343180715693214, 'temp': 0.8604667967110011, 'alpha_loss': -20.449887915153724, 'alpha': 1.174715935150323, 'critic_loss': 70.7811322074405, 'actor_loss': -0.5645141889895663, 'time_step': 0.052092034692709156, 'td_error': 1.2044430384221094, 'init_value': -0.8895897269248962, 'ave_value': -0.43666786397730023} step=1730
2022-04-22 04:34.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:34.29 [info     ] CQL_20220422043231: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003529077320429631, 'time_algorithm_update': 0.052051052881803124, 'temp_loss': 4.206672199888725, 'temp': 0.8332903805495686, 'alpha_loss': -21.220077591824392, 'alpha': 1.218794683845057, 'critic_loss': 88.73584495941338, 'actor_loss': -0.15304468136086974, 'time_step': 0.05250575570012793, 'td_error': 1.206959839200029, 'init_value': -1.092534065246582, 'ave_value': -0.5961735894480493} step=2076
2022-04-22 04:34.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:34.48 [info     ] CQL_20220422043231: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.000392128277376208, 'time_algorithm_update': 0.05211594339050998, 'temp_loss': 4.075164564083077, 'temp': 0.8071686136240215, 'alpha_loss': -22.025355272899475, 'alpha': 1.2648830410373004, 'critic_loss': 110.96638120529968, 'actor_loss': 0.19417988223621266, 'time_step': 0.05261077563886698, 'td_error': 1.203486822956547, 'init_value': -1.409371256828308, 'ave_value': -0.9251946896328752} step=2422
2022-04-22 04:34.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:35.06 [info     ] CQL_20220422043231: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00035871315553698236, 'time_algorithm_update': 0.05153770322744557, 'temp_loss': 3.9480126682733525, 'temp': 0.7820256452339922, 'alpha_loss': -22.855013329169655, 'alpha': 1.3130107143021732, 'critic_loss': 141.6436759199021, 'actor_loss': 0.2586265349642218, 'time_step': 0.05199484122281819, 'td_error': 1.2018822824003517, 'init_value': -1.19221031665802, 'ave_value': -0.7952655288154562} step=2768
2022-04-22 04:35.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:35.25 [info     ] CQL_20220422043231: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00035189341947522467, 'time_algorithm_update': 0.0508110178688358, 'temp_loss': 3.8276726972161, 'temp': 0.7577857981527472, 'alpha_loss': -23.720637646713698, 'alpha': 1.3632088810033192, 'critic_loss': 188.74285527047394, 'actor_loss': -0.27928649053795834, 'time_step': 0.05126273700956664, 'td_error': 1.2082086089135449, 'init_value': -0.40258699655532837, 'ave_value': -0.17486372942492656} step=3114
2022-04-22 04:35.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:35.43 [info     ] CQL_20220422043231: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003483481489854052, 'time_algorithm_update': 0.05166358685906912, 'temp_loss': 3.70828678290968, 'temp': 0.734391188001357, 'alpha_loss': -24.62384642893179, 'alpha': 1.415528530330327, 'critic_loss': 253.13018388693044, 'actor_loss': -1.1665152032251302, 'time_step': 0.05211226237302571, 'td_error': 1.2189295758075565, 'init_value': 0.2856939136981964, 'ave_value': 0.40606789136735566} step=3460
2022-04-22 04:35.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:36.02 [info     ] CQL_20220422043231: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00034977108067859805, 'time_algorithm_update': 0.05211461968504624, 'temp_loss': 3.5951383637555074, 'temp': 0.7117959209260224, 'alpha_loss': -25.568109346952053, 'alpha': 1.4700305227599393, 'critic_loss': 325.61374183610684, 'actor_loss': -1.9387507521348193, 'time_step': 0.052565771720312925, 'td_error': 1.2290060972399284, 'init_value': 0.9955146312713623, 'ave_value': 1.0475789771028015} step=3806
2022-04-22 04:36.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:36.21 [info     ] CQL_20220422043231: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00034574277139123465, 'time_algorithm_update': 0.05233759411497612, 'temp_loss': 3.485017544272318, 'temp': 0.6899528353545018, 'alpha_loss': -26.549637932308837, 'alpha': 1.5267725280254563, 'critic_loss': 398.5151050545577, 'actor_loss': -2.6117926663746034, 'time_step': 0.052781889204345, 'td_error': 1.2308025605787514, 'init_value': 1.6074373722076416, 'ave_value': 1.6394764828987176} step=4152
2022-04-22 04:36.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:36.40 [info     ] CQL_20220422043231: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00034213755172112084, 'time_algorithm_update': 0.05075607961312884, 'temp_loss': 3.377710808908319, 'temp': 0.6688269419821701, 'alpha_loss': -27.581676163425335, 'alpha': 1.5858284448612632, 'critic_loss': 476.64039726477824, 'actor_loss': -3.171816953344841, 'time_step': 0.05120321091888957, 'td_error': 1.2326859460027382, 'init_value': 2.134556770324707, 'ave_value': 2.1502038565107067} step=4498
2022-04-22 04:36.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:36.58 [info     ] CQL_20220422043231: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003360999112873408, 'time_algorithm_update': 0.05003380637637453, 'temp_loss': 3.2747232011287886, 'temp': 0.6483838006940191, 'alpha_loss': -28.64648973872896, 'alpha': 1.6472706508774289, 'critic_loss': 559.8372137698135, 'actor_loss': -3.6417202694567643, 'time_step': 0.05047094477394413, 'td_error': 1.234987001389011, 'init_value': 2.697471857070923, 'ave_value': 2.708550875782684} step=4844
2022-04-22 04:36.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:37.17 [info     ] CQL_20220422043231: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003511857435193365, 'time_algorithm_update': 0.051396340304027406, 'temp_loss': 3.1753393308275695, 'temp': 0.6285870660936212, 'alpha_loss': -29.756782614426807, 'alpha': 1.7111629995307482, 'critic_loss': 649.8421697892205, 'actor_loss': -4.118641098799733, 'time_step': 0.0518532667545914, 'td_error': 1.2352049596607872, 'init_value': 3.12558913230896, 'ave_value': 3.137642274517875} step=5190
2022-04-22 04:37.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:37.36 [info     ] CQL_20220422043231: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003515406151038374, 'time_algorithm_update': 0.05337365514281168, 'temp_loss': 3.078237136664418, 'temp': 0.609415485507491, 'alpha_loss': -30.913744606723675, 'alpha': 1.7775953815162526, 'critic_loss': 759.7133974284795, 'actor_loss': -4.49361602695002, 'time_step': 0.05382777983053571, 'td_error': 1.2348027624691382, 'init_value': 3.496061325073242, 'ave_value': 3.5072336852239623} step=5536
2022-04-22 04:37.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:37.55 [info     ] CQL_20220422043231: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003434881309553378, 'time_algorithm_update': 0.0538484740119449, 'temp_loss': 2.984134103521446, 'temp': 0.5908440246402873, 'alpha_loss': -32.111300435369415, 'alpha': 1.8466557460713249, 'critic_loss': 880.1170802474711, 'actor_loss': -4.861657937827138, 'time_step': 0.054297307323169154, 'td_error': 1.2359247964314655, 'init_value': 3.904587507247925, 'ave_value': 3.915054885252441} step=5882
2022-04-22 04:37.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:38.15 [info     ] CQL_20220422043231: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0003520525948849717, 'time_algorithm_update': 0.0539670831206217, 'temp_loss': 2.893519786051932, 'temp': 0.5728513117815028, 'alpha_loss': -33.36062926364083, 'alpha': 1.9184312189934571, 'critic_loss': 1014.2830120814328, 'actor_loss': -5.232463970349703, 'time_step': 0.054419380391953306, 'td_error': 1.2366142769121453, 'init_value': 4.296229362487793, 'ave_value': 4.308444105860971} step=6228
2022-04-22 04:38.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:38.35 [info     ] CQL_20220422043231: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00034851559324760656, 'time_algorithm_update': 0.0539789516801779, 'temp_loss': 2.8053594800089137, 'temp': 0.5554137762226811, 'alpha_loss': -34.65413319582195, 'alpha': 1.9930238682410621, 'critic_loss': 1164.9235388254156, 'actor_loss': -5.578788932348262, 'time_step': 0.05442333979413688, 'td_error': 1.2390329108978373, 'init_value': 4.697821617126465, 'ave_value': 4.705844136220462} step=6574
2022-04-22 04:38.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:38.54 [info     ] CQL_20220422043231: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003595786287605418, 'time_algorithm_update': 0.05359281280826282, 'temp_loss': 2.719540686276607, 'temp': 0.5385143062627384, 'alpha_loss': -36.00224092516596, 'alpha': 2.070538496695502, 'critic_loss': 1341.4824155245212, 'actor_loss': -5.916741562716534, 'time_step': 0.054050187844072464, 'td_error': 1.243709431598668, 'init_value': 5.066396713256836, 'ave_value': 5.067635926662208} step=6920
2022-04-22 04:38.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:39.13 [info     ] CQL_20220422043231: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00033965965226895545, 'time_algorithm_update': 0.05341799135153004, 'temp_loss': 2.637479178478263, 'temp': 0.5221307405157586, 'alpha_loss': -37.403555302261616, 'alpha': 2.151083013914913, 'critic_loss': 1523.82020030821, 'actor_loss': -6.052633173892953, 'time_step': 0.05385086577751733, 'td_error': 1.2412114415191198, 'init_value': 4.997570037841797, 'ave_value': 5.005052407559991} step=7266
2022-04-22 04:39.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:39.33 [info     ] CQL_20220422043231: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00034592606428730696, 'time_algorithm_update': 0.05354899615910701, 'temp_loss': 2.557005344787774, 'temp': 0.5062513376419255, 'alpha_loss': -38.86111409402307, 'alpha': 2.2347789968369325, 'critic_loss': 1736.4901768678874, 'actor_loss': -6.1258906008880265, 'time_step': 0.053988755093833614, 'td_error': 1.2445495982217016, 'init_value': 5.238348484039307, 'ave_value': 5.24050518583494} step=7612
2022-04-22 04:39.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:39.52 [info     ] CQL_20220422043231: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00034802084024241893, 'time_algorithm_update': 0.05340340027230323, 'temp_loss': 2.4787724920779985, 'temp': 0.49085848705272456, 'alpha_loss': -40.37330015546325, 'alpha': 2.3217439093341716, 'critic_loss': 1956.6192351765715, 'actor_loss': -6.242900772590858, 'time_step': 0.05385107663325492, 'td_error': 1.2465889260514975, 'init_value': 5.4771623611450195, 'ave_value': 5.4736869100938765} step=7958
2022-04-22 04:39.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:40.12 [info     ] CQL_20220422043231: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00033943639325268696, 'time_algorithm_update': 0.05295835547364516, 'temp_loss': 2.403765738354942, 'temp': 0.4759340800474145, 'alpha_loss': -41.946702946128184, 'alpha': 2.412097692489624, 'critic_loss': 2169.291794617052, 'actor_loss': -6.317397900399445, 'time_step': 0.053391198891435745, 'td_error': 1.243397154355405, 'init_value': 5.393107891082764, 'ave_value': 5.397662638600487} step=8304
2022-04-22 04:40.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:40.31 [info     ] CQL_20220422043231: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00036458266263752315, 'time_algorithm_update': 0.05394675759221777, 'temp_loss': 2.330538267345098, 'temp': 0.46146552747040126, 'alpha_loss': -43.5773196027458, 'alpha': 2.5059819448890024, 'critic_loss': 2377.6754079829752, 'actor_loss': -6.465210674815095, 'time_step': 0.05441218166682073, 'td_error': 1.245948313095062, 'init_value': 5.70007848739624, 'ave_value': 5.703737649266445} step=8650
2022-04-22 04:40.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:40.52 [info     ] CQL_20220422043231: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00034842945936787336, 'time_algorithm_update': 0.05637934648921724, 'temp_loss': 2.259698964956868, 'temp': 0.4474379350512014, 'alpha_loss': -45.270941761876806, 'alpha': 2.60351216586339, 'critic_loss': 2531.241227166501, 'actor_loss': -6.529306705287426, 'time_step': 0.05682142690427042, 'td_error': 1.246741589401805, 'init_value': 5.7531962394714355, 'ave_value': 5.759415725058659} step=8996
2022-04-22 04:40.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:41.12 [info     ] CQL_20220422043231: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00036297781619033375, 'time_algorithm_update': 0.055816296208111536, 'temp_loss': 2.191100070931319, 'temp': 0.4338368465101099, 'alpha_loss': -47.0346745948571, 'alpha': 2.7048462966963047, 'critic_loss': 2797.9538228470465, 'actor_loss': -6.636545546482064, 'time_step': 0.05627884823462866, 'td_error': 1.2463048306902071, 'init_value': 5.800879001617432, 'ave_value': 5.803738543839181} step=9342
2022-04-22 04:41.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:41.32 [info     ] CQL_20220422043231: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0003553642702929546, 'time_algorithm_update': 0.05586827697092398, 'temp_loss': 2.1242484129922237, 'temp': 0.42064904779023515, 'alpha_loss': -48.86823700480378, 'alpha': 2.8101314637013255, 'critic_loss': 3055.902673269283, 'actor_loss': -6.70962069902806, 'time_step': 0.056316110440072294, 'td_error': 1.2476967919884256, 'init_value': 5.908215045928955, 'ave_value': 5.911338003171231} step=9688
2022-04-22 04:41.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:41.53 [info     ] CQL_20220422043231: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.000377403518368054, 'time_algorithm_update': 0.05576531942180126, 'temp_loss': 2.059925297092151, 'temp': 0.4078643779361868, 'alpha_loss': -50.76693962626374, 'alpha': 2.919526821616068, 'critic_loss': 3330.770376569274, 'actor_loss': -6.816803616595406, 'time_step': 0.05624659281934617, 'td_error': 1.2492801208307616, 'init_value': 6.160584926605225, 'ave_value': 6.161695639812421} step=10034
2022-04-22 04:41.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:42.13 [info     ] CQL_20220422043231: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003676407599035715, 'time_algorithm_update': 0.05683317556546603, 'temp_loss': 1.9975039958953857, 'temp': 0.3954649974155977, 'alpha_loss': -52.75290549835029, 'alpha': 3.033181670084165, 'critic_loss': 3616.974044182397, 'actor_loss': -6.951525434593245, 'time_step': 0.05729886148706337, 'td_error': 1.2505463232050051, 'init_value': 6.264556884765625, 'ave_value': 6.265418333394543} step=10380
2022-04-22 04:42.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:42.34 [info     ] CQL_20220422043231: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00036208064569903247, 'time_algorithm_update': 0.05658386001697165, 'temp_loss': 1.936805011909132, 'temp': 0.3834433259302481, 'alpha_loss': -54.79808950699823, 'alpha': 3.15126233293831, 'critic_loss': 3836.546974490833, 'actor_loss': -7.063193104859722, 'time_step': 0.05704520203474629, 'td_error': 1.2514464626319868, 'init_value': 6.386580944061279, 'ave_value': 6.388058453727639} step=10726
2022-04-22 04:42.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:42.54 [info     ] CQL_20220422043231: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003624293156441926, 'time_algorithm_update': 0.056182079232497024, 'temp_loss': 1.8777172145126872, 'temp': 0.3717883063017288, 'alpha_loss': -56.92973843613112, 'alpha': 3.2739352229013607, 'critic_loss': 4158.318068664198, 'actor_loss': -7.136124728042955, 'time_step': 0.05664523488524332, 'td_error': 1.2503648855088847, 'init_value': 6.393134117126465, 'ave_value': 6.396231705297955} step=11072
2022-04-22 04:42.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:43.15 [info     ] CQL_20220422043231: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003570607631881802, 'time_algorithm_update': 0.05624633028328074, 'temp_loss': 1.820537699440311, 'temp': 0.3604891441460979, 'alpha_loss': -59.141483185608266, 'alpha': 3.401378065864475, 'critic_loss': 4381.962256988349, 'actor_loss': -7.242164449195641, 'time_step': 0.05669921671034973, 'td_error': 1.254787558424262, 'init_value': 6.666411876678467, 'ave_value': 6.665655885391633} step=11418
2022-04-22 04:43.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:43.35 [info     ] CQL_20220422043231: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003724442741085339, 'time_algorithm_update': 0.05629479678380007, 'temp_loss': 1.7656307268693956, 'temp': 0.34953080539758496, 'alpha_loss': -61.45316440383823, 'alpha': 3.533785993653226, 'critic_loss': 4648.364079999097, 'actor_loss': -7.169600457814387, 'time_step': 0.05676767729610377, 'td_error': 1.2514195548569083, 'init_value': 6.4438157081604, 'ave_value': 6.445970194370275} step=11764
2022-04-22 04:43.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:43.55 [info     ] CQL_20220422043231: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003492721932471832, 'time_algorithm_update': 0.0566381574366134, 'temp_loss': 1.7117872899667377, 'temp': 0.3389059679012078, 'alpha_loss': -63.845095937651706, 'alpha': 3.671354673501384, 'critic_loss': 5060.743268492594, 'actor_loss': -7.065819769236394, 'time_step': 0.05708581450357603, 'td_error': 1.2506487556426151, 'init_value': 6.295580863952637, 'ave_value': 6.296832489142094} step=12110
2022-04-22 04:43.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:44.16 [info     ] CQL_20220422043231: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00037699352110052384, 'time_algorithm_update': 0.05600148956210627, 'temp_loss': 1.6597394843322004, 'temp': 0.3286051569301958, 'alpha_loss': -66.32004192109741, 'alpha': 3.8142723175831614, 'critic_loss': 5465.8144107884755, 'actor_loss': -6.7960028303840945, 'time_step': 0.0564817789662091, 'td_error': 1.2491976230651212, 'init_value': 6.07072114944458, 'ave_value': 6.0719072213496235} step=12456
2022-04-22 04:44.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:44.36 [info     ] CQL_20220422043231: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003510127866888322, 'time_algorithm_update': 0.05620737985379434, 'temp_loss': 1.6093429516505644, 'temp': 0.3186172053131754, 'alpha_loss': -68.9182780932829, 'alpha': 3.962756908697889, 'critic_loss': 5841.990812974169, 'actor_loss': -6.623082822457904, 'time_step': 0.056659956198896286, 'td_error': 1.247885724581966, 'init_value': 5.928299903869629, 'ave_value': 5.932076467276184} step=12802
2022-04-22 04:44.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:44.57 [info     ] CQL_20220422043231: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.000360229800891325, 'time_algorithm_update': 0.05669932558357371, 'temp_loss': 1.560013563302211, 'temp': 0.30893366150773327, 'alpha_loss': -71.58930093842434, 'alpha': 4.117027966273313, 'critic_loss': 6114.872441462699, 'actor_loss': -6.568571976843597, 'time_step': 0.057159942009545474, 'td_error': 1.2506298868761312, 'init_value': 6.043064117431641, 'ave_value': 6.04041795599455} step=13148
2022-04-22 04:44.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:45.17 [info     ] CQL_20220422043231: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003476728593682967, 'time_algorithm_update': 0.056290458392545664, 'temp_loss': 1.513010526323594, 'temp': 0.29954443139836967, 'alpha_loss': -74.36765269461395, 'alpha': 4.277287193805496, 'critic_loss': 6029.098143120033, 'actor_loss': -6.661166685854079, 'time_step': 0.05673596900322534, 'td_error': 1.252928334736449, 'init_value': 6.2448883056640625, 'ave_value': 6.238799806498079} step=13494
2022-04-22 04:45.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:45.38 [info     ] CQL_20220422043231: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003373753817784304, 'time_algorithm_update': 0.05623918392754704, 'temp_loss': 1.467021962121732, 'temp': 0.29043946962136064, 'alpha_loss': -77.27647935723982, 'alpha': 4.4437932196380086, 'critic_loss': 5714.061570007677, 'actor_loss': -6.722948152894919, 'time_step': 0.0566733117737522, 'td_error': 1.2500522510415175, 'init_value': 6.148326873779297, 'ave_value': 6.150407602663796} step=13840
2022-04-22 04:45.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:45.58 [info     ] CQL_20220422043231: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003437272386054772, 'time_algorithm_update': 0.05651583216782939, 'temp_loss': 1.4217891469167148, 'temp': 0.2816126719547834, 'alpha_loss': -80.27366267739004, 'alpha': 4.6167782979204475, 'critic_loss': 5506.003381277095, 'actor_loss': -6.690148258484857, 'time_step': 0.05696087696648747, 'td_error': 1.2528097048534974, 'init_value': 6.270435810089111, 'ave_value': 6.265938548218758} step=14186
2022-04-22 04:45.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:46.19 [info     ] CQL_20220422043231: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00036881149159690547, 'time_algorithm_update': 0.05643147609137386, 'temp_loss': 1.3790744126187584, 'temp': 0.27305555877657983, 'alpha_loss': -83.40991793064713, 'alpha': 4.796502171224252, 'critic_loss': 4948.902067151373, 'actor_loss': -6.812677393069846, 'time_step': 0.05690371232225716, 'td_error': 1.253214620967532, 'init_value': 6.367903709411621, 'ave_value': 6.365740375419317} step=14532
2022-04-22 04:46.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:46.39 [info     ] CQL_20220422043231: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00035119539051386663, 'time_algorithm_update': 0.05645563078753521, 'temp_loss': 1.3374234779032668, 'temp': 0.264754511717427, 'alpha_loss': -86.65529165102568, 'alpha': 4.983230345511023, 'critic_loss': 4623.025917009122, 'actor_loss': -6.829039452392931, 'time_step': 0.0569125317424708, 'td_error': 1.2494922739720187, 'init_value': 6.245438098907471, 'ave_value': 6.249774203565009} step=14878
2022-04-22 04:46.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:46.59 [info     ] CQL_20220422043231: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00034202109871572155, 'time_algorithm_update': 0.05420058517786809, 'temp_loss': 1.29669302322961, 'temp': 0.2567056476036248, 'alpha_loss': -90.0256036526895, 'alpha': 5.177222381437445, 'critic_loss': 4431.470115352918, 'actor_loss': -6.886803884726728, 'time_step': 0.05463988794756763, 'td_error': 1.2542298845042386, 'init_value': 6.537285804748535, 'ave_value': 6.534567865233333} step=15224
2022-04-22 04:46.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:47.18 [info     ] CQL_20220422043231: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00035390343969267916, 'time_algorithm_update': 0.05391322670644418, 'temp_loss': 1.257235353048137, 'temp': 0.2489026810306345, 'alpha_loss': -93.5323488753655, 'alpha': 5.378771434629583, 'critic_loss': 3835.1485786217486, 'actor_loss': -7.080601685308997, 'time_step': 0.054368768813293104, 'td_error': 1.2534283640137134, 'init_value': 6.596268653869629, 'ave_value': 6.597229232625296} step=15570
2022-04-22 04:47.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:47.38 [info     ] CQL_20220422043231: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003672962243846386, 'time_algorithm_update': 0.053273633036310275, 'temp_loss': 1.2189736066526071, 'temp': 0.2413379305359945, 'alpha_loss': -97.16810023577916, 'alpha': 5.588165212917879, 'critic_loss': 3648.9047865674675, 'actor_loss': -7.125262279731, 'time_step': 0.053739369949164416, 'td_error': 1.255999808639451, 'init_value': 6.816286563873291, 'ave_value': 6.813916676210645} step=15916
2022-04-22 04:47.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:47.57 [info     ] CQL_20220422043231: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003630873784853544, 'time_algorithm_update': 0.05370573570273515, 'temp_loss': 1.1820095178019794, 'temp': 0.23400212544409527, 'alpha_loss': -100.94823874765738, 'alpha': 5.805682346310919, 'critic_loss': 3092.7966181584175, 'actor_loss': -7.363413459303751, 'time_step': 0.05416963693034442, 'td_error': 1.2586070584848124, 'init_value': 7.04008150100708, 'ave_value': 7.035012297632571} step=16262
2022-04-22 04:47.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:48.17 [info     ] CQL_20220422043231: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003560278457024194, 'time_algorithm_update': 0.05335969800893971, 'temp_loss': 1.1457384180471388, 'temp': 0.22689102943232983, 'alpha_loss': -104.88313977015501, 'alpha': 6.031687831603033, 'critic_loss': 2853.769117763277, 'actor_loss': -7.466791303171588, 'time_step': 0.05381220820322202, 'td_error': 1.2567683209080858, 'init_value': 7.0843825340271, 'ave_value': 7.085902659233965} step=16608
2022-04-22 04:48.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:48.35 [info     ] CQL_20220422043231: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003520994517155465, 'time_algorithm_update': 0.0518891563305276, 'temp_loss': 1.110958585160316, 'temp': 0.21999620585944613, 'alpha_loss': -108.97775193583759, 'alpha': 6.266499978269456, 'critic_loss': 2814.4805879978776, 'actor_loss': -7.4951697470824845, 'time_step': 0.05233540355814675, 'td_error': 1.2564955540380338, 'init_value': 7.063434600830078, 'ave_value': 7.064548099951023} step=16954
2022-04-22 04:48.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:48.54 [info     ] CQL_20220422043231: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00033397963970382776, 'time_algorithm_update': 0.05015872944297129, 'temp_loss': 1.0771532399805983, 'temp': 0.2133111677676267, 'alpha_loss': -113.21725309515276, 'alpha': 6.510461586748244, 'critic_loss': 2937.4889515783057, 'actor_loss': -7.51070134901587, 'time_step': 0.05059104434327583, 'td_error': 1.2593576045310673, 'init_value': 7.225947380065918, 'ave_value': 7.2230275703980045} step=17300
2022-04-22 04:48.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422043231/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 04:48.55 [info     ] FQE_20220422044854: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.0001673011456505727, 'time_algorithm_update': 0.00624266452034988, 'loss': 0.00665345494003998, 'time_step': 0.006486170709469897, 'init_value': -0.3184739947319031, 'ave_value': -0.2768472292938748, 'soft_opc': nan} step=177




2022-04-22 04:48.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:48.56 [info     ] FQE_20220422044854: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00015947239547126036, 'time_algorithm_update': 0.005861382026456844, 'loss': 0.004070785397789993, 'time_step': 0.006091870830557441, 'init_value': -0.39871928095817566, 'ave_value': -0.3165314046157015, 'soft_opc': nan} step=354




2022-04-22 04:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:48.58 [info     ] FQE_20220422044854: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00016122214538229389, 'time_algorithm_update': 0.006191019284523139, 'loss': 0.003461215246500353, 'time_step': 0.006425896606876352, 'init_value': -0.47788214683532715, 'ave_value': -0.3666648353273804, 'soft_opc': nan} step=531




2022-04-22 04:48.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:48.59 [info     ] FQE_20220422044854: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00016465025433039262, 'time_algorithm_update': 0.006011102159144514, 'loss': 0.0030573276601637066, 'time_step': 0.006247284722193486, 'init_value': -0.4806528091430664, 'ave_value': -0.35304607290763396, 'soft_opc': nan} step=708




2022-04-22 04:48.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.00 [info     ] FQE_20220422044854: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00016108340462722348, 'time_algorithm_update': 0.006304992794317041, 'loss': 0.002757292014604571, 'time_step': 0.006541501330790547, 'init_value': -0.4920330345630646, 'ave_value': -0.3452842366095778, 'soft_opc': nan} step=885




2022-04-22 04:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.01 [info     ] FQE_20220422044854: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00016308639009120102, 'time_algorithm_update': 0.005988803960509219, 'loss': 0.0025559356725527406, 'time_step': 0.00622734646339201, 'init_value': -0.5206173062324524, 'ave_value': -0.36413546337916686, 'soft_opc': nan} step=1062




2022-04-22 04:49.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.02 [info     ] FQE_20220422044854: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.0001618942971956932, 'time_algorithm_update': 0.006239713248559984, 'loss': 0.0023634124492652882, 'time_step': 0.006475879647637491, 'init_value': -0.5551934838294983, 'ave_value': -0.39784298893567677, 'soft_opc': nan} step=1239




2022-04-22 04:49.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.04 [info     ] FQE_20220422044854: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.0001629530373266188, 'time_algorithm_update': 0.005852363877377267, 'loss': 0.002174270621469339, 'time_step': 0.006090319089296847, 'init_value': -0.5283748507499695, 'ave_value': -0.3838526086719544, 'soft_opc': nan} step=1416




2022-04-22 04:49.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.05 [info     ] FQE_20220422044854: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00016467988827807754, 'time_algorithm_update': 0.006040334701538086, 'loss': 0.00191624739929497, 'time_step': 0.006278505433077192, 'init_value': -0.5254666805267334, 'ave_value': -0.3858776843561246, 'soft_opc': nan} step=1593




2022-04-22 04:49.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.06 [info     ] FQE_20220422044854: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001662289355434267, 'time_algorithm_update': 0.00589012964970648, 'loss': 0.0018020381021055546, 'time_step': 0.0061271244523215425, 'init_value': -0.559065043926239, 'ave_value': -0.4296636523684492, 'soft_opc': nan} step=1770




2022-04-22 04:49.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.07 [info     ] FQE_20220422044854: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00016397944951461533, 'time_algorithm_update': 0.006127804686120675, 'loss': 0.0017116303222864276, 'time_step': 0.0063633635892706404, 'init_value': -0.5775253176689148, 'ave_value': -0.4548064227554354, 'soft_opc': nan} step=1947




2022-04-22 04:49.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.08 [info     ] FQE_20220422044854: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.0001633315436584128, 'time_algorithm_update': 0.006066904229632878, 'loss': 0.0017209014985881388, 'time_step': 0.006301429985606738, 'init_value': -0.5752726197242737, 'ave_value': -0.46015797764085253, 'soft_opc': nan} step=2124




2022-04-22 04:49.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.10 [info     ] FQE_20220422044854: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00016477417811162055, 'time_algorithm_update': 0.006145516357852914, 'loss': 0.0017349083030183331, 'time_step': 0.006383058041502527, 'init_value': -0.6060063242912292, 'ave_value': -0.482367286350097, 'soft_opc': nan} step=2301




2022-04-22 04:49.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.11 [info     ] FQE_20220422044854: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00016205324291509424, 'time_algorithm_update': 0.00610565869821667, 'loss': 0.001769059342937281, 'time_step': 0.006340300295986025, 'init_value': -0.6247475743293762, 'ave_value': -0.5047569807775475, 'soft_opc': nan} step=2478




2022-04-22 04:49.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.12 [info     ] FQE_20220422044854: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00016190507317666953, 'time_algorithm_update': 0.006097318088940981, 'loss': 0.0017713644146514099, 'time_step': 0.006329175442625574, 'init_value': -0.6466341018676758, 'ave_value': -0.535504660739734, 'soft_opc': nan} step=2655




2022-04-22 04:49.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.13 [info     ] FQE_20220422044854: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00016567127852790099, 'time_algorithm_update': 0.006163215906606556, 'loss': 0.0018564448016103736, 'time_step': 0.006404232844121039, 'init_value': -0.6717799305915833, 'ave_value': -0.5559186639795611, 'soft_opc': nan} step=2832




2022-04-22 04:49.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.14 [info     ] FQE_20220422044854: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00016045839773059565, 'time_algorithm_update': 0.00587898593837932, 'loss': 0.0019116231855623278, 'time_step': 0.00611175790344928, 'init_value': -0.7128877639770508, 'ave_value': -0.5852209485675748, 'soft_opc': nan} step=3009




2022-04-22 04:49.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.16 [info     ] FQE_20220422044854: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00016227415052510923, 'time_algorithm_update': 0.006136461839837543, 'loss': 0.002083449276219422, 'time_step': 0.006371173481483244, 'init_value': -0.7438614964485168, 'ave_value': -0.6146055101788974, 'soft_opc': nan} step=3186




2022-04-22 04:49.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.17 [info     ] FQE_20220422044854: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001622633745441329, 'time_algorithm_update': 0.0059310527844617595, 'loss': 0.0021498130259859126, 'time_step': 0.006164962962522345, 'init_value': -0.7538781762123108, 'ave_value': -0.6113256483319196, 'soft_opc': nan} step=3363




2022-04-22 04:49.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.18 [info     ] FQE_20220422044854: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.0001717718307581325, 'time_algorithm_update': 0.00625080846797275, 'loss': 0.0022748458359685155, 'time_step': 0.006497279398858884, 'init_value': -0.7903950810432434, 'ave_value': -0.6362289337458106, 'soft_opc': nan} step=3540




2022-04-22 04:49.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.19 [info     ] FQE_20220422044854: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00016686471842103086, 'time_algorithm_update': 0.006045598768245029, 'loss': 0.0023136612057151787, 'time_step': 0.006291832627549683, 'init_value': -0.828355610370636, 'ave_value': -0.6614604615502887, 'soft_opc': nan} step=3717




2022-04-22 04:49.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.20 [info     ] FQE_20220422044854: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00016427040100097656, 'time_algorithm_update': 0.006077076755674545, 'loss': 0.0024536761230299787, 'time_step': 0.006315236711232675, 'init_value': -0.8362119793891907, 'ave_value': -0.657897848201854, 'soft_opc': nan} step=3894




2022-04-22 04:49.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.22 [info     ] FQE_20220422044854: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.00016112785554875088, 'time_algorithm_update': 0.006044875430521992, 'loss': 0.0025911893734069133, 'time_step': 0.006281115914468711, 'init_value': -0.8901569843292236, 'ave_value': -0.6929897277674697, 'soft_opc': nan} step=4071




2022-04-22 04:49.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.23 [info     ] FQE_20220422044854: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00016592586107846708, 'time_algorithm_update': 0.006137282161389367, 'loss': 0.002737208227771676, 'time_step': 0.0063752858652233405, 'init_value': -0.9330272078514099, 'ave_value': -0.7304299034066386, 'soft_opc': nan} step=4248




2022-04-22 04:49.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.24 [info     ] FQE_20220422044854: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00016877476104908745, 'time_algorithm_update': 0.005906272069209039, 'loss': 0.00285722268136918, 'time_step': 0.00614848109961903, 'init_value': -0.9383239150047302, 'ave_value': -0.7185452507199109, 'soft_opc': nan} step=4425




2022-04-22 04:49.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.25 [info     ] FQE_20220422044854: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00016335040162512138, 'time_algorithm_update': 0.006235163090592724, 'loss': 0.0030357998325213223, 'time_step': 0.006474606734884661, 'init_value': -0.9952735304832458, 'ave_value': -0.7730047568499863, 'soft_opc': nan} step=4602




2022-04-22 04:49.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.26 [info     ] FQE_20220422044854: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.000164031982421875, 'time_algorithm_update': 0.005877019321851138, 'loss': 0.003079360370697864, 'time_step': 0.006110642589418228, 'init_value': -0.9991857409477234, 'ave_value': -0.7708662711755112, 'soft_opc': nan} step=4779




2022-04-22 04:49.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.27 [info     ] FQE_20220422044854: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00016156562977591476, 'time_algorithm_update': 0.005169414531039653, 'loss': 0.003231386236430772, 'time_step': 0.005399918152114092, 'init_value': -1.0198158025741577, 'ave_value': -0.7839803415241542, 'soft_opc': nan} step=4956




2022-04-22 04:49.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.29 [info     ] FQE_20220422044854: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00018512057719257592, 'time_algorithm_update': 0.00621946652730306, 'loss': 0.003444950639580687, 'time_step': 0.006478884799332269, 'init_value': -1.045906901359558, 'ave_value': -0.782903312289858, 'soft_opc': nan} step=5133




2022-04-22 04:49.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.30 [info     ] FQE_20220422044854: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.00016660340088235455, 'time_algorithm_update': 0.006041626472257625, 'loss': 0.0033041747865046106, 'time_step': 0.006281037788606633, 'init_value': -1.0950770378112793, 'ave_value': -0.8097042305870457, 'soft_opc': nan} step=5310




2022-04-22 04:49.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.31 [info     ] FQE_20220422044854: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.0001642623190152443, 'time_algorithm_update': 0.0061348158087434065, 'loss': 0.003530301222637543, 'time_step': 0.0063729932752706235, 'init_value': -1.142405390739441, 'ave_value': -0.8375259140724535, 'soft_opc': nan} step=5487




2022-04-22 04:49.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.32 [info     ] FQE_20220422044854: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.0001677335318872484, 'time_algorithm_update': 0.006121693357909467, 'loss': 0.0037396666924350785, 'time_step': 0.00636393067526952, 'init_value': -1.1864495277404785, 'ave_value': -0.8620462296424327, 'soft_opc': nan} step=5664




2022-04-22 04:49.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.33 [info     ] FQE_20220422044854: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.00016411684327206368, 'time_algorithm_update': 0.006158198340464447, 'loss': 0.003659698563972541, 'time_step': 0.006394188282853466, 'init_value': -1.1989190578460693, 'ave_value': -0.8587608788509269, 'soft_opc': nan} step=5841




2022-04-22 04:49.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.35 [info     ] FQE_20220422044854: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.00015854431410967294, 'time_algorithm_update': 0.006077911894200212, 'loss': 0.003790692011246952, 'time_step': 0.006306722339263744, 'init_value': -1.2260035276412964, 'ave_value': -0.8752430050491212, 'soft_opc': nan} step=6018




2022-04-22 04:49.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.36 [info     ] FQE_20220422044854: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.00016554061975856285, 'time_algorithm_update': 0.006120420445156636, 'loss': 0.003877896497533841, 'time_step': 0.006357892084929903, 'init_value': -1.2446786165237427, 'ave_value': -0.8773700866881792, 'soft_opc': nan} step=6195




2022-04-22 04:49.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.37 [info     ] FQE_20220422044854: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.0001627644576595328, 'time_algorithm_update': 0.006139194898012667, 'loss': 0.0038986056778345804, 'time_step': 0.006373561708267125, 'init_value': -1.2697283029556274, 'ave_value': -0.8977421241255852, 'soft_opc': nan} step=6372




2022-04-22 04:49.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.38 [info     ] FQE_20220422044854: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.0001725665593551377, 'time_algorithm_update': 0.006101214953061551, 'loss': 0.0040564494680068695, 'time_step': 0.0063425592109981905, 'init_value': -1.3148853778839111, 'ave_value': -0.9285250856532706, 'soft_opc': nan} step=6549




2022-04-22 04:49.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.39 [info     ] FQE_20220422044854: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.0001660713368216477, 'time_algorithm_update': 0.006051389511022191, 'loss': 0.004250691450229914, 'time_step': 0.006292432041491492, 'init_value': -1.3212553262710571, 'ave_value': -0.9281046973692404, 'soft_opc': nan} step=6726




2022-04-22 04:49.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.41 [info     ] FQE_20220422044854: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00016327901075115312, 'time_algorithm_update': 0.006201784489518505, 'loss': 0.004326442554349733, 'time_step': 0.006436901577448441, 'init_value': -1.4013413190841675, 'ave_value': -0.9896687558508134, 'soft_opc': nan} step=6903




2022-04-22 04:49.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.42 [info     ] FQE_20220422044854: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00016437950780836202, 'time_algorithm_update': 0.005973335039817681, 'loss': 0.004524132004007697, 'time_step': 0.006210919827391199, 'init_value': -1.386756420135498, 'ave_value': -0.9624672322928368, 'soft_opc': nan} step=7080




2022-04-22 04:49.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.43 [info     ] FQE_20220422044854: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00016549347484179136, 'time_algorithm_update': 0.006036789403796869, 'loss': 0.004611302716532554, 'time_step': 0.006276860748980678, 'init_value': -1.438272476196289, 'ave_value': -0.9994719587109826, 'soft_opc': nan} step=7257




2022-04-22 04:49.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.44 [info     ] FQE_20220422044854: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.0001638299327785686, 'time_algorithm_update': 0.0058749355165298375, 'loss': 0.004702489448206152, 'time_step': 0.006109307714774784, 'init_value': -1.4657424688339233, 'ave_value': -1.0198770757059794, 'soft_opc': nan} step=7434




2022-04-22 04:49.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.45 [info     ] FQE_20220422044854: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00016268767879507636, 'time_algorithm_update': 0.006106683763407044, 'loss': 0.004876736678133093, 'time_step': 0.006339276577793272, 'init_value': -1.4473018646240234, 'ave_value': -0.998734564273744, 'soft_opc': nan} step=7611




2022-04-22 04:49.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.47 [info     ] FQE_20220422044854: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00016350126535879018, 'time_algorithm_update': 0.006045953028619626, 'loss': 0.005012907026635515, 'time_step': 0.006282949178232311, 'init_value': -1.4656286239624023, 'ave_value': -1.010622177561661, 'soft_opc': nan} step=7788




2022-04-22 04:49.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.48 [info     ] FQE_20220422044854: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00016124100334900248, 'time_algorithm_update': 0.006042410424873653, 'loss': 0.005049484738511807, 'time_step': 0.00627165864416435, 'init_value': -1.5164722204208374, 'ave_value': -1.0466834473448832, 'soft_opc': nan} step=7965




2022-04-22 04:49.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.49 [info     ] FQE_20220422044854: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00016319819089383056, 'time_algorithm_update': 0.006031960417321847, 'loss': 0.005433912186404574, 'time_step': 0.006272701220323811, 'init_value': -1.5915287733078003, 'ave_value': -1.0988605220128107, 'soft_opc': nan} step=8142




2022-04-22 04:49.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.50 [info     ] FQE_20220422044854: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00016047186770681607, 'time_algorithm_update': 0.006049521225320417, 'loss': 0.0052262233942812885, 'time_step': 0.006281039135604255, 'init_value': -1.5733643770217896, 'ave_value': -1.0744164155454965, 'soft_opc': nan} step=8319




2022-04-22 04:49.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.51 [info     ] FQE_20220422044854: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.00016481997603076998, 'time_algorithm_update': 0.006118049729341841, 'loss': 0.005435704945364314, 'time_step': 0.006357500108621888, 'init_value': -1.5816599130630493, 'ave_value': -1.0756638414240456, 'soft_opc': nan} step=8496




2022-04-22 04:49.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.53 [info     ] FQE_20220422044854: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00016241962626828985, 'time_algorithm_update': 0.0060772814993130956, 'loss': 0.005469461691521087, 'time_step': 0.0063127730525819594, 'init_value': -1.6283942461013794, 'ave_value': -1.1095313760119156, 'soft_opc': nan} step=8673




2022-04-22 04:49.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 04:49.54 [info     ] FQE_20220422044854: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00016361845415190787, 'time_algorithm_update': 0.005921335543616344, 'loss': 0.005749106126650642, 'time_step': 0.006154048240790933, 'init_value': -1.6951364278793335, 'ave_value': -1.1558691334975015, 'soft_opc': nan} step=8850




2022-04-22 04:49.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044854/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 04:49.54 [info     ] Directory is created at d3rlpy_logs/FQE_20220422044954
2022-04-22 04:49.54 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 04:49.54 [debug    ] Building models...
2022-04-22 04:49.54 [debug    ] Models have been built.
2022-04-22 04:49.54 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422044954/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 04:49.56 [info     ] FQE_20220422044954: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016839975534483443, 'time_algorithm_update': 0.006050080060958862, 'loss': 0.021507359421162237, 'time_step': 0.006292856016824412, 'init_value': -1.0684486627578735, 'ave_value': -1.1069591639076803, 'soft_opc': nan} step=344




2022-04-22 04:49.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:49.59 [info     ] FQE_20220422044954: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016576121019762615, 'time_algorithm_update': 0.005987784890241401, 'loss': 0.02026928616904242, 'time_step': 0.006225667027539985, 'init_value': -1.864067554473877, 'ave_value': -1.9615232350858482, 'soft_opc': nan} step=688




2022-04-22 04:49.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.01 [info     ] FQE_20220422044954: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016732548558434776, 'time_algorithm_update': 0.006048685589502024, 'loss': 0.02233770633722807, 'time_step': 0.0062873509041098665, 'init_value': -2.6931159496307373, 'ave_value': -2.944022503723432, 'soft_opc': nan} step=1032




2022-04-22 04:50.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.03 [info     ] FQE_20220422044954: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001722387103147285, 'time_algorithm_update': 0.006034894738086435, 'loss': 0.02441494388852355, 'time_step': 0.006282278271608575, 'init_value': -3.2976064682006836, 'ave_value': -3.782880761360263, 'soft_opc': nan} step=1376




2022-04-22 04:50.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.06 [info     ] FQE_20220422044954: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016678488531778025, 'time_algorithm_update': 0.006026475235473278, 'loss': 0.03223851929028887, 'time_step': 0.006269073763559031, 'init_value': -4.042573928833008, 'ave_value': -4.911442366242409, 'soft_opc': nan} step=1720




2022-04-22 04:50.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.08 [info     ] FQE_20220422044954: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016968818598015365, 'time_algorithm_update': 0.006130529004474019, 'loss': 0.04153194324503284, 'time_step': 0.006379894738973573, 'init_value': -4.3887248039245605, 'ave_value': -5.759004316861565, 'soft_opc': nan} step=2064




2022-04-22 04:50.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.11 [info     ] FQE_20220422044954: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001676027164902798, 'time_algorithm_update': 0.006107127943704295, 'loss': 0.056241382043375525, 'time_step': 0.006349957266519236, 'init_value': -4.8937811851501465, 'ave_value': -6.892443650907224, 'soft_opc': nan} step=2408




2022-04-22 04:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.13 [info     ] FQE_20220422044954: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017670559328655864, 'time_algorithm_update': 0.006293734145718951, 'loss': 0.0709962289574621, 'time_step': 0.006548307662786439, 'init_value': -5.028049468994141, 'ave_value': -7.720843049991238, 'soft_opc': nan} step=2752




2022-04-22 04:50.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.15 [info     ] FQE_20220422044954: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016878441322681515, 'time_algorithm_update': 0.006072749232136926, 'loss': 0.0812563770883825, 'time_step': 0.006318612154140029, 'init_value': -5.27500057220459, 'ave_value': -8.637471910492257, 'soft_opc': nan} step=3096




2022-04-22 04:50.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.18 [info     ] FQE_20220422044954: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001685231230979742, 'time_algorithm_update': 0.006037407143171443, 'loss': 0.0967141830275745, 'time_step': 0.006280708451603734, 'init_value': -5.469540119171143, 'ave_value': -9.514719947534907, 'soft_opc': nan} step=3440




2022-04-22 04:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.20 [info     ] FQE_20220422044954: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016517625298610952, 'time_algorithm_update': 0.00608490927274837, 'loss': 0.10006242560569284, 'time_step': 0.006319009980490041, 'init_value': -5.698538303375244, 'ave_value': -10.410703980076958, 'soft_opc': nan} step=3784




2022-04-22 04:50.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.22 [info     ] FQE_20220422044954: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001731688200041305, 'time_algorithm_update': 0.006213569363882375, 'loss': 0.11273639397912247, 'time_step': 0.00646325945854187, 'init_value': -6.120589256286621, 'ave_value': -11.406024756777596, 'soft_opc': nan} step=4128




2022-04-22 04:50.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.25 [info     ] FQE_20220422044954: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017135365064754041, 'time_algorithm_update': 0.006082979745643083, 'loss': 0.11429976991399429, 'time_step': 0.006327912557956784, 'init_value': -6.191299915313721, 'ave_value': -12.167884176079738, 'soft_opc': nan} step=4472




2022-04-22 04:50.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.27 [info     ] FQE_20220422044954: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017147355301435605, 'time_algorithm_update': 0.006208807923072992, 'loss': 0.1227640108595234, 'time_step': 0.006459350502768228, 'init_value': -6.731102466583252, 'ave_value': -13.23071096399883, 'soft_opc': nan} step=4816




2022-04-22 04:50.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.30 [info     ] FQE_20220422044954: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001715095930321272, 'time_algorithm_update': 0.006815265777499177, 'loss': 0.13098986201573076, 'time_step': 0.007064546956572422, 'init_value': -6.756930351257324, 'ave_value': -13.767784417522988, 'soft_opc': nan} step=5160




2022-04-22 04:50.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.32 [info     ] FQE_20220422044954: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00018097356308338254, 'time_algorithm_update': 0.0064856971419134804, 'loss': 0.13853605236414127, 'time_step': 0.006748371346052303, 'init_value': -7.13131046295166, 'ave_value': -14.586898859439897, 'soft_opc': nan} step=5504




2022-04-22 04:50.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.35 [info     ] FQE_20220422044954: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017453210298405138, 'time_algorithm_update': 0.00696467868117399, 'loss': 0.14647843534137708, 'time_step': 0.00721824238466662, 'init_value': -7.1106181144714355, 'ave_value': -14.91711829505349, 'soft_opc': nan} step=5848




2022-04-22 04:50.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.38 [info     ] FQE_20220422044954: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00018148158871850303, 'time_algorithm_update': 0.006817119066105332, 'loss': 0.15903728621264615, 'time_step': 0.007076145604599354, 'init_value': -7.499945163726807, 'ave_value': -15.773493613123156, 'soft_opc': nan} step=6192




2022-04-22 04:50.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.40 [info     ] FQE_20220422044954: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017321109771728516, 'time_algorithm_update': 0.006554388029630794, 'loss': 0.16537369950133007, 'time_step': 0.006803702476412751, 'init_value': -7.869137763977051, 'ave_value': -16.725642936889226, 'soft_opc': nan} step=6536




2022-04-22 04:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.43 [info     ] FQE_20220422044954: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017126285752584768, 'time_algorithm_update': 0.006816142520239187, 'loss': 0.17547443732186113, 'time_step': 0.0070642634879711065, 'init_value': -7.962214469909668, 'ave_value': -17.358863928605725, 'soft_opc': nan} step=6880




2022-04-22 04:50.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.45 [info     ] FQE_20220422044954: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001690429310465968, 'time_algorithm_update': 0.00640622197195541, 'loss': 0.18072508799648562, 'time_step': 0.006651127061178518, 'init_value': -8.059650421142578, 'ave_value': -17.96829486939561, 'soft_opc': nan} step=7224




2022-04-22 04:50.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.48 [info     ] FQE_20220422044954: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001701726469882699, 'time_algorithm_update': 0.006805056056310964, 'loss': 0.1973282300628895, 'time_step': 0.007054396840028985, 'init_value': -8.087847709655762, 'ave_value': -18.47309996394172, 'soft_opc': nan} step=7568




2022-04-22 04:50.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.50 [info     ] FQE_20220422044954: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017386259034622546, 'time_algorithm_update': 0.006773335989131484, 'loss': 0.20850983457260786, 'time_step': 0.007027568512184675, 'init_value': -8.442665100097656, 'ave_value': -19.360011587387184, 'soft_opc': nan} step=7912




2022-04-22 04:50.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.53 [info     ] FQE_20220422044954: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00018078712529914323, 'time_algorithm_update': 0.006585547397303027, 'loss': 0.21828090546511894, 'time_step': 0.006844581559646961, 'init_value': -8.205788612365723, 'ave_value': -19.642709623585112, 'soft_opc': nan} step=8256




2022-04-22 04:50.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.56 [info     ] FQE_20220422044954: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017065710799638614, 'time_algorithm_update': 0.006832746572272722, 'loss': 0.23338570038593093, 'time_step': 0.007080828727677811, 'init_value': -8.55289077758789, 'ave_value': -20.098907086783644, 'soft_opc': nan} step=8600




2022-04-22 04:50.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:50.58 [info     ] FQE_20220422044954: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00017183811165565668, 'time_algorithm_update': 0.006384502316630164, 'loss': 0.2553279111378415, 'time_step': 0.006634247164393581, 'init_value': -9.027413368225098, 'ave_value': -20.820003191561664, 'soft_opc': nan} step=8944




2022-04-22 04:50.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.01 [info     ] FQE_20220422044954: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017291446064793788, 'time_algorithm_update': 0.006893578656884127, 'loss': 0.2610767507806507, 'time_step': 0.007144893324652383, 'init_value': -9.628889083862305, 'ave_value': -21.530485587103318, 'soft_opc': nan} step=9288




2022-04-22 04:51.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.03 [info     ] FQE_20220422044954: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001714014729788137, 'time_algorithm_update': 0.006617954997129218, 'loss': 0.28594282150896655, 'time_step': 0.00687355496162592, 'init_value': -9.87710189819336, 'ave_value': -21.86744525701999, 'soft_opc': nan} step=9632




2022-04-22 04:51.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.06 [info     ] FQE_20220422044954: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017304545225099076, 'time_algorithm_update': 0.006710227145705112, 'loss': 0.29724417273821524, 'time_step': 0.00695992486421452, 'init_value': -10.131306648254395, 'ave_value': -22.4037250062241, 'soft_opc': nan} step=9976




2022-04-22 04:51.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.08 [info     ] FQE_20220422044954: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017098909200623978, 'time_algorithm_update': 0.006795218517613965, 'loss': 0.3204404704305235, 'time_step': 0.007042087787805602, 'init_value': -10.580045700073242, 'ave_value': -22.887845901077245, 'soft_opc': nan} step=10320




2022-04-22 04:51.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.11 [info     ] FQE_20220422044954: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001684843107711437, 'time_algorithm_update': 0.006403206392776134, 'loss': 0.33665138957372237, 'time_step': 0.006648001975791399, 'init_value': -10.755964279174805, 'ave_value': -23.23524748573839, 'soft_opc': nan} step=10664




2022-04-22 04:51.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.14 [info     ] FQE_20220422044954: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.000170368094776952, 'time_algorithm_update': 0.006823322107625562, 'loss': 0.34222411663747976, 'time_step': 0.007071246241414269, 'init_value': -10.599220275878906, 'ave_value': -23.2408671339785, 'soft_opc': nan} step=11008




2022-04-22 04:51.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.16 [info     ] FQE_20220422044954: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.0001737780349199162, 'time_algorithm_update': 0.0066837155541708305, 'loss': 0.36369082175173556, 'time_step': 0.006934481997822606, 'init_value': -10.292118072509766, 'ave_value': -23.21655695570616, 'soft_opc': nan} step=11352




2022-04-22 04:51.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.19 [info     ] FQE_20220422044954: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017398110655851142, 'time_algorithm_update': 0.006772541722585988, 'loss': 0.36766577704787945, 'time_step': 0.0070261497830235685, 'init_value': -10.805932998657227, 'ave_value': -23.764830869706852, 'soft_opc': nan} step=11696




2022-04-22 04:51.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.21 [info     ] FQE_20220422044954: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017226712648258654, 'time_algorithm_update': 0.0068550130655599196, 'loss': 0.39465443184057813, 'time_step': 0.007107793591743292, 'init_value': -11.356471061706543, 'ave_value': -24.233179027345955, 'soft_opc': nan} step=12040




2022-04-22 04:51.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.24 [info     ] FQE_20220422044954: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017503597015558288, 'time_algorithm_update': 0.007420720749123152, 'loss': 0.41457050260113076, 'time_step': 0.007674352374187735, 'init_value': -11.633956909179688, 'ave_value': -24.603780451959402, 'soft_opc': nan} step=12384




2022-04-22 04:51.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.27 [info     ] FQE_20220422044954: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017517943714940272, 'time_algorithm_update': 0.0078064230985419696, 'loss': 0.4331278922090437, 'time_step': 0.008058311634285505, 'init_value': -11.809635162353516, 'ave_value': -24.77322406762632, 'soft_opc': nan} step=12728




2022-04-22 04:51.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.30 [info     ] FQE_20220422044954: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017471923384555551, 'time_algorithm_update': 0.007447634325471035, 'loss': 0.44567244193451694, 'time_step': 0.007700101580730704, 'init_value': -11.765233993530273, 'ave_value': -24.704500189230583, 'soft_opc': nan} step=13072




2022-04-22 04:51.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.33 [info     ] FQE_20220422044954: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001781173916750176, 'time_algorithm_update': 0.00784440511880919, 'loss': 0.4585101447598792, 'time_step': 0.008098989725112915, 'init_value': -12.191459655761719, 'ave_value': -25.263878689456714, 'soft_opc': nan} step=13416




2022-04-22 04:51.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.36 [info     ] FQE_20220422044954: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017385358034178268, 'time_algorithm_update': 0.007469857154890548, 'loss': 0.4816803388665755, 'time_step': 0.007722948179688565, 'init_value': -12.251996994018555, 'ave_value': -25.24585261314642, 'soft_opc': nan} step=13760




2022-04-22 04:51.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.39 [info     ] FQE_20220422044954: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00017449883527533952, 'time_algorithm_update': 0.007544030283772668, 'loss': 0.5042254078949173, 'time_step': 0.007794236720994462, 'init_value': -12.086647033691406, 'ave_value': -25.104082138568312, 'soft_opc': nan} step=14104




2022-04-22 04:51.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.42 [info     ] FQE_20220422044954: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.000177022329596586, 'time_algorithm_update': 0.007911335590273835, 'loss': 0.5236977094589451, 'time_step': 0.008168589930201685, 'init_value': -12.196405410766602, 'ave_value': -25.204326887404218, 'soft_opc': nan} step=14448




2022-04-22 04:51.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.45 [info     ] FQE_20220422044954: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017536656801090685, 'time_algorithm_update': 0.007547432600065719, 'loss': 0.5315507392312379, 'time_step': 0.007799809062203696, 'init_value': -12.45358657836914, 'ave_value': -25.24024582843187, 'soft_opc': nan} step=14792




2022-04-22 04:51.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.48 [info     ] FQE_20220422044954: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001778089722921682, 'time_algorithm_update': 0.007599642110425372, 'loss': 0.5586780913980938, 'time_step': 0.007853895425796509, 'init_value': -12.777835845947266, 'ave_value': -25.521408315437956, 'soft_opc': nan} step=15136




2022-04-22 04:51.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.51 [info     ] FQE_20220422044954: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017863581346911053, 'time_algorithm_update': 0.007663659578145936, 'loss': 0.5748070832612643, 'time_step': 0.00792279700900233, 'init_value': -13.334402084350586, 'ave_value': -26.031440666217257, 'soft_opc': nan} step=15480




2022-04-22 04:51.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.54 [info     ] FQE_20220422044954: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017645123393036599, 'time_algorithm_update': 0.007799722427545592, 'loss': 0.587833481537568, 'time_step': 0.008056596268055051, 'init_value': -13.001874923706055, 'ave_value': -25.850995991454536, 'soft_opc': nan} step=15824




2022-04-22 04:51.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.57 [info     ] FQE_20220422044954: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017837244410847509, 'time_algorithm_update': 0.007916507332823997, 'loss': 0.617889046138369, 'time_step': 0.008175002281055894, 'init_value': -13.668818473815918, 'ave_value': -26.420837117158722, 'soft_opc': nan} step=16168




2022-04-22 04:51.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:51.59 [info     ] FQE_20220422044954: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016965561134870663, 'time_algorithm_update': 0.007319080275158549, 'loss': 0.6375189971417016, 'time_step': 0.007567493721496227, 'init_value': -13.811402320861816, 'ave_value': -26.3369984826008, 'soft_opc': nan} step=16512




2022-04-22 04:51.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:52.02 [info     ] FQE_20220422044954: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001809222753657851, 'time_algorithm_update': 0.007830231688743415, 'loss': 0.6521178073043999, 'time_step': 0.008090369923170223, 'init_value': -14.43007755279541, 'ave_value': -26.75473582682846, 'soft_opc': nan} step=16856




2022-04-22 04:52.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 04:52.05 [info     ] FQE_20220422044954: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017406704813935036, 'time_algorithm_update': 0.00745916158653969, 'loss': 0.6697926231779071, 'time_step': 0.007709212774454161, 'init_value': -14.804027557373047, 'ave_value': -27.278089136084926, 'soft_opc': nan} step=17200




2022-04-22 04:52.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422044954/model_17200.pt
search iteration:  20
using hyper params:  [0.009964022862240704, 0.007644229022148978, 7.624075055528004e-05, 5]
2022-04-22 04:52.05 [debug    ] RoundIterator is selected.
2022-04-22 04:52.05 [info     ] Directory is created at d3rlpy_logs/CQL_20220422045205
2022-04-22 04:52.05 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 04:52.05 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 04:52.05 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422045205/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.009964022862240704, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:52.25 [info     ] CQL_20220422045205: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00042462142216676923, 'time_algorithm_update': 0.055128666017785925, 'temp_loss': 4.94658809038945, 'temp': 0.9866988421175521, 'alpha_loss': -17.767985906215074, 'alpha': 1.0176861262045844, 'critic_loss': 98.74467441801391, 'actor_loss': 4.008803851665617, 'time_step': 0.055656331812026186, 'td_error': 1.3005720992317296, 'init_value': -7.640960693359375, 'ave_value': -6.948562948468625} step=346
2022-04-22 04:52.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:52.45 [info     ] CQL_20220422045205: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0004196931861039531, 'time_algorithm_update': 0.05519790869916795, 'temp_loss': 4.850033343871894, 'temp': 0.9611383087373193, 'alpha_loss': -18.403513285466012, 'alpha': 1.0541023295049723, 'critic_loss': 185.22853123659343, 'actor_loss': 8.11431097295243, 'time_step': 0.05571973392729125, 'td_error': 1.368025648207456, 'init_value': -9.957201957702637, 'ave_value': -9.140070462102514} step=692
2022-04-22 04:52.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:53.06 [info     ] CQL_20220422045205: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0004217920964852923, 'time_algorithm_update': 0.05505340430088815, 'temp_loss': 4.728144684278896, 'temp': 0.9366019297541911, 'alpha_loss': -19.05599515837741, 'alpha': 1.0923492429573411, 'critic_loss': 395.6891224591029, 'actor_loss': 9.548260608849498, 'time_step': 0.05557278401589807, 'td_error': 1.3474030660058656, 'init_value': -9.893611907958984, 'ave_value': -9.31318614730455} step=1038
2022-04-22 04:53.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:53.26 [info     ] CQL_20220422045205: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.000426049866428265, 'time_algorithm_update': 0.055500730613752595, 'temp_loss': 4.609043110312754, 'temp': 0.912935150841068, 'alpha_loss': -19.716900323856773, 'alpha': 1.1324520183436444, 'critic_loss': 730.2201306999074, 'actor_loss': 7.1213454276840125, 'time_step': 0.05603117267520442, 'td_error': 1.2946565484597823, 'init_value': -7.013574600219727, 'ave_value': -6.8045116525851705} step=1384
2022-04-22 04:53.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:53.46 [info     ] CQL_20220422045205: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00042836859047068335, 'time_algorithm_update': 0.05585944928185788, 'temp_loss': 4.495446159660472, 'temp': 0.8900562602316024, 'alpha_loss': -20.429974274828254, 'alpha': 1.174457538679156, 'critic_loss': 1149.4345225075076, 'actor_loss': 5.152461932573704, 'time_step': 0.056388776426370434, 'td_error': 1.3021070227693514, 'init_value': -6.509466648101807, 'ave_value': -6.402144406722472} step=1730
2022-04-22 04:53.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:54.06 [info     ] CQL_20220422045205: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0004276505784492272, 'time_algorithm_update': 0.055373297950435926, 'temp_loss': 4.383373693234659, 'temp': 0.8679063525158546, 'alpha_loss': -21.192589588937043, 'alpha': 1.2184488949059062, 'critic_loss': 1550.377136583273, 'actor_loss': 5.146983528412836, 'time_step': 0.05590112636544112, 'td_error': 1.3158184736196539, 'init_value': -6.814698219299316, 'ave_value': -6.742203702605579} step=2076
2022-04-22 04:54.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:54.26 [info     ] CQL_20220422045205: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0004266590052257384, 'time_algorithm_update': 0.05554990892465404, 'temp_loss': 4.274164972966806, 'temp': 0.8464355449800547, 'alpha_loss': -21.990999310002852, 'alpha': 1.2644633699014696, 'critic_loss': 1928.508819403676, 'actor_loss': 5.585619467531325, 'time_step': 0.05607797369102522, 'td_error': 1.3278140499217705, 'init_value': -7.2398576736450195, 'ave_value': -7.192347156814966} step=2422
2022-04-22 04:54.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:54.46 [info     ] CQL_20220422045205: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0004209162871961649, 'time_algorithm_update': 0.05449680648098102, 'temp_loss': 4.169901606664492, 'temp': 0.8255941662829736, 'alpha_loss': -22.82678608536031, 'alpha': 1.3125327222609107, 'critic_loss': 2290.077186540372, 'actor_loss': 6.168262007608579, 'time_step': 0.055018019814022705, 'td_error': 1.3444157917722597, 'init_value': -7.989383220672607, 'ave_value': -7.945831552689083} step=2768
2022-04-22 04:54.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:55.06 [info     ] CQL_20220422045205: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00041172614676414886, 'time_algorithm_update': 0.055129784380080385, 'temp_loss': 4.066759041968108, 'temp': 0.80534807795045, 'alpha_loss': -23.69768504324676, 'alpha': 1.3626983262210912, 'critic_loss': 2612.0381226291547, 'actor_loss': 6.871861354464051, 'time_step': 0.05564270543225239, 'td_error': 1.3608283886200372, 'init_value': -8.506839752197266, 'ave_value': -8.48679772897488} step=3114
2022-04-22 04:55.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:55.26 [info     ] CQL_20220422045205: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00042160604730506854, 'time_algorithm_update': 0.05521551446418542, 'temp_loss': 3.9674765918985266, 'temp': 0.7856596804767675, 'alpha_loss': -24.6074294051683, 'alpha': 1.4150012683317152, 'critic_loss': 2880.169880244084, 'actor_loss': 7.6295997779493385, 'time_step': 0.05573476876826645, 'td_error': 1.382400998372081, 'init_value': -9.523634910583496, 'ave_value': -9.496324520282917} step=3460
2022-04-22 04:55.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:55.46 [info     ] CQL_20220422045205: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0004236711932055523, 'time_algorithm_update': 0.05513576413854698, 'temp_loss': 3.8707253099176926, 'temp': 0.7665038525713661, 'alpha_loss': -25.55458252416181, 'alpha': 1.4694953395452113, 'critic_loss': 3120.2349966413026, 'actor_loss': 8.495790779246072, 'time_step': 0.05566373450218597, 'td_error': 1.404239518294556, 'init_value': -10.325211524963379, 'ave_value': -10.304231567256199} step=3806
2022-04-22 04:55.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:56.07 [info     ] CQL_20220422045205: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0004228070981240686, 'time_algorithm_update': 0.05535404048214069, 'temp_loss': 3.7766177268386576, 'temp': 0.7478500570175964, 'alpha_loss': -26.539107730622927, 'alpha': 1.5262319666112778, 'critic_loss': 3326.7578520140896, 'actor_loss': 9.483525722702115, 'time_step': 0.055880911777474285, 'td_error': 1.4316573741288259, 'init_value': -11.22813606262207, 'ave_value': -11.21779975597188} step=4152
2022-04-22 04:56.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:56.27 [info     ] CQL_20220422045205: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00040771230796858067, 'time_algorithm_update': 0.05485783009170797, 'temp_loss': 3.6845695180010933, 'temp': 0.7296779279763987, 'alpha_loss': -27.564415534796744, 'alpha': 1.585274428301464, 'critic_loss': 3526.5853850083545, 'actor_loss': 10.512008978452297, 'time_step': 0.055357571282138716, 'td_error': 1.46394056909925, 'init_value': -12.430910110473633, 'ave_value': -12.413691330408247} step=4498
2022-04-22 04:56.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:56.46 [info     ] CQL_20220422045205: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.000411520114523827, 'time_algorithm_update': 0.053233839183873526, 'temp_loss': 3.5948607990507444, 'temp': 0.711973101589721, 'alpha_loss': -28.636584689851443, 'alpha': 1.6466990068468745, 'critic_loss': 3692.918157853143, 'actor_loss': 11.637054561879593, 'time_step': 0.053740746024027036, 'td_error': 1.499584295835415, 'init_value': -13.436361312866211, 'ave_value': -13.43147189464542} step=4844
2022-04-22 04:56.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:57.05 [info     ] CQL_20220422045205: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.000439588734180252, 'time_algorithm_update': 0.05366890830111641, 'temp_loss': 3.5076207418662273, 'temp': 0.6947171531316173, 'alpha_loss': -29.742982131208297, 'alpha': 1.7105769749321689, 'critic_loss': 3877.7758436258127, 'actor_loss': 12.841669041297338, 'time_step': 0.054199547436885064, 'td_error': 1.5387912897334932, 'init_value': -14.55374813079834, 'ave_value': -14.552056316178577} step=5190
2022-04-22 04:57.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:57.25 [info     ] CQL_20220422045205: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00041602112654316633, 'time_algorithm_update': 0.052948631992229836, 'temp_loss': 3.4224557470035, 'temp': 0.6778880802193129, 'alpha_loss': -30.897483913884688, 'alpha': 1.776986454952659, 'critic_loss': 3995.061129002213, 'actor_loss': 14.053632518459606, 'time_step': 0.053457478567354944, 'td_error': 1.5884386377979482, 'init_value': -16.12284278869629, 'ave_value': -16.107137135251996} step=5536
2022-04-22 04:57.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:57.44 [info     ] CQL_20220422045205: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00041667712217121456, 'time_algorithm_update': 0.05280876297482176, 'temp_loss': 3.3405978721001244, 'temp': 0.6614769524921572, 'alpha_loss': -32.10164153369176, 'alpha': 1.8460275964929878, 'critic_loss': 4074.239988117549, 'actor_loss': 15.320589864874162, 'time_step': 0.053319692611694336, 'td_error': 1.6333016216603868, 'init_value': -17.11321449279785, 'ave_value': -17.112195726885734} step=5882
2022-04-22 04:57.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:58.03 [info     ] CQL_20220422045205: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00041988681506559337, 'time_algorithm_update': 0.05296882521899449, 'temp_loss': 3.259760241977052, 'temp': 0.6454682251966068, 'alpha_loss': -33.34491735248896, 'alpha': 1.9177817495572085, 'critic_loss': 4075.932554388322, 'actor_loss': 16.604435419071617, 'time_step': 0.05348254559357042, 'td_error': 1.68422322019394, 'init_value': -18.26404571533203, 'ave_value': -18.267226668928064} step=6228
2022-04-22 04:58.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:58.22 [info     ] CQL_20220422045205: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00040137629977540475, 'time_algorithm_update': 0.05130410263304076, 'temp_loss': 3.1803517086657487, 'temp': 0.629853555405071, 'alpha_loss': -34.648654243160536, 'alpha': 1.9923587272621992, 'critic_loss': 4083.9587148324604, 'actor_loss': 17.959566066719894, 'time_step': 0.05179781583003226, 'td_error': 1.7437699052047366, 'init_value': -19.725616455078125, 'ave_value': -19.7225039919387} step=6574
2022-04-22 04:58.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:58.41 [info     ] CQL_20220422045205: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00041606453801855186, 'time_algorithm_update': 0.053230238787700675, 'temp_loss': 3.1040740323204528, 'temp': 0.6146195477488413, 'alpha_loss': -35.99371821894122, 'alpha': 2.0698563232587253, 'critic_loss': 4025.7936687138726, 'actor_loss': 19.268340320256403, 'time_step': 0.05374110503003776, 'td_error': 1.8052036200491315, 'init_value': -21.104598999023438, 'ave_value': -21.09859168838132} step=6920
2022-04-22 04:58.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:59.01 [info     ] CQL_20220422045205: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0004149117221722024, 'time_algorithm_update': 0.05359387259951906, 'temp_loss': 3.0275944305982203, 'temp': 0.599760812658795, 'alpha_loss': -37.391983792960986, 'alpha': 2.150380877401098, 'critic_loss': 4020.4482873464594, 'actor_loss': 20.61972748750896, 'time_step': 0.05410526046863181, 'td_error': 1.871702107739342, 'init_value': -22.508529663085938, 'ave_value': -22.497375894259818} step=7266
2022-04-22 04:59.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:59.20 [info     ] CQL_20220422045205: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00041288102982361193, 'time_algorithm_update': 0.05401639028780722, 'temp_loss': 2.954915816384244, 'temp': 0.5852643487425898, 'alpha_loss': -38.85106278568334, 'alpha': 2.234051025671766, 'critic_loss': 4061.743254380419, 'actor_loss': 21.945214392821914, 'time_step': 0.05452717315254873, 'td_error': 1.9338142279194168, 'init_value': -23.548612594604492, 'ave_value': -23.54928193596541} step=7612
2022-04-22 04:59.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:59.40 [info     ] CQL_20220422045205: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0004140889713529907, 'time_algorithm_update': 0.05427705552536628, 'temp_loss': 2.884240798867507, 'temp': 0.5711150167649881, 'alpha_loss': -40.36169716939761, 'alpha': 2.3209918682285817, 'critic_loss': 3952.695718225027, 'actor_loss': 23.23035837735744, 'time_step': 0.05478840205021676, 'td_error': 2.004934323354801, 'init_value': -24.944442749023438, 'ave_value': -24.94183960159902} step=7958
2022-04-22 04:59.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 04:59.59 [info     ] CQL_20220422045205: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.000412475166982309, 'time_algorithm_update': 0.053369663354289325, 'temp_loss': 2.8141426619766765, 'temp': 0.5573106449463464, 'alpha_loss': -41.93210682290138, 'alpha': 2.411316408587329, 'critic_loss': 3816.012105423591, 'actor_loss': 24.500746787627996, 'time_step': 0.05387719931630041, 'td_error': 2.075109346766397, 'init_value': -26.21201515197754, 'ave_value': -26.204554839968512} step=8304
2022-04-22 04:59.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:00.19 [info     ] CQL_20220422045205: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00041463471561498035, 'time_algorithm_update': 0.053157773321074554, 'temp_loss': 2.7463027383550744, 'temp': 0.5438416262582547, 'alpha_loss': -43.56177635413374, 'alpha': 2.505163744694925, 'critic_loss': 3666.439767826499, 'actor_loss': 25.765865981923362, 'time_step': 0.05367200912078681, 'td_error': 2.1466493020396507, 'init_value': -27.353309631347656, 'ave_value': -27.353330494522087} step=8650
2022-04-22 05:00.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:00.38 [info     ] CQL_20220422045205: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0004172097740834848, 'time_algorithm_update': 0.05395142191407309, 'temp_loss': 2.6796033975016864, 'temp': 0.5306982124127404, 'alpha_loss': -45.25575526463503, 'alpha': 2.6026598304682382, 'critic_loss': 3658.307721617594, 'actor_loss': 27.067285653483662, 'time_step': 0.05446438706679151, 'td_error': 2.219057286210661, 'init_value': -28.492507934570312, 'ave_value': -28.4931945199365} step=8996
2022-04-22 05:00.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:00.58 [info     ] CQL_20220422045205: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00044060029046383894, 'time_algorithm_update': 0.05529655197452259, 'temp_loss': 2.6151679774929333, 'temp': 0.5178723926144528, 'alpha_loss': -47.02408455424226, 'alpha': 2.7039616183738486, 'critic_loss': 3459.034812618542, 'actor_loss': 28.21498128306659, 'time_step': 0.05583377380591596, 'td_error': 2.29695274213831, 'init_value': -29.794696807861328, 'ave_value': -29.78910538577084} step=9342
2022-04-22 05:00.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:01.19 [info     ] CQL_20220422045205: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0004190812910223283, 'time_algorithm_update': 0.05567752488086678, 'temp_loss': 2.5524622057214637, 'temp': 0.5053563125733006, 'alpha_loss': -48.847962506244635, 'alpha': 2.8092191295127646, 'critic_loss': 3299.2405984691113, 'actor_loss': 29.41438307789709, 'time_step': 0.05619376381008611, 'td_error': 2.3734400758072107, 'init_value': -30.969097137451172, 'ave_value': -30.962833464140008} step=9688
2022-04-22 05:01.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:01.39 [info     ] CQL_20220422045205: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00041914468555781195, 'time_algorithm_update': 0.05513975868335349, 'temp_loss': 2.490306687492856, 'temp': 0.4931425697713918, 'alpha_loss': -50.74818585787205, 'alpha': 2.9185687923707024, 'critic_loss': 3143.35853320877, 'actor_loss': 30.57780232181439, 'time_step': 0.05565608512459463, 'td_error': 2.44646213530559, 'init_value': -31.995994567871094, 'ave_value': -31.992461891409455} step=10034
2022-04-22 05:01.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:01.59 [info     ] CQL_20220422045205: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00041022397190160144, 'time_algorithm_update': 0.05527472496032715, 'temp_loss': 2.4305673134809282, 'temp': 0.4812256860078415, 'alpha_loss': -52.72257110286999, 'alpha': 3.032180376824616, 'critic_loss': 3015.5551694307715, 'actor_loss': 31.686146532179993, 'time_step': 0.05578006898736678, 'td_error': 2.518608729214633, 'init_value': -32.95844650268555, 'ave_value': -32.96068776502831} step=10380
2022-04-22 05:01.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:02.19 [info     ] CQL_20220422045205: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00044700244947665, 'time_algorithm_update': 0.055323369241174244, 'temp_loss': 2.3709059960580285, 'temp': 0.46959731895799584, 'alpha_loss': -54.779415130615234, 'alpha': 3.150222252559111, 'critic_loss': 2888.262006638367, 'actor_loss': 32.721102863377915, 'time_step': 0.05586791658677118, 'td_error': 2.588025551130971, 'init_value': -33.890899658203125, 'ave_value': -33.89479290856858} step=10726
2022-04-22 05:02.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:02.39 [info     ] CQL_20220422045205: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00042256385604770195, 'time_algorithm_update': 0.05493198860587412, 'temp_loss': 2.3139839909669293, 'temp': 0.458251418836544, 'alpha_loss': -56.90661559352985, 'alpha': 3.2728547119680855, 'critic_loss': 2772.9503385510748, 'actor_loss': 33.707453645033645, 'time_step': 0.05544931626733328, 'td_error': 2.665441486854025, 'init_value': -35.02831268310547, 'ave_value': -35.02518242320047} step=11072
2022-04-22 05:02.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:02.58 [info     ] CQL_20220422045205: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0004126722412991386, 'time_algorithm_update': 0.051781909314194165, 'temp_loss': 2.2576604144421615, 'temp': 0.44717889068099115, 'alpha_loss': -59.12503321873659, 'alpha': 3.4002566261787637, 'critic_loss': 2797.1562782243495, 'actor_loss': 34.669161824132665, 'time_step': 0.052286874352162976, 'td_error': 2.7383364926356357, 'init_value': -36.00114440917969, 'ave_value': -35.99453610166547} step=11418
2022-04-22 05:02.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:03.16 [info     ] CQL_20220422045205: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0004131766412988564, 'time_algorithm_update': 0.05143824684826625, 'temp_loss': 2.203580150714499, 'temp': 0.43637392252166834, 'alpha_loss': -61.43577856824577, 'alpha': 3.532629590503053, 'critic_loss': 2810.806896055365, 'actor_loss': 35.60526077182307, 'time_step': 0.05194876717694233, 'td_error': 2.8090874587309953, 'init_value': -36.858062744140625, 'ave_value': -36.85539984614886} step=11764
2022-04-22 05:03.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:03.35 [info     ] CQL_20220422045205: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00043236864784549424, 'time_algorithm_update': 0.051820269209801116, 'temp_loss': 2.150761571233672, 'temp': 0.42582855739689973, 'alpha_loss': -63.82376185731392, 'alpha': 3.670158956092217, 'critic_loss': 2896.8294430771316, 'actor_loss': 36.547565471230214, 'time_step': 0.052352905962508536, 'td_error': 2.8803841493950753, 'init_value': -37.75922393798828, 'ave_value': -37.75550382839328} step=12110
2022-04-22 05:03.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:03.54 [info     ] CQL_20220422045205: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0004178285598754883, 'time_algorithm_update': 0.05163179036509784, 'temp_loss': 2.0985946579475625, 'temp': 0.41553712296003553, 'alpha_loss': -66.30325736338004, 'alpha': 3.81303308327074, 'critic_loss': 3028.060028958183, 'actor_loss': 37.38671605986667, 'time_step': 0.05214515793530238, 'td_error': 2.9520473856831226, 'init_value': -38.610382080078125, 'ave_value': -38.61125194717776} step=12456
2022-04-22 05:03.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:04.13 [info     ] CQL_20220422045205: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0004126646615177221, 'time_algorithm_update': 0.05242495867558297, 'temp_loss': 2.0476358770635086, 'temp': 0.40549619495868683, 'alpha_loss': -68.89163521121692, 'alpha': 3.961476649163086, 'critic_loss': 3125.801192619897, 'actor_loss': 38.23882542869259, 'time_step': 0.052933088616828695, 'td_error': 3.0132587502292005, 'init_value': -39.28569412231445, 'ave_value': -39.28759043021132} step=12802
2022-04-22 05:04.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:04.32 [info     ] CQL_20220422045205: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0004121306314633761, 'time_algorithm_update': 0.05245129290343709, 'temp_loss': 1.9986975792515485, 'temp': 0.39569674742359645, 'alpha_loss': -71.56914989934491, 'alpha': 4.115694868771327, 'critic_loss': 3066.392234493542, 'actor_loss': 38.98788498454011, 'time_step': 0.05295711928020323, 'td_error': 3.0777446233850867, 'init_value': -40.0659294128418, 'ave_value': -40.067443374120096} step=13148
2022-04-22 05:04.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:04.51 [info     ] CQL_20220422045205: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00040802583529080957, 'time_algorithm_update': 0.052359470742286286, 'temp_loss': 1.9499266233747405, 'temp': 0.38613477581842787, 'alpha_loss': -74.35487733962219, 'alpha': 4.275916703174569, 'critic_loss': 2935.4410478007585, 'actor_loss': 39.740390237356195, 'time_step': 0.05285989824747075, 'td_error': 3.142199435549384, 'init_value': -40.87104034423828, 'ave_value': -40.8690087540447} step=13494
2022-04-22 05:04.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:05.10 [info     ] CQL_20220422045205: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00043539229155964936, 'time_algorithm_update': 0.05228883131391051, 'temp_loss': 1.9032223224639893, 'temp': 0.3768042251898374, 'alpha_loss': -77.24819119403817, 'alpha': 4.4423695699327945, 'critic_loss': 2895.7405645998915, 'actor_loss': 40.52352166589285, 'time_step': 0.05281910662017117, 'td_error': 3.216035990425885, 'init_value': -41.6827507019043, 'ave_value': -41.680256757921605} step=13840
2022-04-22 05:05.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:05.29 [info     ] CQL_20220422045205: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0004393048369126513, 'time_algorithm_update': 0.0516536215137195, 'temp_loss': 1.8574670739256578, 'temp': 0.3676978722645368, 'alpha_loss': -80.25273676965968, 'alpha': 4.615313206104874, 'critic_loss': 2949.3969409038564, 'actor_loss': 41.43637735443997, 'time_step': 0.05219008329975812, 'td_error': 3.294095947629405, 'init_value': -42.59538269042969, 'ave_value': -42.59352345401046} step=14186
2022-04-22 05:05.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:05.48 [info     ] CQL_20220422045205: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0004216473915673405, 'time_algorithm_update': 0.052090611071944924, 'temp_loss': 1.8115437688855078, 'temp': 0.35881345783699453, 'alpha_loss': -83.38100711183051, 'alpha': 4.794984456431659, 'critic_loss': 3173.373581020818, 'actor_loss': 42.30590321976325, 'time_step': 0.05261161079296487, 'td_error': 3.37013349079153, 'init_value': -43.39984893798828, 'ave_value': -43.39750672254929} step=14532
2022-04-22 05:05.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:06.07 [info     ] CQL_20220422045205: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0004545463991992046, 'time_algorithm_update': 0.05181990400215105, 'temp_loss': 1.7681232821734654, 'temp': 0.3501447717406157, 'alpha_loss': -86.62271806132587, 'alpha': 4.981645110025571, 'critic_loss': 3277.9172934824333, 'actor_loss': 43.09108245717308, 'time_step': 0.05237588096905306, 'td_error': 3.446273964906964, 'init_value': -44.218318939208984, 'ave_value': -44.22030440303826} step=14878
2022-04-22 05:06.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:06.27 [info     ] CQL_20220422045205: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0004098132855630334, 'time_algorithm_update': 0.05441621548867639, 'temp_loss': 1.7255347240177883, 'temp': 0.34168500836529486, 'alpha_loss': -90.00625019404241, 'alpha': 5.1755834218394545, 'critic_loss': 3447.438194319003, 'actor_loss': 43.94524470643501, 'time_step': 0.054928241437570205, 'td_error': 3.5171174966472596, 'init_value': -44.94169235229492, 'ave_value': -44.94563560540103} step=15224
2022-04-22 05:06.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:06.46 [info     ] CQL_20220422045205: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00042111818501025956, 'time_algorithm_update': 0.053383892671221254, 'temp_loss': 1.6839160429949016, 'temp': 0.33342839338186847, 'alpha_loss': -93.50394133198468, 'alpha': 5.377063957942014, 'critic_loss': 3693.821344099982, 'actor_loss': 44.890436657591366, 'time_step': 0.05390860166163803, 'td_error': 3.612827352810741, 'init_value': -45.97566223144531, 'ave_value': -45.97212945604392} step=15570
2022-04-22 05:06.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:07.05 [info     ] CQL_20220422045205: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.000415663498674514, 'time_algorithm_update': 0.052178836282278075, 'temp_loss': 1.642843147578267, 'temp': 0.325371354904478, 'alpha_loss': -97.14642845550713, 'alpha': 5.586393117904663, 'critic_loss': 3942.493284721595, 'actor_loss': 45.80429902931169, 'time_step': 0.05269550519182503, 'td_error': 3.692822958330382, 'init_value': -46.719757080078125, 'ave_value': -46.71848780368391} step=15916
2022-04-22 05:07.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:07.24 [info     ] CQL_20220422045205: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0004204180888357879, 'time_algorithm_update': 0.05246345225097127, 'temp_loss': 1.6028527548547424, 'temp': 0.3175110601620867, 'alpha_loss': -100.92260920794713, 'alpha': 5.803861090213577, 'critic_loss': 4232.700757682668, 'actor_loss': 46.67743184525153, 'time_step': 0.052978513557786884, 'td_error': 3.790777100919669, 'init_value': -47.769378662109375, 'ave_value': -47.76792043375709} step=16262
2022-04-22 05:07.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:07.43 [info     ] CQL_20220422045205: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0004128286604247341, 'time_algorithm_update': 0.05251404384657138, 'temp_loss': 1.564817173632583, 'temp': 0.30983801125791033, 'alpha_loss': -104.84852205397766, 'alpha': 6.0297687260401736, 'critic_loss': 4076.9051972317557, 'actor_loss': 47.344258600576765, 'time_step': 0.05302455935175019, 'td_error': 3.8364602743294682, 'init_value': -48.13269805908203, 'ave_value': -48.13361357539228} step=16608
2022-04-22 05:07.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:08.02 [info     ] CQL_20220422045205: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00041808489430157435, 'time_algorithm_update': 0.05206756715829662, 'temp_loss': 1.5267629671648058, 'temp': 0.30235120464611603, 'alpha_loss': -108.93958641890157, 'alpha': 6.26450248536347, 'critic_loss': 3784.4379318325505, 'actor_loss': 47.94646999601684, 'time_step': 0.05257727162686387, 'td_error': 3.9107055768166665, 'init_value': -48.924381256103516, 'ave_value': -48.92217966371828} step=16954
2022-04-22 05:08.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:08.21 [info     ] CQL_20220422045205: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0004142681298228358, 'time_algorithm_update': 0.05290968431902759, 'temp_loss': 1.4899588296179138, 'temp': 0.29504483817629734, 'alpha_loss': -113.1587873028882, 'alpha': 6.508356746221553, 'critic_loss': 3619.247447107569, 'actor_loss': 48.631882011545876, 'time_step': 0.0534208330804902, 'td_error': 3.9710615585948195, 'init_value': -49.43289566040039, 'ave_value': -49.43263643101077} step=17300
2022-04-22 05:08.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422045205/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 05:08.23 [info     ] FQE_20220422050822: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.000160557677946895, 'time_algorithm_update': 0.006458671696214791, 'loss': 0.00639190514858094, 'time_step': 0.006691932678222656, 'init_value': -0.43455612659454346, 'ave_value': -0.3847346966129703, 'soft_opc': nan} step=166




2022-04-22 05:08.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.24 [info     ] FQE_20220422050822: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00018153707665133188, 'time_algorithm_update': 0.00696357187018337, 'loss': 0.004148031070166413, 'time_step': 0.00721890955086214, 'init_value': -0.4856443703174591, 'ave_value': -0.40118165564563896, 'soft_opc': nan} step=332




2022-04-22 05:08.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.25 [info     ] FQE_20220422050822: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001624291201671922, 'time_algorithm_update': 0.0070131097931459725, 'loss': 0.0036084277636696956, 'time_step': 0.007245473114840956, 'init_value': -0.4925922751426697, 'ave_value': -0.3990333156744102, 'soft_opc': nan} step=498




2022-04-22 05:08.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.27 [info     ] FQE_20220422050822: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016391277313232422, 'time_algorithm_update': 0.007026607731738722, 'loss': 0.0034201677274295545, 'time_step': 0.007261251828756677, 'init_value': -0.5623700618743896, 'ave_value': -0.4458561082479653, 'soft_opc': nan} step=664




2022-04-22 05:08.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.28 [info     ] FQE_20220422050822: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016170668314738446, 'time_algorithm_update': 0.007097324693059346, 'loss': 0.003084490172486438, 'time_step': 0.0073265871369695085, 'init_value': -0.6399184465408325, 'ave_value': -0.49811674000040906, 'soft_opc': nan} step=830




2022-04-22 05:08.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.29 [info     ] FQE_20220422050822: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016457632363560688, 'time_algorithm_update': 0.0070253395172486825, 'loss': 0.0029084616563709296, 'time_step': 0.007262640688792768, 'init_value': -0.6552918553352356, 'ave_value': -0.5100216855404076, 'soft_opc': nan} step=996




2022-04-22 05:08.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.31 [info     ] FQE_20220422050822: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016394293451883705, 'time_algorithm_update': 0.007105023027902626, 'loss': 0.002701210461175406, 'time_step': 0.007341090455112687, 'init_value': -0.7132419347763062, 'ave_value': -0.5514132827688176, 'soft_opc': nan} step=1162




2022-04-22 05:08.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.32 [info     ] FQE_20220422050822: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00017642831227865564, 'time_algorithm_update': 0.007004443421421281, 'loss': 0.0025813757047107645, 'time_step': 0.007254314709858722, 'init_value': -0.7414575815200806, 'ave_value': -0.5607894274498428, 'soft_opc': nan} step=1328




2022-04-22 05:08.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.33 [info     ] FQE_20220422050822: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00016971955816429783, 'time_algorithm_update': 0.006947177002228886, 'loss': 0.002326129903071504, 'time_step': 0.007189112973500447, 'init_value': -0.7947809100151062, 'ave_value': -0.6034797843277185, 'soft_opc': nan} step=1494




2022-04-22 05:08.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.35 [info     ] FQE_20220422050822: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001611551606511495, 'time_algorithm_update': 0.006542458591690983, 'loss': 0.002430577786970345, 'time_step': 0.006781118461884648, 'init_value': -0.8642821311950684, 'ave_value': -0.655811578876062, 'soft_opc': nan} step=1660




2022-04-22 05:08.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.36 [info     ] FQE_20220422050822: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016184743628444443, 'time_algorithm_update': 0.006952051656791963, 'loss': 0.0024703707842644394, 'time_step': 0.007184831492872123, 'init_value': -0.905922532081604, 'ave_value': -0.6851268635169175, 'soft_opc': nan} step=1826




2022-04-22 05:08.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.37 [info     ] FQE_20220422050822: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016595656613269485, 'time_algorithm_update': 0.0070760034653077645, 'loss': 0.002571117012279041, 'time_step': 0.007318811244275196, 'init_value': -0.9558337926864624, 'ave_value': -0.7215002145542687, 'soft_opc': nan} step=1992




2022-04-22 05:08.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.38 [info     ] FQE_20220422050822: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016319751739501953, 'time_algorithm_update': 0.007019986589270902, 'loss': 0.002893416939656733, 'time_step': 0.007256548088717173, 'init_value': -1.0309009552001953, 'ave_value': -0.7812309211833184, 'soft_opc': nan} step=2158




2022-04-22 05:08.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.40 [info     ] FQE_20220422050822: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0002004812998944018, 'time_algorithm_update': 0.009247752557317895, 'loss': 0.003180778411627994, 'time_step': 0.009524204644812158, 'init_value': -1.1178250312805176, 'ave_value': -0.860052003995893, 'soft_opc': nan} step=2324




2022-04-22 05:08.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.42 [info     ] FQE_20220422050822: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016589480710316854, 'time_algorithm_update': 0.0069930596523974315, 'loss': 0.0034055597836457484, 'time_step': 0.007236136011330478, 'init_value': -1.1596599817276, 'ave_value': -0.8924246919341385, 'soft_opc': nan} step=2490




2022-04-22 05:08.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.43 [info     ] FQE_20220422050822: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016462228384362646, 'time_algorithm_update': 0.007007709468703672, 'loss': 0.0040382330140368225, 'time_step': 0.007244680301252618, 'init_value': -1.1936299800872803, 'ave_value': -0.9082203924400011, 'soft_opc': nan} step=2656




2022-04-22 05:08.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.44 [info     ] FQE_20220422050822: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00017166855823562806, 'time_algorithm_update': 0.006991173847612128, 'loss': 0.004153181809801552, 'time_step': 0.007238113736531821, 'init_value': -1.335777997970581, 'ave_value': -1.038206608038446, 'soft_opc': nan} step=2822




2022-04-22 05:08.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.45 [info     ] FQE_20220422050822: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016895977847547415, 'time_algorithm_update': 0.00696042503219053, 'loss': 0.004808370954762438, 'time_step': 0.0072060866528246776, 'init_value': -1.3638423681259155, 'ave_value': -1.0735269012550512, 'soft_opc': nan} step=2988




2022-04-22 05:08.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.47 [info     ] FQE_20220422050822: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016439248280352857, 'time_algorithm_update': 0.006765339747968927, 'loss': 0.005022555866162293, 'time_step': 0.0070068362247513, 'init_value': -1.4170777797698975, 'ave_value': -1.1155312309461134, 'soft_opc': nan} step=3154




2022-04-22 05:08.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.48 [info     ] FQE_20220422050822: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00017156802028058524, 'time_algorithm_update': 0.007104648164955966, 'loss': 0.0056049592478456065, 'time_step': 0.0073542278933237835, 'init_value': -1.4942916631698608, 'ave_value': -1.1918859171379055, 'soft_opc': nan} step=3320




2022-04-22 05:08.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.49 [info     ] FQE_20220422050822: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00017105240419686558, 'time_algorithm_update': 0.0070671862866505085, 'loss': 0.005984440300556135, 'time_step': 0.007313804454114063, 'init_value': -1.4851562976837158, 'ave_value': -1.1927945781213878, 'soft_opc': nan} step=3486




2022-04-22 05:08.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.51 [info     ] FQE_20220422050822: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016941938055566996, 'time_algorithm_update': 0.006914236459387354, 'loss': 0.006561368917737785, 'time_step': 0.0071577235876795755, 'init_value': -1.5487382411956787, 'ave_value': -1.259335288508619, 'soft_opc': nan} step=3652




2022-04-22 05:08.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.52 [info     ] FQE_20220422050822: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016840825597923923, 'time_algorithm_update': 0.0069884205439004555, 'loss': 0.006926830807674104, 'time_step': 0.0072291672947895095, 'init_value': -1.6117749214172363, 'ave_value': -1.3252978059052078, 'soft_opc': nan} step=3818




2022-04-22 05:08.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.53 [info     ] FQE_20220422050822: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001644068453685347, 'time_algorithm_update': 0.0069486434201160106, 'loss': 0.0070054685996413365, 'time_step': 0.0071859503366861, 'init_value': -1.5916788578033447, 'ave_value': -1.2894588997746976, 'soft_opc': nan} step=3984




2022-04-22 05:08.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.55 [info     ] FQE_20220422050822: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016567793237157614, 'time_algorithm_update': 0.0069397687911987305, 'loss': 0.007350546769841282, 'time_step': 0.007176943572170763, 'init_value': -1.5944461822509766, 'ave_value': -1.3014332933947108, 'soft_opc': nan} step=4150




2022-04-22 05:08.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.56 [info     ] FQE_20220422050822: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00017195868204875165, 'time_algorithm_update': 0.007051571305975856, 'loss': 0.007524973849965125, 'time_step': 0.007302142051329096, 'init_value': -1.7080481052398682, 'ave_value': -1.4069388991523837, 'soft_opc': nan} step=4316




2022-04-22 05:08.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.57 [info     ] FQE_20220422050822: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001655386154910168, 'time_algorithm_update': 0.006721617227577302, 'loss': 0.008308455494410606, 'time_step': 0.006960110492016895, 'init_value': -1.7094093561172485, 'ave_value': -1.4171582491925776, 'soft_opc': nan} step=4482




2022-04-22 05:08.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:08.58 [info     ] FQE_20220422050822: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001709059060338032, 'time_algorithm_update': 0.006917008434433535, 'loss': 0.007864841627145007, 'time_step': 0.00716367399836161, 'init_value': -1.7286018133163452, 'ave_value': -1.4461309474305595, 'soft_opc': nan} step=4648




2022-04-22 05:08.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.00 [info     ] FQE_20220422050822: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00017022081168301134, 'time_algorithm_update': 0.007150252181363393, 'loss': 0.008955000493681082, 'time_step': 0.0073977435927793204, 'init_value': -1.7304328680038452, 'ave_value': -1.4364067874399962, 'soft_opc': nan} step=4814




2022-04-22 05:09.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.01 [info     ] FQE_20220422050822: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016386394041130342, 'time_algorithm_update': 0.007058153669518161, 'loss': 0.009141961734493668, 'time_step': 0.00729943039905594, 'init_value': -1.7673903703689575, 'ave_value': -1.4856621404107122, 'soft_opc': nan} step=4980




2022-04-22 05:09.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.02 [info     ] FQE_20220422050822: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00017018346901399544, 'time_algorithm_update': 0.006955508726188935, 'loss': 0.009595127167889893, 'time_step': 0.00720111576907606, 'init_value': -1.8331005573272705, 'ave_value': -1.5539879407133772, 'soft_opc': nan} step=5146




2022-04-22 05:09.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.04 [info     ] FQE_20220422050822: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001671055713331843, 'time_algorithm_update': 0.0071651993027652605, 'loss': 0.010103748996983316, 'time_step': 0.007413416023713997, 'init_value': -1.9384257793426514, 'ave_value': -1.6602163200140805, 'soft_opc': nan} step=5312




2022-04-22 05:09.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.05 [info     ] FQE_20220422050822: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016740287642881093, 'time_algorithm_update': 0.007036924362182617, 'loss': 0.0105239148206128, 'time_step': 0.0072773982243365554, 'init_value': -1.9310710430145264, 'ave_value': -1.6455093991187577, 'soft_opc': nan} step=5478




2022-04-22 05:09.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.06 [info     ] FQE_20220422050822: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016758815351738986, 'time_algorithm_update': 0.007153317152735698, 'loss': 0.011062948680335532, 'time_step': 0.007398276443941048, 'init_value': -1.9801009893417358, 'ave_value': -1.7251470983280122, 'soft_opc': nan} step=5644




2022-04-22 05:09.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.08 [info     ] FQE_20220422050822: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00016847432377826735, 'time_algorithm_update': 0.007076092513210802, 'loss': 0.011230165856793602, 'time_step': 0.007318338715886495, 'init_value': -2.002774477005005, 'ave_value': -1.727109577873017, 'soft_opc': nan} step=5810




2022-04-22 05:09.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.09 [info     ] FQE_20220422050822: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001676427312644131, 'time_algorithm_update': 0.006796865578157356, 'loss': 0.0115025993783717, 'time_step': 0.007039565637887242, 'init_value': -2.0175161361694336, 'ave_value': -1.7395530629739466, 'soft_opc': nan} step=5976




2022-04-22 05:09.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.10 [info     ] FQE_20220422050822: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016615046076027743, 'time_algorithm_update': 0.0069231297596391425, 'loss': 0.012113185088901994, 'time_step': 0.007160669349762331, 'init_value': -2.044388771057129, 'ave_value': -1.772298623580771, 'soft_opc': nan} step=6142




2022-04-22 05:09.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.12 [info     ] FQE_20220422050822: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00017987676413662462, 'time_algorithm_update': 0.007044945854738534, 'loss': 0.012249742658087894, 'time_step': 0.007298410656940506, 'init_value': -2.0913615226745605, 'ave_value': -1.8175677908013936, 'soft_opc': nan} step=6308




2022-04-22 05:09.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.13 [info     ] FQE_20220422050822: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016909478658653167, 'time_algorithm_update': 0.00710714581501053, 'loss': 0.012353602585176579, 'time_step': 0.0073539607496146695, 'init_value': -2.131772756576538, 'ave_value': -1.8515014450778966, 'soft_opc': nan} step=6474




2022-04-22 05:09.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.14 [info     ] FQE_20220422050822: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001795708415019943, 'time_algorithm_update': 0.007001843797155173, 'loss': 0.012979839230306918, 'time_step': 0.007259449326848409, 'init_value': -2.1031129360198975, 'ave_value': -1.8150278256627153, 'soft_opc': nan} step=6640




2022-04-22 05:09.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.16 [info     ] FQE_20220422050822: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001516054911785815, 'time_algorithm_update': 0.006935198623013784, 'loss': 0.01295702082647785, 'time_step': 0.0071556898484747096, 'init_value': -2.145197868347168, 'ave_value': -1.8373932437723721, 'soft_opc': nan} step=6806




2022-04-22 05:09.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.17 [info     ] FQE_20220422050822: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00016075588134397943, 'time_algorithm_update': 0.006942369851721339, 'loss': 0.013097045404544795, 'time_step': 0.007173357239688735, 'init_value': -2.136448860168457, 'ave_value': -1.833384605188359, 'soft_opc': nan} step=6972




2022-04-22 05:09.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.18 [info     ] FQE_20220422050822: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015259507190750306, 'time_algorithm_update': 0.0070430327610797195, 'loss': 0.013700071277646685, 'time_step': 0.007269776011087808, 'init_value': -2.1648948192596436, 'ave_value': -1.8345684469664922, 'soft_opc': nan} step=7138




2022-04-22 05:09.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.19 [info     ] FQE_20220422050822: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001466475337384695, 'time_algorithm_update': 0.006700327597468732, 'loss': 0.013836402013681897, 'time_step': 0.00691347524344203, 'init_value': -2.224649667739868, 'ave_value': -1.8856176510016929, 'soft_opc': nan} step=7304




2022-04-22 05:09.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.21 [info     ] FQE_20220422050822: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014940227370664297, 'time_algorithm_update': 0.006262193243187594, 'loss': 0.013960434312171987, 'time_step': 0.006481838513569659, 'init_value': -2.2288095951080322, 'ave_value': -1.8911028669131835, 'soft_opc': nan} step=7470




2022-04-22 05:09.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.22 [info     ] FQE_20220422050822: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015535124813217716, 'time_algorithm_update': 0.0070930647562785325, 'loss': 0.014651932698044734, 'time_step': 0.007318187908953931, 'init_value': -2.225475311279297, 'ave_value': -1.9133310010144906, 'soft_opc': nan} step=7636




2022-04-22 05:09.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.23 [info     ] FQE_20220422050822: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00015901944723473974, 'time_algorithm_update': 0.006921427795685917, 'loss': 0.014800794201367254, 'time_step': 0.00715374372091638, 'init_value': -2.295642852783203, 'ave_value': -1.9766847328576553, 'soft_opc': nan} step=7802




2022-04-22 05:09.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.25 [info     ] FQE_20220422050822: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001673009022172675, 'time_algorithm_update': 0.007168537162872682, 'loss': 0.015030654900285123, 'time_step': 0.007410994495253965, 'init_value': -2.31772518157959, 'ave_value': -2.000918668803029, 'soft_opc': nan} step=7968




2022-04-22 05:09.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.26 [info     ] FQE_20220422050822: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001669605094266225, 'time_algorithm_update': 0.007014086447566389, 'loss': 0.01561514557660445, 'time_step': 0.007257780396794698, 'init_value': -2.2689976692199707, 'ave_value': -1.9318905682336573, 'soft_opc': nan} step=8134




2022-04-22 05:09.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:09.27 [info     ] FQE_20220422050822: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00016689300537109375, 'time_algorithm_update': 0.007143315062465438, 'loss': 0.01542730089059627, 'time_step': 0.007385736488434206, 'init_value': -2.244215488433838, 'ave_value': -1.8944894871515063, 'soft_opc': nan} step=8300




2022-04-22 05:09.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050822/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 05:09.28 [info     ] Directory is created at d3rlpy_logs/FQE_20220422050928
2022-04-22 05:09.28 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 05:09.28 [debug    ] Building models...
2022-04-22 05:09.28 [debug    ] Models have been built.
2022-04-22 05:09.28 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422050928/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 05:09.30 [info     ] FQE_20220422050928: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00017668159914688324, 'time_algorithm_update': 0.007001367085416552, 'loss': 0.024424235058397473, 'time_step': 0.007254939683726136, 'init_value': -1.2124375104904175, 'ave_value': -1.2149904125215465, 'soft_opc': nan} step=355




2022-04-22 05:09.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.33 [info     ] FQE_20220422050928: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.00017635318594919124, 'time_algorithm_update': 0.006790132925543987, 'loss': 0.023264101488699375, 'time_step': 0.007042714239845813, 'init_value': -2.4031083583831787, 'ave_value': -2.430966884427089, 'soft_opc': nan} step=710




2022-04-22 05:09.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.36 [info     ] FQE_20220422050928: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00017886027483872963, 'time_algorithm_update': 0.007184809698185451, 'loss': 0.02515852362127371, 'time_step': 0.0074429693356366225, 'init_value': -3.040728807449341, 'ave_value': -3.0784956508314902, 'soft_opc': nan} step=1065




2022-04-22 05:09.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.39 [info     ] FQE_20220422050928: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.0001775009531370351, 'time_algorithm_update': 0.007143898413214885, 'loss': 0.02904102634113859, 'time_step': 0.007397894792153802, 'init_value': -4.019383430480957, 'ave_value': -4.123067906617812, 'soft_opc': nan} step=1420




2022-04-22 05:09.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.41 [info     ] FQE_20220422050928: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00017976962344747193, 'time_algorithm_update': 0.0071230371233443135, 'loss': 0.03262940896281474, 'time_step': 0.0073787581752723375, 'init_value': -4.588176250457764, 'ave_value': -4.779057718351234, 'soft_opc': nan} step=1775




2022-04-22 05:09.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.44 [info     ] FQE_20220422050928: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00017718865837849362, 'time_algorithm_update': 0.0067305443992077465, 'loss': 0.04347136494979052, 'time_step': 0.0069830733285823334, 'init_value': -5.4381632804870605, 'ave_value': -5.737724771953764, 'soft_opc': nan} step=2130




2022-04-22 05:09.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.47 [info     ] FQE_20220422050928: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00018210411071777343, 'time_algorithm_update': 0.007019505702273946, 'loss': 0.05150632289251391, 'time_step': 0.007277596164757098, 'init_value': -5.958229064941406, 'ave_value': -6.391660365512165, 'soft_opc': nan} step=2485




2022-04-22 05:09.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.50 [info     ] FQE_20220422050928: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.0001779811483033946, 'time_algorithm_update': 0.007026327831644408, 'loss': 0.06489808377088375, 'time_step': 0.007283908548489423, 'init_value': -6.694090843200684, 'ave_value': -7.2102391943201285, 'soft_opc': nan} step=2840




2022-04-22 05:09.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.52 [info     ] FQE_20220422050928: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00017726186295630226, 'time_algorithm_update': 0.007037399856137558, 'loss': 0.07554992069818184, 'time_step': 0.007293485587751362, 'init_value': -7.281425476074219, 'ave_value': -7.902130462917736, 'soft_opc': nan} step=3195




2022-04-22 05:09.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.55 [info     ] FQE_20220422050928: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00017524302845269861, 'time_algorithm_update': 0.006970905250226948, 'loss': 0.09576680804274872, 'time_step': 0.007222716909059336, 'init_value': -8.146919250488281, 'ave_value': -8.834690029311211, 'soft_opc': nan} step=3550




2022-04-22 05:09.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:09.58 [info     ] FQE_20220422050928: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.000173271206063284, 'time_algorithm_update': 0.006896239267268651, 'loss': 0.1140488944790313, 'time_step': 0.007143574701228612, 'init_value': -9.14687442779541, 'ave_value': -9.933218850224174, 'soft_opc': nan} step=3905




2022-04-22 05:09.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.01 [info     ] FQE_20220422050928: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00017679912943235586, 'time_algorithm_update': 0.007054625094776422, 'loss': 0.13303965598239864, 'time_step': 0.00730381750724685, 'init_value': -9.470853805541992, 'ave_value': -10.31570004864549, 'soft_opc': nan} step=4260




2022-04-22 05:10.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.03 [info     ] FQE_20220422050928: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00017718127076054964, 'time_algorithm_update': 0.007065024846036669, 'loss': 0.15501251345579053, 'time_step': 0.007317644441631479, 'init_value': -10.1846923828125, 'ave_value': -11.10091987269879, 'soft_opc': nan} step=4615




2022-04-22 05:10.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.06 [info     ] FQE_20220422050928: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00017971522371533892, 'time_algorithm_update': 0.007022581637745173, 'loss': 0.17653654712501546, 'time_step': 0.007280278541672397, 'init_value': -10.509178161621094, 'ave_value': -11.610826207526708, 'soft_opc': nan} step=4970




2022-04-22 05:10.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.09 [info     ] FQE_20220422050928: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.0001800946786370076, 'time_algorithm_update': 0.006799581688894353, 'loss': 0.20245461799466694, 'time_step': 0.007059781652101329, 'init_value': -11.091567039489746, 'ave_value': -12.27329152975917, 'soft_opc': nan} step=5325




2022-04-22 05:10.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.12 [info     ] FQE_20220422050928: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00017919540405273438, 'time_algorithm_update': 0.007118508513544647, 'loss': 0.22945668021033347, 'time_step': 0.007375629183272241, 'init_value': -11.316174507141113, 'ave_value': -12.613710989654448, 'soft_opc': nan} step=5680




2022-04-22 05:10.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.14 [info     ] FQE_20220422050928: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.0001769777754662742, 'time_algorithm_update': 0.007090256247721928, 'loss': 0.26336924890712116, 'time_step': 0.007345223091018032, 'init_value': -11.96446418762207, 'ave_value': -13.365461140537354, 'soft_opc': nan} step=6035




2022-04-22 05:10.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.17 [info     ] FQE_20220422050928: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.0001780234592061647, 'time_algorithm_update': 0.006976527227482325, 'loss': 0.28406055293662447, 'time_step': 0.007233509547273878, 'init_value': -12.445892333984375, 'ave_value': -13.87124871502809, 'soft_opc': nan} step=6390




2022-04-22 05:10.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.20 [info     ] FQE_20220422050928: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00018445203002070038, 'time_algorithm_update': 0.006807592553152165, 'loss': 0.3242306022157132, 'time_step': 0.007069910076302542, 'init_value': -12.868417739868164, 'ave_value': -14.422194963361843, 'soft_opc': nan} step=6745




2022-04-22 05:10.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.23 [info     ] FQE_20220422050928: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00017840492893272722, 'time_algorithm_update': 0.007127662443778884, 'loss': 0.3366413247338693, 'time_step': 0.0073832102224860394, 'init_value': -13.107741355895996, 'ave_value': -14.805194880394325, 'soft_opc': nan} step=7100




2022-04-22 05:10.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.26 [info     ] FQE_20220422050928: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.0001788804228876678, 'time_algorithm_update': 0.007122555584974692, 'loss': 0.3634085211797919, 'time_step': 0.007377114094478983, 'init_value': -13.467833518981934, 'ave_value': -15.267810224408962, 'soft_opc': nan} step=7455




2022-04-22 05:10.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.29 [info     ] FQE_20220422050928: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00018428681601940746, 'time_algorithm_update': 0.007853834393998267, 'loss': 0.38055863993793304, 'time_step': 0.008117405797394228, 'init_value': -14.096423149108887, 'ave_value': -16.003064733529836, 'soft_opc': nan} step=7810




2022-04-22 05:10.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.31 [info     ] FQE_20220422050928: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.00017984954404159332, 'time_algorithm_update': 0.007316632337973151, 'loss': 0.39206070125522746, 'time_step': 0.007574588480130048, 'init_value': -14.542279243469238, 'ave_value': -16.612229787320032, 'soft_opc': nan} step=8165




2022-04-22 05:10.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.34 [info     ] FQE_20220422050928: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00018241909188284, 'time_algorithm_update': 0.007696907285233618, 'loss': 0.4117443705003866, 'time_step': 0.007959236225611727, 'init_value': -14.580339431762695, 'ave_value': -16.849958495064094, 'soft_opc': nan} step=8520




2022-04-22 05:10.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.38 [info     ] FQE_20220422050928: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.00017848350632358604, 'time_algorithm_update': 0.007776207991049324, 'loss': 0.4265406508051174, 'time_step': 0.008032486472331303, 'init_value': -15.170090675354004, 'ave_value': -17.474351661729997, 'soft_opc': nan} step=8875




2022-04-22 05:10.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.40 [info     ] FQE_20220422050928: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00020727439665458572, 'time_algorithm_update': 0.0075573014541411064, 'loss': 0.44059663927051385, 'time_step': 0.007844480998079542, 'init_value': -15.15832805633545, 'ave_value': -17.572818906231277, 'soft_opc': nan} step=9230




2022-04-22 05:10.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.44 [info     ] FQE_20220422050928: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.0001823284256626183, 'time_algorithm_update': 0.007814901647433429, 'loss': 0.45414989637342135, 'time_step': 0.008076448843512737, 'init_value': -16.04145622253418, 'ave_value': -18.425401186174984, 'soft_opc': nan} step=9585




2022-04-22 05:10.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.46 [info     ] FQE_20220422050928: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.00017896370148994553, 'time_algorithm_update': 0.0075223983173639, 'loss': 0.4586359912293478, 'time_step': 0.007779360489106514, 'init_value': -16.13863182067871, 'ave_value': -18.63650957238327, 'soft_opc': nan} step=9940




2022-04-22 05:10.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.50 [info     ] FQE_20220422050928: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00019479066553250164, 'time_algorithm_update': 0.007981452135972573, 'loss': 0.4679323431838986, 'time_step': 0.008258392441440636, 'init_value': -16.37392807006836, 'ave_value': -19.008538352154396, 'soft_opc': nan} step=10295




2022-04-22 05:10.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.53 [info     ] FQE_20220422050928: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.0001832350878648355, 'time_algorithm_update': 0.007659313040719905, 'loss': 0.4604335158800995, 'time_step': 0.007920318925884408, 'init_value': -16.829599380493164, 'ave_value': -19.626329570542662, 'soft_opc': nan} step=10650




2022-04-22 05:10.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.56 [info     ] FQE_20220422050928: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00018379990483673525, 'time_algorithm_update': 0.0078352303572104, 'loss': 0.4718491288715265, 'time_step': 0.008099637233035665, 'init_value': -17.004772186279297, 'ave_value': -19.941699609529117, 'soft_opc': nan} step=11005




2022-04-22 05:10.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:10.59 [info     ] FQE_20220422050928: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00019521578936509683, 'time_algorithm_update': 0.007646555967733893, 'loss': 0.47946527278391826, 'time_step': 0.007919176531509614, 'init_value': -17.006982803344727, 'ave_value': -20.089426347442654, 'soft_opc': nan} step=11360




2022-04-22 05:10.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.02 [info     ] FQE_20220422050928: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00018260714033959617, 'time_algorithm_update': 0.007621795358792157, 'loss': 0.47041342318005547, 'time_step': 0.00788511894118618, 'init_value': -17.141551971435547, 'ave_value': -20.440096802066442, 'soft_opc': nan} step=11715




2022-04-22 05:11.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.05 [info     ] FQE_20220422050928: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00018320486579142826, 'time_algorithm_update': 0.007893043840435189, 'loss': 0.4702802316482428, 'time_step': 0.00815593423977704, 'init_value': -17.457548141479492, 'ave_value': -20.86857247617062, 'soft_opc': nan} step=12070




2022-04-22 05:11.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.08 [info     ] FQE_20220422050928: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00018252990615199988, 'time_algorithm_update': 0.007560170536309901, 'loss': 0.4713488329779094, 'time_step': 0.007820885617968062, 'init_value': -17.467079162597656, 'ave_value': -21.165023705845595, 'soft_opc': nan} step=12425




2022-04-22 05:11.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.11 [info     ] FQE_20220422050928: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.0001847515643482477, 'time_algorithm_update': 0.007616831551135426, 'loss': 0.47157278557793353, 'time_step': 0.007881483561556104, 'init_value': -17.331445693969727, 'ave_value': -21.25851585569107, 'soft_opc': nan} step=12780




2022-04-22 05:11.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.14 [info     ] FQE_20220422050928: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00018438621306083572, 'time_algorithm_update': 0.007654961733750894, 'loss': 0.4722512254517683, 'time_step': 0.00792031489627462, 'init_value': -17.38767433166504, 'ave_value': -21.564823799971084, 'soft_opc': nan} step=13135




2022-04-22 05:11.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.17 [info     ] FQE_20220422050928: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.00018314710805113888, 'time_algorithm_update': 0.007945002972240179, 'loss': 0.4611225955057102, 'time_step': 0.008210779243791606, 'init_value': -17.206113815307617, 'ave_value': -21.65873390178039, 'soft_opc': nan} step=13490




2022-04-22 05:11.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.20 [info     ] FQE_20220422050928: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00018198120761925065, 'time_algorithm_update': 0.007902749155608702, 'loss': 0.4672786566363254, 'time_step': 0.008165618735299984, 'init_value': -17.467960357666016, 'ave_value': -22.174596237008636, 'soft_opc': nan} step=13845




2022-04-22 05:11.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.23 [info     ] FQE_20220422050928: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00018287376618721115, 'time_algorithm_update': 0.007335196750264773, 'loss': 0.4718082649607054, 'time_step': 0.007599406846812074, 'init_value': -17.437721252441406, 'ave_value': -22.19522677633427, 'soft_opc': nan} step=14200




2022-04-22 05:11.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.26 [info     ] FQE_20220422050928: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00017989655615578234, 'time_algorithm_update': 0.007838249206542969, 'loss': 0.46562018890825796, 'time_step': 0.008098762136110117, 'init_value': -17.61357307434082, 'ave_value': -22.43383381386483, 'soft_opc': nan} step=14555




2022-04-22 05:11.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.29 [info     ] FQE_20220422050928: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.00018369043377083793, 'time_algorithm_update': 0.007532325261075732, 'loss': 0.47100809313042064, 'time_step': 0.007798204287676744, 'init_value': -17.530698776245117, 'ave_value': -22.558386365344877, 'soft_opc': nan} step=14910




2022-04-22 05:11.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.32 [info     ] FQE_20220422050928: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00018515586853027344, 'time_algorithm_update': 0.007922885115717499, 'loss': 0.4713928250307348, 'time_step': 0.008183772798994897, 'init_value': -17.643558502197266, 'ave_value': -22.764872585737567, 'soft_opc': nan} step=15265




2022-04-22 05:11.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.35 [info     ] FQE_20220422050928: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.0001776184834225077, 'time_algorithm_update': 0.007254151895012654, 'loss': 0.4678103859418295, 'time_step': 0.007510472687197403, 'init_value': -17.670446395874023, 'ave_value': -22.931541450363866, 'soft_opc': nan} step=15620




2022-04-22 05:11.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.38 [info     ] FQE_20220422050928: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00017971522371533892, 'time_algorithm_update': 0.0078674531318772, 'loss': 0.4757288194668125, 'time_step': 0.008126428765310369, 'init_value': -17.598766326904297, 'ave_value': -22.982129502915893, 'soft_opc': nan} step=15975




2022-04-22 05:11.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.41 [info     ] FQE_20220422050928: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00018765019698881768, 'time_algorithm_update': 0.00799331933679715, 'loss': 0.47239135133076304, 'time_step': 0.008261945885671695, 'init_value': -17.133764266967773, 'ave_value': -22.73973368911631, 'soft_opc': nan} step=16330




2022-04-22 05:11.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.44 [info     ] FQE_20220422050928: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00018259370830697074, 'time_algorithm_update': 0.007736682891845703, 'loss': 0.4469800889544504, 'time_step': 0.007999431583243356, 'init_value': -17.30426025390625, 'ave_value': -22.964521685646346, 'soft_opc': nan} step=16685




2022-04-22 05:11.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.47 [info     ] FQE_20220422050928: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00018077232468296104, 'time_algorithm_update': 0.0076320077331972795, 'loss': 0.47644622695540456, 'time_step': 0.007894164071956151, 'init_value': -16.777950286865234, 'ave_value': -22.506213399811983, 'soft_opc': nan} step=17040




2022-04-22 05:11.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.50 [info     ] FQE_20220422050928: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00018507326152962698, 'time_algorithm_update': 0.0073967463533643266, 'loss': 0.4645283582817081, 'time_step': 0.007661505820046008, 'init_value': -16.487529754638672, 'ave_value': -22.382956968963224, 'soft_opc': nan} step=17395




2022-04-22 05:11.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 05:11.53 [info     ] FQE_20220422050928: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00018176025068256216, 'time_algorithm_update': 0.007847105617254553, 'loss': 0.4728080188855529, 'time_step': 0.008109573579170335, 'init_value': -16.90799331665039, 'ave_value': -22.74485133877132, 'soft_opc': nan} step=17750




2022-04-22 05:11.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422050928/model_17750.pt
search iteration:  21
using hyper params:  [4.21349016227252e-05, 0.004084358066818564, 4.4615450525265395e-05, 1]
2022-04-22 05:11.53 [debug    ] RoundIterator is selected.
2022-04-22 05:11.53 [info     ] Directory is created at d3rlpy_logs/CQL_20220422051153
2022-04-22 05:11.53 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 05:11.53 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 05:11.53 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422051153/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 4.21349016227252e-05, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:12.13 [info     ] CQL_20220422051153: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003493052686570008, 'time_algorithm_update': 0.05538698427939002, 'temp_loss': 3.464365387238519, 'temp': 0.9921454677002968, 'alpha_loss': -17.009036273625544, 'alpha': 1.017675700904317, 'critic_loss': 25.447690941694844, 'actor_loss': -0.36259888668108536, 'time_step': 0.055839421432142315, 'td_error': 1.2144252836278029, 'init_value': 0.17257966101169586, 'ave_value': 0.37059895732321263} step=346
2022-04-22 05:12.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:12.33 [info     ] CQL_20220422051153: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00034733797084389396, 'time_algorithm_update': 0.05583777868678804, 'temp_loss': 3.7833954474829525, 'temp': 0.9762559474548164, 'alpha_loss': -17.80748722318969, 'alpha': 1.054268196483568, 'critic_loss': 30.037711534886, 'actor_loss': -0.5367192015740913, 'time_step': 0.056288289196918466, 'td_error': 1.2045621635179486, 'init_value': -0.11004071682691574, 'ave_value': 0.2106630042545063} step=692
2022-04-22 05:12.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:12.54 [info     ] CQL_20220422051153: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00037627826536321916, 'time_algorithm_update': 0.05590656589221403, 'temp_loss': 3.943350089078694, 'temp': 0.9604777414674703, 'alpha_loss': -18.565139346040052, 'alpha': 1.0928042296729337, 'critic_loss': 39.52928792258908, 'actor_loss': -0.3712722924672541, 'time_step': 0.056388346446042806, 'td_error': 1.2019076129197654, 'init_value': -0.643660306930542, 'ave_value': -0.22028143871531433} step=1038
2022-04-22 05:12.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:13.14 [info     ] CQL_20220422051153: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00034614587794838615, 'time_algorithm_update': 0.05552255969516115, 'temp_loss': 4.060883174741888, 'temp': 0.9449773442883023, 'alpha_loss': -19.344499009193022, 'alpha': 1.1332887652292416, 'critic_loss': 51.48499569314064, 'actor_loss': -0.043762630411569095, 'time_step': 0.05597172031512839, 'td_error': 1.2016385589044185, 'init_value': -0.8894607424736023, 'ave_value': -0.4363323323396836} step=1384
2022-04-22 05:13.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:13.34 [info     ] CQL_20220422051153: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003507523178365189, 'time_algorithm_update': 0.0559559557479241, 'temp_loss': 4.120890710395195, 'temp': 0.9297956832916061, 'alpha_loss': -20.138530400447074, 'alpha': 1.175739713142373, 'critic_loss': 65.18986483667628, 'actor_loss': 0.3607380109263121, 'time_step': 0.056410421525811875, 'td_error': 1.2074944073174285, 'init_value': -1.2730607986450195, 'ave_value': -0.7850974904637361} step=1730
2022-04-22 05:13.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:13.55 [info     ] CQL_20220422051153: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00035317509160565503, 'time_algorithm_update': 0.05598389206594125, 'temp_loss': 4.153938044702387, 'temp': 0.9149979270262525, 'alpha_loss': -20.949480613532096, 'alpha': 1.2201646945380062, 'critic_loss': 80.83412543059774, 'actor_loss': 0.7433236938594394, 'time_step': 0.056441287084811, 'td_error': 1.2113256707571451, 'init_value': -1.9143664836883545, 'ave_value': -1.3643632193674913} step=2076
2022-04-22 05:13.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:14.15 [info     ] CQL_20220422051153: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003560960637351681, 'time_algorithm_update': 0.05631483014608394, 'temp_loss': 4.149150507298508, 'temp': 0.9005901622289867, 'alpha_loss': -21.792734978515977, 'alpha': 1.2665950042663972, 'critic_loss': 98.57094951034281, 'actor_loss': 1.0462133984345232, 'time_step': 0.05677302379828657, 'td_error': 1.2104402192120751, 'init_value': -2.012164354324341, 'ave_value': -1.5091494166268846} step=2422
2022-04-22 05:14.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:14.36 [info     ] CQL_20220422051153: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00035631656646728516, 'time_algorithm_update': 0.05652453375689556, 'temp_loss': 4.14283631440532, 'temp': 0.8865506571496842, 'alpha_loss': -22.665986959644826, 'alpha': 1.3150672478482903, 'critic_loss': 119.60128129286574, 'actor_loss': 1.2051676686616302, 'time_step': 0.05697745601565852, 'td_error': 1.2209632240064068, 'init_value': -2.42789626121521, 'ave_value': -1.893229197147278} step=2768
2022-04-22 05:14.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:14.56 [info     ] CQL_20220422051153: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00034908200964073224, 'time_algorithm_update': 0.05654960353939519, 'temp_loss': 4.127380698402493, 'temp': 0.8728437528789388, 'alpha_loss': -23.56224037457064, 'alpha': 1.3656178185705505, 'critic_loss': 147.93999150447073, 'actor_loss': 0.9861628613461649, 'time_step': 0.05699751556264183, 'td_error': 1.2089556825811418, 'init_value': -1.6174927949905396, 'ave_value': -1.250895072357979} step=3114
2022-04-22 05:14.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:15.16 [info     ] CQL_20220422051153: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003657306549866075, 'time_algorithm_update': 0.056148839134701416, 'temp_loss': 4.104183527086511, 'temp': 0.8594346814761961, 'alpha_loss': -24.49667578349913, 'alpha': 1.418274982816222, 'critic_loss': 190.05979522528676, 'actor_loss': 0.33664975437534855, 'time_step': 0.05662052135247027, 'td_error': 1.2113810483637373, 'init_value': -1.1363741159439087, 'ave_value': -0.8233621016307259} step=3460
2022-04-22 05:15.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:15.37 [info     ] CQL_20220422051153: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00036311976482413406, 'time_algorithm_update': 0.055520762597894395, 'temp_loss': 4.072203177248122, 'temp': 0.846314555820013, 'alpha_loss': -25.457061249396705, 'alpha': 1.4731042216278913, 'critic_loss': 245.20369045720625, 'actor_loss': -0.5020418956531265, 'time_step': 0.05598598615282533, 'td_error': 1.2186714073466736, 'init_value': -0.3994705379009247, 'ave_value': -0.21258725726587505} step=3806
2022-04-22 05:15.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:15.57 [info     ] CQL_20220422051153: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003676786588106541, 'time_algorithm_update': 0.055646888782523274, 'temp_loss': 4.036037773066173, 'temp': 0.8334746155780175, 'alpha_loss': -26.468484294207798, 'alpha': 1.5301662108112621, 'critic_loss': 307.4649574412087, 'actor_loss': -1.3558846715557782, 'time_step': 0.05611733273963708, 'td_error': 1.2271909618088148, 'init_value': 0.3521082103252411, 'ave_value': 0.44542698431715916} step=4152
2022-04-22 05:15.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:16.17 [info     ] CQL_20220422051153: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00035237921455692, 'time_algorithm_update': 0.055979738345725, 'temp_loss': 3.993072076339942, 'temp': 0.8208971736748094, 'alpha_loss': -27.50242384320739, 'alpha': 1.5895216561466283, 'critic_loss': 369.36781540358, 'actor_loss': -2.0637193866547823, 'time_step': 0.05643930118207987, 'td_error': 1.2315155890910012, 'init_value': 0.9773885011672974, 'ave_value': 1.0208894478703856} step=4498
2022-04-22 05:16.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:16.38 [info     ] CQL_20220422051153: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00036104221564496874, 'time_algorithm_update': 0.05601483066647039, 'temp_loss': 3.951286081633816, 'temp': 0.8085658150601249, 'alpha_loss': -28.589074895561087, 'alpha': 1.6512459826607235, 'critic_loss': 430.41330070716106, 'actor_loss': -2.640755694036539, 'time_step': 0.056478986161292634, 'td_error': 1.2325857611028486, 'init_value': 1.413902759552002, 'ave_value': 1.4523724992787346} step=4844
2022-04-22 05:16.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:16.57 [info     ] CQL_20220422051153: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003451611954352759, 'time_algorithm_update': 0.0530399766271514, 'temp_loss': 3.9106609221827777, 'temp': 0.7964541521031043, 'alpha_loss': -29.706402971565378, 'alpha': 1.7154208318346498, 'critic_loss': 492.62889928211365, 'actor_loss': -3.1934982310829825, 'time_step': 0.053484352337831706, 'td_error': 1.234383139100879, 'init_value': 2.003037214279175, 'ave_value': 2.0214434660345866} step=5190
2022-04-22 05:16.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:17.15 [info     ] CQL_20220422051153: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00033993045718683673, 'time_algorithm_update': 0.051122723287240615, 'temp_loss': 3.8645639061238723, 'temp': 0.7845571697447341, 'alpha_loss': -30.87865467292036, 'alpha': 1.7821273410940446, 'critic_loss': 561.6121506883919, 'actor_loss': -3.7093656435178195, 'time_step': 0.05156560578098187, 'td_error': 1.234927659682427, 'init_value': 2.5031869411468506, 'ave_value': 2.51499183454916} step=5536
2022-04-22 05:17.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:17.33 [info     ] CQL_20220422051153: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003452239008997217, 'time_algorithm_update': 0.04977611379127282, 'temp_loss': 3.8167924288380353, 'temp': 0.7728747828847411, 'alpha_loss': -32.08620447368291, 'alpha': 1.8514636162388531, 'critic_loss': 639.182210580462, 'actor_loss': -4.185760492534307, 'time_step': 0.050225016009600866, 'td_error': 1.2355982523117615, 'init_value': 3.004176616668701, 'ave_value': 3.0153804710093355} step=5882
2022-04-22 05:17.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:17.51 [info     ] CQL_20220422051153: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00034284660581908473, 'time_algorithm_update': 0.049870931344225225, 'temp_loss': 3.7687146953075605, 'temp': 0.7613943348385696, 'alpha_loss': -33.3537733485933, 'alpha': 1.923526279499076, 'critic_loss': 729.7105820495958, 'actor_loss': -4.644198989592535, 'time_step': 0.05031797031446689, 'td_error': 1.236713295977886, 'init_value': 3.4781200885772705, 'ave_value': 3.487726701747303} step=6228
2022-04-22 05:17.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:18.10 [info     ] CQL_20220422051153: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00033609164243488643, 'time_algorithm_update': 0.04990666863546206, 'temp_loss': 3.721557397373839, 'temp': 0.7501013685513094, 'alpha_loss': -34.65450176613869, 'alpha': 1.9983991746268521, 'critic_loss': 832.0842191663091, 'actor_loss': -5.113042310483194, 'time_step': 0.050344598086583134, 'td_error': 1.2366271353280647, 'init_value': 3.862581729888916, 'ave_value': 3.872314207256547} step=6574
2022-04-22 05:18.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:18.28 [info     ] CQL_20220422051153: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00034818139379424166, 'time_algorithm_update': 0.05032409546692247, 'temp_loss': 3.6762288884620444, 'temp': 0.7389925902633998, 'alpha_loss': -36.00584192220875, 'alpha': 2.076197620072117, 'critic_loss': 946.1226679631052, 'actor_loss': -5.550943240954008, 'time_step': 0.050773721898911314, 'td_error': 1.2389759627103938, 'init_value': 4.348001003265381, 'ave_value': 4.354239418338181} step=6920
2022-04-22 05:18.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:18.46 [info     ] CQL_20220422051153: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003456938473475462, 'time_algorithm_update': 0.04996187631794483, 'temp_loss': 3.625895389242668, 'temp': 0.7280610544833145, 'alpha_loss': -37.422772710722995, 'alpha': 2.157032425693005, 'critic_loss': 1072.393553805489, 'actor_loss': -5.946079835726347, 'time_step': 0.05040513297725964, 'td_error': 1.2396524071341883, 'init_value': 4.694789886474609, 'ave_value': 4.703615487746686} step=7266
2022-04-22 05:18.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:19.04 [info     ] CQL_20220422051153: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003448152817742673, 'time_algorithm_update': 0.049705160835574816, 'temp_loss': 3.578362517274184, 'temp': 0.7173069143570916, 'alpha_loss': -38.89002761399815, 'alpha': 2.241026320898464, 'critic_loss': 1212.4120878539334, 'actor_loss': -6.2926574469990815, 'time_step': 0.05014574252112063, 'td_error': 1.241594138166406, 'init_value': 5.046350002288818, 'ave_value': 5.055769199147051} step=7612
2022-04-22 05:19.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:19.22 [info     ] CQL_20220422051153: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003397051309574546, 'time_algorithm_update': 0.050053909334833224, 'temp_loss': 3.5301104687541898, 'temp': 0.7067180704863775, 'alpha_loss': -40.41223105943272, 'alpha': 2.328292057693349, 'critic_loss': 1363.025399092305, 'actor_loss': -6.599574921448107, 'time_step': 0.05049034970344147, 'td_error': 1.244873168254902, 'init_value': 5.450276851654053, 'ave_value': 5.45385278830205} step=7958
2022-04-22 05:19.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:19.40 [info     ] CQL_20220422051153: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00034070566210443574, 'time_algorithm_update': 0.0497699135300741, 'temp_loss': 3.484680844869228, 'temp': 0.6962942789400244, 'alpha_loss': -41.99167145745603, 'alpha': 2.4189594419016314, 'critic_loss': 1523.7992088009166, 'actor_loss': -6.912832537138393, 'time_step': 0.05020873395004714, 'td_error': 1.245197384546369, 'init_value': 5.682022571563721, 'ave_value': 5.691179840414374} step=8304
2022-04-22 05:19.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:19.58 [info     ] CQL_20220422051153: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003393909145641878, 'time_algorithm_update': 0.04928603544400607, 'temp_loss': 3.436173675377245, 'temp': 0.686029650916943, 'alpha_loss': -43.637943532425545, 'alpha': 2.5131587665205055, 'critic_loss': 1698.877410006661, 'actor_loss': -7.203907954210491, 'time_step': 0.04972265566015519, 'td_error': 1.248004369294124, 'init_value': 6.037937164306641, 'ave_value': 6.043323785448142} step=8650
2022-04-22 05:19.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:20.16 [info     ] CQL_20220422051153: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00033988980199560264, 'time_algorithm_update': 0.04924764316206034, 'temp_loss': 3.390086072028717, 'temp': 0.6759223293706861, 'alpha_loss': -45.35124527374444, 'alpha': 2.61103568325153, 'critic_loss': 1886.8161797495936, 'actor_loss': -7.483145461606153, 'time_step': 0.049681679361817464, 'td_error': 1.2499035720849048, 'init_value': 6.296735763549805, 'ave_value': 6.3035319499463025} step=8996
2022-04-22 05:20.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:20.33 [info     ] CQL_20220422051153: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0003422533156554823, 'time_algorithm_update': 0.047103066664899704, 'temp_loss': 3.341907534296113, 'temp': 0.665969479463004, 'alpha_loss': -47.120248640203755, 'alpha': 2.712715600267311, 'critic_loss': 2085.9373303010975, 'actor_loss': -7.696729460203579, 'time_step': 0.047539622108371274, 'td_error': 1.2517646888052174, 'init_value': 6.534512519836426, 'ave_value': 6.538905580465462} step=9342
2022-04-22 05:20.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:20.50 [info     ] CQL_20220422051153: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0003470189309533621, 'time_algorithm_update': 0.04714215146323849, 'temp_loss': 3.2972663923495076, 'temp': 0.6561689066749088, 'alpha_loss': -48.96020965355669, 'alpha': 2.8183512260459063, 'critic_loss': 2302.9155280493587, 'actor_loss': -7.935166455417699, 'time_step': 0.047585723028017606, 'td_error': 1.2532891519059204, 'init_value': 6.748620510101318, 'ave_value': 6.753921904276761} step=9688
2022-04-22 05:20.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:21.07 [info     ] CQL_20220422051153: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003352323708506678, 'time_algorithm_update': 0.04668255830775796, 'temp_loss': 3.24859360187729, 'temp': 0.6465171301640527, 'alpha_loss': -50.87503658140326, 'alpha': 2.9280978275861353, 'critic_loss': 2524.136933255058, 'actor_loss': -8.134563516330168, 'time_step': 0.04711878230806031, 'td_error': 1.2558555849185091, 'init_value': 6.9974589347839355, 'ave_value': 7.000487877441956} step=10034
2022-04-22 05:21.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:21.23 [info     ] CQL_20220422051153: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003304198987222131, 'time_algorithm_update': 0.04531426788065475, 'temp_loss': 3.204536647465877, 'temp': 0.6370108716749732, 'alpha_loss': -52.86773882298111, 'alpha': 3.0421148070021173, 'critic_loss': 2727.842935727511, 'actor_loss': -8.331983731661229, 'time_step': 0.04574051344325777, 'td_error': 1.2586575481105877, 'init_value': 7.243422508239746, 'ave_value': 7.2435212696033835} step=10380
2022-04-22 05:21.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:21.41 [info     ] CQL_20220422051153: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00033543840309098965, 'time_algorithm_update': 0.0477029925825968, 'temp_loss': 3.158244050307081, 'temp': 0.6276477487445566, 'alpha_loss': -54.93002954383806, 'alpha': 3.1605685632352882, 'critic_loss': 2951.8856441078847, 'actor_loss': -8.473842593286768, 'time_step': 0.048134180162683386, 'td_error': 1.2589939700013535, 'init_value': 7.368810653686523, 'ave_value': 7.372871551359927} step=10726
2022-04-22 05:21.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:21.58 [info     ] CQL_20220422051153: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00034033976538332903, 'time_algorithm_update': 0.046910735913094755, 'temp_loss': 3.1138159050417773, 'temp': 0.6184247706322311, 'alpha_loss': -57.074100758988045, 'alpha': 3.2836306977134218, 'critic_loss': 3150.465485853956, 'actor_loss': -8.651452378730554, 'time_step': 0.047350751871318486, 'td_error': 1.2611152887828438, 'init_value': 7.562370777130127, 'ave_value': 7.564914655572377} step=11072
2022-04-22 05:21.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:22.15 [info     ] CQL_20220422051153: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003333222659337038, 'time_algorithm_update': 0.04708483177802466, 'temp_loss': 3.0695388537610886, 'temp': 0.6093398858012492, 'alpha_loss': -59.29996024115237, 'alpha': 3.411485757441879, 'critic_loss': 3221.5372342677474, 'actor_loss': -8.892534018941008, 'time_step': 0.047516086198001926, 'td_error': 1.264909544625811, 'init_value': 7.8703293800354, 'ave_value': 7.87018067281695} step=11418
2022-04-22 05:22.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:22.32 [info     ] CQL_20220422051153: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003380175959857213, 'time_algorithm_update': 0.047408065354892975, 'temp_loss': 3.0260664610504415, 'temp': 0.6003889937621321, 'alpha_loss': -61.61964680004671, 'alpha': 3.544313855942963, 'critic_loss': 3081.8851367751986, 'actor_loss': -9.122202746440909, 'time_step': 0.04784386006393874, 'td_error': 1.2663628488744685, 'init_value': 8.040980339050293, 'ave_value': 8.041510317754044} step=11764
2022-04-22 05:22.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:22.49 [info     ] CQL_20220422051153: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00033952114899034444, 'time_algorithm_update': 0.04654074817723622, 'temp_loss': 2.9830916499815925, 'temp': 0.5915702496994437, 'alpha_loss': -64.02377295080638, 'alpha': 3.6823144041733933, 'critic_loss': 2985.310829824106, 'actor_loss': -9.312909831890481, 'time_step': 0.04697527086114608, 'td_error': 1.2695894980938371, 'init_value': 8.274864196777344, 'ave_value': 8.272737395882098} step=12110
2022-04-22 05:22.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:23.06 [info     ] CQL_20220422051153: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003433964845073016, 'time_algorithm_update': 0.04723819211728311, 'temp_loss': 2.939624368110833, 'temp': 0.5828843902301237, 'alpha_loss': -66.51418710168386, 'alpha': 3.8256802427975427, 'critic_loss': 2582.046180680997, 'actor_loss': -9.683987297763714, 'time_step': 0.047680025155833694, 'td_error': 1.273695303504993, 'init_value': 8.618412971496582, 'ave_value': 8.614774871923846} step=12456
2022-04-22 05:23.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:23.23 [info     ] CQL_20220422051153: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003465717238497872, 'time_algorithm_update': 0.046719087341617296, 'temp_loss': 2.8968496164145496, 'temp': 0.5743278180588187, 'alpha_loss': -69.10344078637272, 'alpha': 3.974625135432778, 'critic_loss': 2308.175213234962, 'actor_loss': -9.91416810427098, 'time_step': 0.04716413214027537, 'td_error': 1.2768418979447955, 'init_value': 8.879931449890137, 'ave_value': 8.876106642184522} step=12802
2022-04-22 05:23.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:23.40 [info     ] CQL_20220422051153: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00033186143533342835, 'time_algorithm_update': 0.04600089340540715, 'temp_loss': 2.8548849283615287, 'temp': 0.5658986268705026, 'alpha_loss': -71.78858275220573, 'alpha': 4.129362952502476, 'critic_loss': 2122.710545181539, 'actor_loss': -10.133232860895939, 'time_step': 0.046425964790961644, 'td_error': 1.281297476920459, 'init_value': 9.199357032775879, 'ave_value': 9.192067985932598} step=13148
2022-04-22 05:23.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:23.57 [info     ] CQL_20220422051153: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003341760249496195, 'time_algorithm_update': 0.04690709141637549, 'temp_loss': 2.8131788500471613, 'temp': 0.5575954914093018, 'alpha_loss': -74.5900487072895, 'alpha': 4.290120317756785, 'critic_loss': 1945.2590952966943, 'actor_loss': -10.359323264546477, 'time_step': 0.047330857012313224, 'td_error': 1.280313002797397, 'init_value': 9.270467758178711, 'ave_value': 9.273331888008027} step=13494
2022-04-22 05:23.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:24.14 [info     ] CQL_20220422051153: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.000337741967570575, 'time_algorithm_update': 0.0469878732813576, 'temp_loss': 2.7727084194304625, 'temp': 0.5494146488305461, 'alpha_loss': -77.4974078801326, 'alpha': 4.457139584370431, 'critic_loss': 1922.6300796773392, 'actor_loss': -10.51009894519872, 'time_step': 0.047422679862535067, 'td_error': 1.2840209038661727, 'init_value': 9.516773223876953, 'ave_value': 9.517960191801748} step=13840
2022-04-22 05:24.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:24.31 [info     ] CQL_20220422051153: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003714575243823101, 'time_algorithm_update': 0.047248287007987846, 'temp_loss': 2.7329245468095547, 'temp': 0.5413535845417508, 'alpha_loss': -80.5148027012114, 'alpha': 4.630664549811038, 'critic_loss': 1869.408624020615, 'actor_loss': -10.708570055878921, 'time_step': 0.04771276360991373, 'td_error': 1.2860269315352126, 'init_value': 9.695967674255371, 'ave_value': 9.699102100753965} step=14186
2022-04-22 05:24.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:24.48 [info     ] CQL_20220422051153: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.000346033559369214, 'time_algorithm_update': 0.04693042680707281, 'temp_loss': 2.6930442717723073, 'temp': 0.5334106187599932, 'alpha_loss': -83.6557022050626, 'alpha': 4.810941386085025, 'critic_loss': 1838.397953099598, 'actor_loss': -10.945021993163005, 'time_step': 0.04737345951830031, 'td_error': 1.2873131465281247, 'init_value': 9.857407569885254, 'ave_value': 9.865301672165401} step=14532
2022-04-22 05:24.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:25.05 [info     ] CQL_20220422051153: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00033646649707948543, 'time_algorithm_update': 0.04722231385335757, 'temp_loss': 2.652726994084485, 'temp': 0.525587456247021, 'alpha_loss': -86.90671232807843, 'alpha': 4.99823564325454, 'critic_loss': 1972.4656110995077, 'actor_loss': -11.065197605618163, 'time_step': 0.047659490149834255, 'td_error': 1.2933993944432365, 'init_value': 10.175849914550781, 'ave_value': 10.176942210697122} step=14878
2022-04-22 05:25.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:25.22 [info     ] CQL_20220422051153: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003432848549991674, 'time_algorithm_update': 0.047259658747325746, 'temp_loss': 2.614504551611884, 'temp': 0.5178804390692298, 'alpha_loss': -90.28900675690932, 'alpha': 5.192821131965329, 'critic_loss': 2078.4154430235053, 'actor_loss': -11.266272081805102, 'time_step': 0.04769755029953973, 'td_error': 1.2955202247598676, 'init_value': 10.353941917419434, 'ave_value': 10.35776068346938} step=15224
2022-04-22 05:25.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:25.39 [info     ] CQL_20220422051153: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00035635859980059497, 'time_algorithm_update': 0.04763769896733278, 'temp_loss': 2.576662193143988, 'temp': 0.5102838224413767, 'alpha_loss': -93.81723064356457, 'alpha': 5.394982712806304, 'critic_loss': 2204.9846565378884, 'actor_loss': -11.476216881261395, 'time_step': 0.0480901547250031, 'td_error': 1.299237109007395, 'init_value': 10.573051452636719, 'ave_value': 10.574863950151467} step=15570
2022-04-22 05:25.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:25.56 [info     ] CQL_20220422051153: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00033639276647843376, 'time_algorithm_update': 0.04680964988091089, 'temp_loss': 2.5384728639801115, 'temp': 0.5028008803122306, 'alpha_loss': -97.45435670621133, 'alpha': 5.605004324389331, 'critic_loss': 2309.955728696261, 'actor_loss': -11.624357466063747, 'time_step': 0.04724253739924789, 'td_error': 1.3022310130791417, 'init_value': 10.746577262878418, 'ave_value': 10.74720349603493} step=15916
2022-04-22 05:25.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:26.13 [info     ] CQL_20220422051153: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003414533041805201, 'time_algorithm_update': 0.04734962730738469, 'temp_loss': 2.5010604596551445, 'temp': 0.4954280059117113, 'alpha_loss': -101.25888879588574, 'alpha': 5.823188441337188, 'critic_loss': 2427.051936331512, 'actor_loss': -11.79538928291012, 'time_step': 0.0477868387464843, 'td_error': 1.3024545261735758, 'init_value': 10.833963394165039, 'ave_value': 10.839425010735415} step=16262
2022-04-22 05:26.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:26.30 [info     ] CQL_20220422051153: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003465393375110075, 'time_algorithm_update': 0.04741192139642087, 'temp_loss': 2.4649465952305434, 'temp': 0.48816331049610423, 'alpha_loss': -105.19585707559752, 'alpha': 6.0498800112332916, 'critic_loss': 2556.5897922405616, 'actor_loss': -11.934391291844362, 'time_step': 0.0478510856628418, 'td_error': 1.3089833979387955, 'init_value': 11.125404357910156, 'ave_value': 11.123560039598042} step=16608
2022-04-22 05:26.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:26.47 [info     ] CQL_20220422051153: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003516081440655482, 'time_algorithm_update': 0.047068890808634675, 'temp_loss': 2.4287179164114714, 'temp': 0.48100554831110676, 'alpha_loss': -109.3008457911497, 'alpha': 6.285399755301503, 'critic_loss': 2536.657651338963, 'actor_loss': -12.126027374598332, 'time_step': 0.0475122460051079, 'td_error': 1.3109870455101984, 'init_value': 11.29199504852295, 'ave_value': 11.293645931522446} step=16954
2022-04-22 05:26.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:27.05 [info     ] CQL_20220422051153: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003432621156549178, 'time_algorithm_update': 0.04740732046910104, 'temp_loss': 2.3937913682419443, 'temp': 0.47395136633704854, 'alpha_loss': -113.56846497375841, 'alpha': 6.5301075381350655, 'critic_loss': 2434.5017223909413, 'actor_loss': -12.329191681966616, 'time_step': 0.04784030996995165, 'td_error': 1.3171575614624782, 'init_value': 11.571850776672363, 'ave_value': 11.567842778304902} step=17300
2022-04-22 05:27.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422051153/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 05:27.06 [info     ] FQE_20220422052705: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015867043690509107, 'time_algorithm_update': 0.005168600254748241, 'loss': 0.005982864307273314, 'time_step': 0.005399082080427423, 'init_value': -0.2992297112941742, 'ave_value': -0.30442187994181574, 'soft_opc': nan} step=166




2022-04-22 05:27.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.07 [info     ] FQE_20220422052705: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016093828591955714, 'time_algorithm_update': 0.0051615051476352185, 'loss': 0.004154764697887003, 'time_step': 0.005395933806178081, 'init_value': -0.33853304386138916, 'ave_value': -0.3276191473678426, 'soft_opc': nan} step=332




2022-04-22 05:27.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.08 [info     ] FQE_20220422052705: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016168370304337466, 'time_algorithm_update': 0.005177924431950213, 'loss': 0.0036116319327020503, 'time_step': 0.005415968148105116, 'init_value': -0.36046159267425537, 'ave_value': -0.346874374527115, 'soft_opc': nan} step=498




2022-04-22 05:27.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.09 [info     ] FQE_20220422052705: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017611089959202042, 'time_algorithm_update': 0.005431277206145137, 'loss': 0.0032820436319459722, 'time_step': 0.005685079528624753, 'init_value': -0.39541518688201904, 'ave_value': -0.37632035739250014, 'soft_opc': nan} step=664




2022-04-22 05:27.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.10 [info     ] FQE_20220422052705: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016342731843511742, 'time_algorithm_update': 0.0052663604897188855, 'loss': 0.0029187179416002907, 'time_step': 0.00550614494875253, 'init_value': -0.4063090682029724, 'ave_value': -0.38671903747830305, 'soft_opc': nan} step=830




2022-04-22 05:27.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.11 [info     ] FQE_20220422052705: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015905678990375563, 'time_algorithm_update': 0.005073028874684529, 'loss': 0.0024802416145060973, 'time_step': 0.005300372479909874, 'init_value': -0.43013083934783936, 'ave_value': -0.4077668695098108, 'soft_opc': nan} step=996




2022-04-22 05:27.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.12 [info     ] FQE_20220422052705: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001598912549306111, 'time_algorithm_update': 0.005178995879299669, 'loss': 0.002277524767342551, 'time_step': 0.0054058755736753165, 'init_value': -0.4656825065612793, 'ave_value': -0.43393681244337345, 'soft_opc': nan} step=1162




2022-04-22 05:27.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.13 [info     ] FQE_20220422052705: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016859496932431875, 'time_algorithm_update': 0.005281847643564983, 'loss': 0.0019444702059894532, 'time_step': 0.00552612327667604, 'init_value': -0.4689193665981293, 'ave_value': -0.427752303692873, 'soft_opc': nan} step=1328




2022-04-22 05:27.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.14 [info     ] FQE_20220422052705: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015901944723473974, 'time_algorithm_update': 0.004623542349022555, 'loss': 0.0016803453923259154, 'time_step': 0.004853172474596874, 'init_value': -0.4908847212791443, 'ave_value': -0.43992669506308935, 'soft_opc': nan} step=1494




2022-04-22 05:27.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.15 [info     ] FQE_20220422052705: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00016032213188079466, 'time_algorithm_update': 0.005123112575117364, 'loss': 0.0018139882977485254, 'time_step': 0.005349623151572354, 'init_value': -0.5498921871185303, 'ave_value': -0.48013759552467583, 'soft_opc': nan} step=1660




2022-04-22 05:27.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.16 [info     ] FQE_20220422052705: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001651508262358516, 'time_algorithm_update': 0.005206179906086749, 'loss': 0.0017309547447548978, 'time_step': 0.0054470142686223405, 'init_value': -0.5988627672195435, 'ave_value': -0.5157911579250484, 'soft_opc': nan} step=1826




2022-04-22 05:27.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.17 [info     ] FQE_20220422052705: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016108047531311772, 'time_algorithm_update': 0.005313864673476621, 'loss': 0.001708116655340362, 'time_step': 0.00554685276674937, 'init_value': -0.6346317529678345, 'ave_value': -0.5412610628797362, 'soft_opc': nan} step=1992




2022-04-22 05:27.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.18 [info     ] FQE_20220422052705: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016183450997593892, 'time_algorithm_update': 0.005124897841947624, 'loss': 0.0017718865896620035, 'time_step': 0.0053653142538415384, 'init_value': -0.6898066997528076, 'ave_value': -0.5708659565739065, 'soft_opc': nan} step=2158




2022-04-22 05:27.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.19 [info     ] FQE_20220422052705: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016630126769284168, 'time_algorithm_update': 0.005305396505149014, 'loss': 0.0019176405981107313, 'time_step': 0.005549445209732975, 'init_value': -0.772795557975769, 'ave_value': -0.6208021815992086, 'soft_opc': nan} step=2324




2022-04-22 05:27.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.20 [info     ] FQE_20220422052705: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001608607280685241, 'time_algorithm_update': 0.005245724356318095, 'loss': 0.0018519712080800322, 'time_step': 0.005480348345745041, 'init_value': -0.8116344213485718, 'ave_value': -0.6478503269554527, 'soft_opc': nan} step=2490




2022-04-22 05:27.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.21 [info     ] FQE_20220422052705: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016159752765333797, 'time_algorithm_update': 0.005308306360819253, 'loss': 0.0022699533995375575, 'time_step': 0.005542352975132954, 'init_value': -0.8585395812988281, 'ave_value': -0.6840634858529377, 'soft_opc': nan} step=2656




2022-04-22 05:27.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.22 [info     ] FQE_20220422052705: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016769012772893332, 'time_algorithm_update': 0.005086851407246417, 'loss': 0.0024104295775339186, 'time_step': 0.005330742123615311, 'init_value': -0.9018433094024658, 'ave_value': -0.7104550151070496, 'soft_opc': nan} step=2822




2022-04-22 05:27.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.23 [info     ] FQE_20220422052705: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016198388065200253, 'time_algorithm_update': 0.005180884556597974, 'loss': 0.002478572261920872, 'time_step': 0.005418944071574384, 'init_value': -0.9326180815696716, 'ave_value': -0.7358045064684775, 'soft_opc': nan} step=2988




2022-04-22 05:27.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.23 [info     ] FQE_20220422052705: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016510486602783203, 'time_algorithm_update': 0.004741743386509907, 'loss': 0.002914613269371856, 'time_step': 0.00498471777123141, 'init_value': -0.9559956789016724, 'ave_value': -0.7411660740059708, 'soft_opc': nan} step=3154




2022-04-22 05:27.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.25 [info     ] FQE_20220422052705: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016723052564873752, 'time_algorithm_update': 0.005388117698301752, 'loss': 0.0030900709743398322, 'time_step': 0.0056328988937010245, 'init_value': -1.0170069932937622, 'ave_value': -0.788655484818459, 'soft_opc': nan} step=3320




2022-04-22 05:27.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.26 [info     ] FQE_20220422052705: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.000161238463528185, 'time_algorithm_update': 0.0051919652755001945, 'loss': 0.0033759037570968964, 'time_step': 0.005423056073935635, 'init_value': -1.037729263305664, 'ave_value': -0.7917379029925812, 'soft_opc': nan} step=3486




2022-04-22 05:27.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.27 [info     ] FQE_20220422052705: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00017121757369443593, 'time_algorithm_update': 0.005295707518795887, 'loss': 0.0036399307578706733, 'time_step': 0.005546064261930534, 'init_value': -1.0930440425872803, 'ave_value': -0.825469167538088, 'soft_opc': nan} step=3652




2022-04-22 05:27.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.27 [info     ] FQE_20220422052705: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00016552856169551252, 'time_algorithm_update': 0.005112040473754148, 'loss': 0.0038900504455724955, 'time_step': 0.005354899957955602, 'init_value': -1.1327402591705322, 'ave_value': -0.8389239705402639, 'soft_opc': nan} step=3818




2022-04-22 05:27.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.28 [info     ] FQE_20220422052705: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016147113708128412, 'time_algorithm_update': 0.005158240536609328, 'loss': 0.004320186600180242, 'time_step': 0.005395527345588408, 'init_value': -1.1702207326889038, 'ave_value': -0.8748451935848943, 'soft_opc': nan} step=3984




2022-04-22 05:27.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.29 [info     ] FQE_20220422052705: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016595512987619424, 'time_algorithm_update': 0.0051803287253322375, 'loss': 0.00461071819653987, 'time_step': 0.005423146558095174, 'init_value': -1.2325403690338135, 'ave_value': -0.9193058669315399, 'soft_opc': nan} step=4150




2022-04-22 05:27.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.30 [info     ] FQE_20220422052705: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016309267067047487, 'time_algorithm_update': 0.0052784566419670385, 'loss': 0.005038762319775534, 'time_step': 0.005518348820238228, 'init_value': -1.2476533651351929, 'ave_value': -0.930879719585583, 'soft_opc': nan} step=4316




2022-04-22 05:27.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.31 [info     ] FQE_20220422052705: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016314150339149567, 'time_algorithm_update': 0.005127195852348603, 'loss': 0.005491223348604785, 'time_step': 0.005361898835883083, 'init_value': -1.2682185173034668, 'ave_value': -0.9476221214409347, 'soft_opc': nan} step=4482




2022-04-22 05:27.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.32 [info     ] FQE_20220422052705: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00016170381063438323, 'time_algorithm_update': 0.005157225103263396, 'loss': 0.006166570481538481, 'time_step': 0.005394598087632513, 'init_value': -1.3030548095703125, 'ave_value': -0.9710031361730249, 'soft_opc': nan} step=4648




2022-04-22 05:27.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.33 [info     ] FQE_20220422052705: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00015989699995661355, 'time_algorithm_update': 0.004675431423876659, 'loss': 0.006084979870510905, 'time_step': 0.004908866192921099, 'init_value': -1.2944471836090088, 'ave_value': -0.9605718867224012, 'soft_opc': nan} step=4814




2022-04-22 05:27.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.34 [info     ] FQE_20220422052705: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016920394208057817, 'time_algorithm_update': 0.005232293921780874, 'loss': 0.006552635557060298, 'time_step': 0.005479011190942971, 'init_value': -1.2944307327270508, 'ave_value': -0.9590470882921337, 'soft_opc': nan} step=4980




2022-04-22 05:27.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.35 [info     ] FQE_20220422052705: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00017071057514971997, 'time_algorithm_update': 0.0052435513002326685, 'loss': 0.007339908087454983, 'time_step': 0.0054889400321317, 'init_value': -1.3454160690307617, 'ave_value': -1.017294521077729, 'soft_opc': nan} step=5146




2022-04-22 05:27.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.36 [info     ] FQE_20220422052705: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016706248363816594, 'time_algorithm_update': 0.004957496401775314, 'loss': 0.0077119518451165435, 'time_step': 0.0051953203706856235, 'init_value': -1.3388111591339111, 'ave_value': -0.9953548138592023, 'soft_opc': nan} step=5312




2022-04-22 05:27.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.37 [info     ] FQE_20220422052705: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016496124037777084, 'time_algorithm_update': 0.005157249519623906, 'loss': 0.008109649974664852, 'time_step': 0.005396782633769943, 'init_value': -1.343746542930603, 'ave_value': -0.9816574005426856, 'soft_opc': nan} step=5478




2022-04-22 05:27.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.38 [info     ] FQE_20220422052705: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016352785639016024, 'time_algorithm_update': 0.005244601203734617, 'loss': 0.008355458686759427, 'time_step': 0.005482099142419286, 'init_value': -1.37982177734375, 'ave_value': -1.0155797401359221, 'soft_opc': nan} step=5644




2022-04-22 05:27.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.39 [info     ] FQE_20220422052705: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001705310430871435, 'time_algorithm_update': 0.005279653043632048, 'loss': 0.008636772963085144, 'time_step': 0.005525873368044934, 'init_value': -1.4028688669204712, 'ave_value': -1.053084343051756, 'soft_opc': nan} step=5810




2022-04-22 05:27.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.40 [info     ] FQE_20220422052705: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001679299825645355, 'time_algorithm_update': 0.00518472654273711, 'loss': 0.009291035912829978, 'time_step': 0.005424770964197366, 'init_value': -1.3948851823806763, 'ave_value': -1.0124197593369932, 'soft_opc': nan} step=5976




2022-04-22 05:27.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.41 [info     ] FQE_20220422052705: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00016575979899211102, 'time_algorithm_update': 0.005251568484019084, 'loss': 0.00982621363160784, 'time_step': 0.005495022578411792, 'init_value': -1.4888603687286377, 'ave_value': -1.1108892384916544, 'soft_opc': nan} step=6142




2022-04-22 05:27.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.42 [info     ] FQE_20220422052705: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001661088093217597, 'time_algorithm_update': 0.004672034677252712, 'loss': 0.01058656243706986, 'time_step': 0.004912715360342738, 'init_value': -1.571489691734314, 'ave_value': -1.1819197518700684, 'soft_opc': nan} step=6308




2022-04-22 05:27.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.43 [info     ] FQE_20220422052705: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001648923000657415, 'time_algorithm_update': 0.005178455846855439, 'loss': 0.010950903039697715, 'time_step': 0.005411867635795869, 'init_value': -1.5897116661071777, 'ave_value': -1.201677455442647, 'soft_opc': nan} step=6474




2022-04-22 05:27.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.44 [info     ] FQE_20220422052705: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016875870256538852, 'time_algorithm_update': 0.00529740661023611, 'loss': 0.011307351447568914, 'time_step': 0.00554062946733222, 'init_value': -1.5245718955993652, 'ave_value': -1.1270896327488862, 'soft_opc': nan} step=6640




2022-04-22 05:27.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.45 [info     ] FQE_20220422052705: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001644441880375506, 'time_algorithm_update': 0.00515650841126959, 'loss': 0.012121869492728869, 'time_step': 0.005390717322567859, 'init_value': -1.5388767719268799, 'ave_value': -1.1448142885196921, 'soft_opc': nan} step=6806




2022-04-22 05:27.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.46 [info     ] FQE_20220422052705: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001691091491515378, 'time_algorithm_update': 0.005327931369643614, 'loss': 0.012126501555644334, 'time_step': 0.005569189427846886, 'init_value': -1.576487421989441, 'ave_value': -1.1772988242785203, 'soft_opc': nan} step=6972




2022-04-22 05:27.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.47 [info     ] FQE_20220422052705: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016625817999782333, 'time_algorithm_update': 0.005296259041292122, 'loss': 0.012684820200238063, 'time_step': 0.005534722144345203, 'init_value': -1.5943995714187622, 'ave_value': -1.197656347008573, 'soft_opc': nan} step=7138




2022-04-22 05:27.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.48 [info     ] FQE_20220422052705: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001662079110203019, 'time_algorithm_update': 0.005119375435702772, 'loss': 0.013502028439031264, 'time_step': 0.005359879459243223, 'init_value': -1.5703415870666504, 'ave_value': -1.1946347358258995, 'soft_opc': nan} step=7304




2022-04-22 05:27.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.49 [info     ] FQE_20220422052705: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016824308648166885, 'time_algorithm_update': 0.005366713167673134, 'loss': 0.013728626776158428, 'time_step': 0.005613597042589302, 'init_value': -1.5540595054626465, 'ave_value': -1.147370737035935, 'soft_opc': nan} step=7470




2022-04-22 05:27.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.50 [info     ] FQE_20220422052705: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016809658831860646, 'time_algorithm_update': 0.005267252405005765, 'loss': 0.014853565263946215, 'time_step': 0.005509846181754607, 'init_value': -1.5980921983718872, 'ave_value': -1.185482012044202, 'soft_opc': nan} step=7636




2022-04-22 05:27.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.51 [info     ] FQE_20220422052705: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016647792724241693, 'time_algorithm_update': 0.005225516227354486, 'loss': 0.015310163698748248, 'time_step': 0.005469632435993976, 'init_value': -1.5986738204956055, 'ave_value': -1.1938712514701997, 'soft_opc': nan} step=7802




2022-04-22 05:27.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.52 [info     ] FQE_20220422052705: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00015737636979803982, 'time_algorithm_update': 0.004525257880429187, 'loss': 0.015768070685598684, 'time_step': 0.004757593913250659, 'init_value': -1.6087749004364014, 'ave_value': -1.1923824961993608, 'soft_opc': nan} step=7968




2022-04-22 05:27.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.53 [info     ] FQE_20220422052705: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016190775905747012, 'time_algorithm_update': 0.005081534385681152, 'loss': 0.016752879355167297, 'time_step': 0.005316316363323166, 'init_value': -1.6453063488006592, 'ave_value': -1.220641600545328, 'soft_opc': nan} step=8134




2022-04-22 05:27.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:27.54 [info     ] FQE_20220422052705: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001636571194752153, 'time_algorithm_update': 0.0051574233066604795, 'loss': 0.017176496414453665, 'time_step': 0.005392116236399455, 'init_value': -1.6714719533920288, 'ave_value': -1.2495764453249338, 'soft_opc': nan} step=8300




2022-04-22 05:27.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052705/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 05:27.54 [info     ] Directory is created at d3rlpy_logs/FQE_20220422052754
2022-04-22 05:27.54 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 05:27.54 [debug    ] Building models...
2022-04-22 05:27.54 [debug    ] Models have been built.
2022-04-22 05:27.54 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422052754/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 05:27.56 [info     ] FQE_20220422052754: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016778291657913562, 'time_algorithm_update': 0.0051741073297899825, 'loss': 0.021848812133485322, 'time_step': 0.005416256050730861, 'init_value': -1.1638519763946533, 'ave_value': -1.1547320016869553, 'soft_opc': nan} step=344




2022-04-22 05:27.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:27.58 [info     ] FQE_20220422052754: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016809133596198503, 'time_algorithm_update': 0.005110485609187637, 'loss': 0.020577285566020672, 'time_step': 0.005351799865101659, 'init_value': -1.933927297592163, 'ave_value': -1.938122649343164, 'soft_opc': nan} step=688




2022-04-22 05:27.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.00 [info     ] FQE_20220422052754: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016730192095734352, 'time_algorithm_update': 0.005158197048098542, 'loss': 0.02338185739097034, 'time_step': 0.005401253007179083, 'init_value': -3.03863263130188, 'ave_value': -3.0961123425144335, 'soft_opc': nan} step=1032




2022-04-22 05:28.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.02 [info     ] FQE_20220422052754: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00017151790995930516, 'time_algorithm_update': 0.004954446886861047, 'loss': 0.02561559822670249, 'time_step': 0.00520026822422826, 'init_value': -3.7191152572631836, 'ave_value': -3.8191452865799267, 'soft_opc': nan} step=1376




2022-04-22 05:28.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.04 [info     ] FQE_20220422052754: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016345880752386048, 'time_algorithm_update': 0.005064998948296835, 'loss': 0.030695078011905384, 'time_step': 0.005303576935169308, 'init_value': -4.561037540435791, 'ave_value': -4.76057925914322, 'soft_opc': nan} step=1720




2022-04-22 05:28.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.06 [info     ] FQE_20220422052754: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016822995141495105, 'time_algorithm_update': 0.005225897528404413, 'loss': 0.03624500090873605, 'time_step': 0.005468533482662467, 'init_value': -5.203717231750488, 'ave_value': -5.543287531467708, 'soft_opc': nan} step=2064




2022-04-22 05:28.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.08 [info     ] FQE_20220422052754: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016750291336414426, 'time_algorithm_update': 0.005211497462073038, 'loss': 0.04200915921327853, 'time_step': 0.0054550614467886996, 'init_value': -5.950240135192871, 'ave_value': -6.468892119167087, 'soft_opc': nan} step=2408




2022-04-22 05:28.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.10 [info     ] FQE_20220422052754: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016948372818702875, 'time_algorithm_update': 0.005348554877347724, 'loss': 0.05005264432483548, 'time_step': 0.005591502023297687, 'init_value': -6.556398391723633, 'ave_value': -7.343511006571688, 'soft_opc': nan} step=2752




2022-04-22 05:28.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.12 [info     ] FQE_20220422052754: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016871995704118595, 'time_algorithm_update': 0.005119101252666739, 'loss': 0.05834568725888033, 'time_step': 0.005366962316424348, 'init_value': -7.048823356628418, 'ave_value': -8.067807088612772, 'soft_opc': nan} step=3096




2022-04-22 05:28.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.14 [info     ] FQE_20220422052754: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017251386198886606, 'time_algorithm_update': 0.005239054214122684, 'loss': 0.0719672551941733, 'time_step': 0.005485974772031917, 'init_value': -7.796854019165039, 'ave_value': -9.134634487957879, 'soft_opc': nan} step=3440




2022-04-22 05:28.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.16 [info     ] FQE_20220422052754: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00017295257989750353, 'time_algorithm_update': 0.005247572826784711, 'loss': 0.08301235581910628, 'time_step': 0.005499947902768157, 'init_value': -8.181694030761719, 'ave_value': -9.810397914144723, 'soft_opc': nan} step=3784




2022-04-22 05:28.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.19 [info     ] FQE_20220422052754: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.000168058761330538, 'time_algorithm_update': 0.005232870578765869, 'loss': 0.09457018046888935, 'time_step': 0.005477738241816676, 'init_value': -8.879667282104492, 'ave_value': -10.83158212163494, 'soft_opc': nan} step=4128




2022-04-22 05:28.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.20 [info     ] FQE_20220422052754: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001677108365435933, 'time_algorithm_update': 0.004888780588327452, 'loss': 0.1039489722691563, 'time_step': 0.005132978045663168, 'init_value': -9.359066009521484, 'ave_value': -11.63167652232585, 'soft_opc': nan} step=4472




2022-04-22 05:28.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.22 [info     ] FQE_20220422052754: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017285832138948663, 'time_algorithm_update': 0.005176581615625426, 'loss': 0.11862728955375768, 'time_step': 0.005422732857770698, 'init_value': -9.830793380737305, 'ave_value': -12.391691586473403, 'soft_opc': nan} step=4816




2022-04-22 05:28.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.24 [info     ] FQE_20220422052754: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017150058302768442, 'time_algorithm_update': 0.005168471918549649, 'loss': 0.13359914593355254, 'time_step': 0.005414355632870696, 'init_value': -10.336445808410645, 'ave_value': -13.216217116643422, 'soft_opc': nan} step=5160




2022-04-22 05:28.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.27 [info     ] FQE_20220422052754: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001837611198425293, 'time_algorithm_update': 0.00526932159135508, 'loss': 0.14774900632044083, 'time_step': 0.005533817202545876, 'init_value': -11.26622486114502, 'ave_value': -14.368167451641371, 'soft_opc': nan} step=5504




2022-04-22 05:28.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.29 [info     ] FQE_20220422052754: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001636154429857121, 'time_algorithm_update': 0.005081216263216596, 'loss': 0.16584136771839544, 'time_step': 0.0053162955960562065, 'init_value': -11.608051300048828, 'ave_value': -15.102111886771237, 'soft_opc': nan} step=5848




2022-04-22 05:28.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.30 [info     ] FQE_20220422052754: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00016333613284798555, 'time_algorithm_update': 0.004618104114088901, 'loss': 0.1898331138451562, 'time_step': 0.0048535875109739085, 'init_value': -12.002452850341797, 'ave_value': -15.718861613893118, 'soft_opc': nan} step=6192




2022-04-22 05:28.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.32 [info     ] FQE_20220422052754: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017139315605163574, 'time_algorithm_update': 0.005168333996173947, 'loss': 0.20906247509383533, 'time_step': 0.005414853262346845, 'init_value': -12.732025146484375, 'ave_value': -16.647670380769902, 'soft_opc': nan} step=6536




2022-04-22 05:28.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.34 [info     ] FQE_20220422052754: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00016588527102803075, 'time_algorithm_update': 0.005056874003521231, 'loss': 0.2311060335541274, 'time_step': 0.00529773567998132, 'init_value': -13.31959342956543, 'ave_value': -17.422581316159434, 'soft_opc': nan} step=6880




2022-04-22 05:28.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.36 [info     ] FQE_20220422052754: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016382821770601495, 'time_algorithm_update': 0.005046523587648259, 'loss': 0.2477042686334963, 'time_step': 0.005281118459479753, 'init_value': -14.006723403930664, 'ave_value': -18.280461308026794, 'soft_opc': nan} step=7224




2022-04-22 05:28.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.38 [info     ] FQE_20220422052754: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017294842143391454, 'time_algorithm_update': 0.00520396163297254, 'loss': 0.26195666433831805, 'time_step': 0.005453786877698676, 'init_value': -14.354148864746094, 'ave_value': -18.91758718172734, 'soft_opc': nan} step=7568




2022-04-22 05:28.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.40 [info     ] FQE_20220422052754: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00016252523244813432, 'time_algorithm_update': 0.004731264225272245, 'loss': 0.2842211158956986, 'time_step': 0.004969804092895153, 'init_value': -14.787014961242676, 'ave_value': -19.628038708483047, 'soft_opc': nan} step=7912




2022-04-22 05:28.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.42 [info     ] FQE_20220422052754: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017078116882679074, 'time_algorithm_update': 0.005158616359843765, 'loss': 0.29418436842862256, 'time_step': 0.005404698294262553, 'init_value': -15.275205612182617, 'ave_value': -20.19672122343783, 'soft_opc': nan} step=8256




2022-04-22 05:28.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.44 [info     ] FQE_20220422052754: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016802064208097236, 'time_algorithm_update': 0.005177871432415274, 'loss': 0.29678501332785157, 'time_step': 0.005423370488854342, 'init_value': -15.37483024597168, 'ave_value': -20.448222642536233, 'soft_opc': nan} step=8600




2022-04-22 05:28.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.46 [info     ] FQE_20220422052754: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016924530960792718, 'time_algorithm_update': 0.005111234825710917, 'loss': 0.30200674999396987, 'time_step': 0.005358405584512755, 'init_value': -15.834831237792969, 'ave_value': -21.192697400888346, 'soft_opc': nan} step=8944




2022-04-22 05:28.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.48 [info     ] FQE_20220422052754: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016593794490015783, 'time_algorithm_update': 0.00504384900248328, 'loss': 0.31636928404183234, 'time_step': 0.005285709403281988, 'init_value': -16.234657287597656, 'ave_value': -21.603973287837327, 'soft_opc': nan} step=9288




2022-04-22 05:28.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.50 [info     ] FQE_20220422052754: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00016864579777384913, 'time_algorithm_update': 0.005005221727282502, 'loss': 0.32509437226148885, 'time_step': 0.005249431660008985, 'init_value': -17.23828125, 'ave_value': -22.734043869494695, 'soft_opc': nan} step=9632




2022-04-22 05:28.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.52 [info     ] FQE_20220422052754: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016950729281403298, 'time_algorithm_update': 0.0050871545492216595, 'loss': 0.3255340993935026, 'time_step': 0.0053333321283029955, 'init_value': -17.199588775634766, 'ave_value': -22.71788679147978, 'soft_opc': nan} step=9976




2022-04-22 05:28.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.54 [info     ] FQE_20220422052754: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016970689906630406, 'time_algorithm_update': 0.00511595745419347, 'loss': 0.32968893303904073, 'time_step': 0.005362112161725066, 'init_value': -17.926971435546875, 'ave_value': -23.31329231995938, 'soft_opc': nan} step=10320




2022-04-22 05:28.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.56 [info     ] FQE_20220422052754: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016852936079335767, 'time_algorithm_update': 0.0052860289119010745, 'loss': 0.3310915783784071, 'time_step': 0.005530123793801596, 'init_value': -18.21783447265625, 'ave_value': -23.525053883977822, 'soft_opc': nan} step=10664




2022-04-22 05:28.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.58 [info     ] FQE_20220422052754: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016661993292875068, 'time_algorithm_update': 0.004438581854798073, 'loss': 0.34009860146812404, 'time_step': 0.004680424928665161, 'init_value': -19.015544891357422, 'ave_value': -24.274627359027278, 'soft_opc': nan} step=11008




2022-04-22 05:28.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:28.59 [info     ] FQE_20220422052754: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016045085219449775, 'time_algorithm_update': 0.003750205733055292, 'loss': 0.3450825458154256, 'time_step': 0.003985125658123992, 'init_value': -19.35940170288086, 'ave_value': -24.536328879173276, 'soft_opc': nan} step=11352




2022-04-22 05:28.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.01 [info     ] FQE_20220422052754: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00016211562378461972, 'time_algorithm_update': 0.003742453663848167, 'loss': 0.34758001255131393, 'time_step': 0.003977478243583857, 'init_value': -19.85826873779297, 'ave_value': -25.111149951224927, 'soft_opc': nan} step=11696




2022-04-22 05:29.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.02 [info     ] FQE_20220422052754: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016396336777265683, 'time_algorithm_update': 0.003918538952982703, 'loss': 0.36615528347143944, 'time_step': 0.0041578190271244495, 'init_value': -20.09429931640625, 'ave_value': -25.207543887097287, 'soft_opc': nan} step=12040




2022-04-22 05:29.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.04 [info     ] FQE_20220422052754: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00016362583914468455, 'time_algorithm_update': 0.003732850385266681, 'loss': 0.3774276055790874, 'time_step': 0.003971282132836275, 'init_value': -20.411766052246094, 'ave_value': -25.428744610651556, 'soft_opc': nan} step=12384




2022-04-22 05:29.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.05 [info     ] FQE_20220422052754: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016374088997064636, 'time_algorithm_update': 0.003762805184652639, 'loss': 0.38903270745487495, 'time_step': 0.0039993884951569315, 'init_value': -20.758235931396484, 'ave_value': -25.7258402520346, 'soft_opc': nan} step=12728




2022-04-22 05:29.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.07 [info     ] FQE_20220422052754: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001655117023822873, 'time_algorithm_update': 0.003850349853205126, 'loss': 0.39912275658638846, 'time_step': 0.0040893291318139366, 'init_value': -21.384765625, 'ave_value': -26.18986527386566, 'soft_opc': nan} step=13072




2022-04-22 05:29.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.09 [info     ] FQE_20220422052754: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016722013784009358, 'time_algorithm_update': 0.0038256485794865808, 'loss': 0.4077387401254761, 'time_step': 0.004069092661835427, 'init_value': -21.93328857421875, 'ave_value': -26.75269799398317, 'soft_opc': nan} step=13416




2022-04-22 05:29.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.10 [info     ] FQE_20220422052754: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001638684161873751, 'time_algorithm_update': 0.0038711317749910578, 'loss': 0.41769798928980045, 'time_step': 0.004110838784727939, 'init_value': -21.977863311767578, 'ave_value': -26.64131200357659, 'soft_opc': nan} step=13760




2022-04-22 05:29.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.12 [info     ] FQE_20220422052754: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001647312973820886, 'time_algorithm_update': 0.0038798971231593644, 'loss': 0.4222405513797259, 'time_step': 0.0041207685027011606, 'init_value': -21.692171096801758, 'ave_value': -26.416652333217062, 'soft_opc': nan} step=14104




2022-04-22 05:29.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.13 [info     ] FQE_20220422052754: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00016652428826620413, 'time_algorithm_update': 0.0038347881893778957, 'loss': 0.4287476811238543, 'time_step': 0.004078115141668985, 'init_value': -21.81024169921875, 'ave_value': -26.490676746755284, 'soft_opc': nan} step=14448




2022-04-22 05:29.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.15 [info     ] FQE_20220422052754: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016401881395384322, 'time_algorithm_update': 0.0038595456023548923, 'loss': 0.43377958361037766, 'time_step': 0.004098456266314485, 'init_value': -22.066810607910156, 'ave_value': -26.70084965799281, 'soft_opc': nan} step=14792




2022-04-22 05:29.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.16 [info     ] FQE_20220422052754: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016321900279022927, 'time_algorithm_update': 0.004018095343611961, 'loss': 0.44168435139744, 'time_step': 0.004256739865901859, 'init_value': -22.051637649536133, 'ave_value': -26.587833328819457, 'soft_opc': nan} step=15136




2022-04-22 05:29.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.18 [info     ] FQE_20220422052754: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016353435294572696, 'time_algorithm_update': 0.003859281539916992, 'loss': 0.45418777581856606, 'time_step': 0.004096584957699443, 'init_value': -22.39689064025879, 'ave_value': -26.891062561892863, 'soft_opc': nan} step=15480




2022-04-22 05:29.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.19 [info     ] FQE_20220422052754: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001650951629461244, 'time_algorithm_update': 0.0038506832233695097, 'loss': 0.46052340876993314, 'time_step': 0.004091438859008079, 'init_value': -22.65117645263672, 'ave_value': -27.379618358045715, 'soft_opc': nan} step=15824




2022-04-22 05:29.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.21 [info     ] FQE_20220422052754: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016553318777749705, 'time_algorithm_update': 0.003960501315981843, 'loss': 0.46762058583662175, 'time_step': 0.004201620124107183, 'init_value': -22.876325607299805, 'ave_value': -27.705432271589064, 'soft_opc': nan} step=16168




2022-04-22 05:29.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.22 [info     ] FQE_20220422052754: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017266564590986386, 'time_algorithm_update': 0.0038220418054004048, 'loss': 0.4677029371066669, 'time_step': 0.004069507122039795, 'init_value': -23.18241310119629, 'ave_value': -27.92129913925252, 'soft_opc': nan} step=16512




2022-04-22 05:29.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.24 [info     ] FQE_20220422052754: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016073224156401878, 'time_algorithm_update': 0.0038406883561333946, 'loss': 0.4834313208811245, 'time_step': 0.004074384306752404, 'init_value': -23.432641983032227, 'ave_value': -28.256513129764546, 'soft_opc': nan} step=16856




2022-04-22 05:29.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:29.26 [info     ] FQE_20220422052754: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001633583113204601, 'time_algorithm_update': 0.003815733416135921, 'loss': 0.4954271983858848, 'time_step': 0.0040516174116799995, 'init_value': -23.486244201660156, 'ave_value': -28.45369581940861, 'soft_opc': nan} step=17200




2022-04-22 05:29.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422052754/model_17200.pt
search iteration:  22
using hyper params:  [0.00557465858241362, 0.002840422510422013, 6.791294713047555e-05, 7]
2022-04-22 05:29.26 [debug    ] RoundIterator is selected.
2022-04-22 05:29.26 [info     ] Directory is created at d3rlpy_logs/CQL_20220422052926
2022-04-22 05:29.26 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 05:29.26 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 05:29.26 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422052926/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.00557465858241362, 'actor_optim_factory': {'optim_c

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:29.41 [info     ] CQL_20220422052926: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00041501232654373083, 'time_algorithm_update': 0.04212250116932599, 'temp_loss': 4.927972432505878, 'temp': 0.9879981603236557, 'alpha_loss': -17.76745118157712, 'alpha': 1.0177621999917004, 'critic_loss': 179.53117280199348, 'actor_loss': 3.833528926036175, 'time_step': 0.042639975602916214, 'td_error': 1.4348402248568157, 'init_value': -10.095112800598145, 'ave_value': -9.707874323056737} step=346
2022-04-22 05:29.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:29.56 [info     ] CQL_20220422052926: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00044699486969523346, 'time_algorithm_update': 0.04215503772559193, 'temp_loss': 4.868279764417968, 'temp': 0.965026801032138, 'alpha_loss': -18.420831426719708, 'alpha': 1.0542630225936802, 'critic_loss': 177.26935579597605, 'actor_loss': 13.070906741081634, 'time_step': 0.042702512934028755, 'td_error': 1.6135316680831315, 'init_value': -16.562786102294922, 'ave_value': -15.81330041453532} step=692
2022-04-22 05:29.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:30.12 [info     ] CQL_20220422052926: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0004262972429308588, 'time_algorithm_update': 0.042326217441889594, 'temp_loss': 4.754723918231236, 'temp': 0.9429706217925673, 'alpha_loss': -19.0889474835699, 'alpha': 1.0925299504588795, 'critic_loss': 319.8086183140044, 'actor_loss': 19.407182048510954, 'time_step': 0.042857531867275346, 'td_error': 1.880577473650533, 'init_value': -22.288501739501953, 'ave_value': -21.392081455259653} step=1038
2022-04-22 05:30.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:30.27 [info     ] CQL_20220422052926: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0004270338598703373, 'time_algorithm_update': 0.043308032041340205, 'temp_loss': 4.648090012500741, 'temp': 0.9216361500624287, 'alpha_loss': -19.78211806413066, 'alpha': 1.1326858111888687, 'critic_loss': 517.3919420187184, 'actor_loss': 24.16669987805317, 'time_step': 0.043837836712081996, 'td_error': 2.0354526658981698, 'init_value': -25.468242645263672, 'ave_value': -24.436846806574568} step=1384
2022-04-22 05:30.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:30.43 [info     ] CQL_20220422052926: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00043391974675173017, 'time_algorithm_update': 0.04396308226392448, 'temp_loss': 4.5472031411408, 'temp': 0.9009362460905417, 'alpha_loss': -20.502687117956967, 'alpha': 1.1747703514347188, 'critic_loss': 762.9405568734759, 'actor_loss': 26.359289555191303, 'time_step': 0.04449955300788659, 'td_error': 2.033504767341701, 'init_value': -26.366899490356445, 'ave_value': -25.349575671724597} step=1730
2022-04-22 05:30.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:30.59 [info     ] CQL_20220422052926: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0004229393997633388, 'time_algorithm_update': 0.043384268793756564, 'temp_loss': 4.446422807054024, 'temp': 0.8808263131993355, 'alpha_loss': -21.26196655648292, 'alpha': 1.2188113231879438, 'critic_loss': 1052.4698080603098, 'actor_loss': 25.418469456578954, 'time_step': 0.043909996920238344, 'td_error': 1.8881323194986666, 'init_value': -24.07415771484375, 'ave_value': -23.31600242875209} step=2076
2022-04-22 05:30.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:31.15 [info     ] CQL_20220422052926: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0004437493451068856, 'time_algorithm_update': 0.04523285482660194, 'temp_loss': 4.3486581744486195, 'temp': 0.8612700676642402, 'alpha_loss': -22.027615767682907, 'alpha': 1.2648172457783209, 'critic_loss': 1387.5225992368137, 'actor_loss': 21.041638600343912, 'time_step': 0.04577897876673351, 'td_error': 1.6254399069946344, 'init_value': -18.608612060546875, 'ave_value': -18.156020215406638} step=2422
2022-04-22 05:31.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:31.32 [info     ] CQL_20220422052926: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00043219638008602784, 'time_algorithm_update': 0.04716808947524583, 'temp_loss': 4.253848518250305, 'temp': 0.8422289743933374, 'alpha_loss': -22.840863045929485, 'alpha': 1.312842074501721, 'critic_loss': 1770.4847306268064, 'actor_loss': 13.288349237056137, 'time_step': 0.04770397519789679, 'td_error': 1.3772123951449957, 'init_value': -10.434481620788574, 'ave_value': -10.272562895458758} step=2768
2022-04-22 05:31.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:31.50 [info     ] CQL_20220422052926: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00043501950412816394, 'time_algorithm_update': 0.04702580319663693, 'temp_loss': 4.160244242993393, 'temp': 0.8236725528460707, 'alpha_loss': -23.704890741778247, 'alpha': 1.3629475859548315, 'critic_loss': 2192.6970172507226, 'actor_loss': 6.914008810341014, 'time_step': 0.04756516666081599, 'td_error': 1.3233539634016815, 'init_value': -7.22365140914917, 'ave_value': -7.149952691106674} step=3114
2022-04-22 05:31.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:32.07 [info     ] CQL_20220422052926: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00044044180412512984, 'time_algorithm_update': 0.046779848936665265, 'temp_loss': 4.0686181377124235, 'temp': 0.8055835776246352, 'alpha_loss': -24.613746775368046, 'alpha': 1.4152102039728551, 'critic_loss': 2582.8598068325505, 'actor_loss': 5.440720796585083, 'time_step': 0.04732267153745442, 'td_error': 1.3201295742536265, 'init_value': -6.824825286865234, 'ave_value': -6.782122068554375} step=3460
2022-04-22 05:32.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:32.24 [info     ] CQL_20220422052926: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0004342229380083911, 'time_algorithm_update': 0.047178711505294534, 'temp_loss': 3.979012804224312, 'temp': 0.7879316172847858, 'alpha_loss': -25.556485335950907, 'alpha': 1.4696670401992136, 'critic_loss': 2925.599444968163, 'actor_loss': 5.2690895844057115, 'time_step': 0.04771116083067966, 'td_error': 1.321154478660551, 'init_value': -6.723058700561523, 'ave_value': -6.691926539526443} step=3806
2022-04-22 05:32.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:32.41 [info     ] CQL_20220422052926: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00043803005549259956, 'time_algorithm_update': 0.047379772098078206, 'temp_loss': 3.8929710277932226, 'temp': 0.7706980193625985, 'alpha_loss': -26.54542034898879, 'alpha': 1.526384922465837, 'critic_loss': 3235.622029387193, 'actor_loss': 5.346772521906505, 'time_step': 0.04791999965733876, 'td_error': 1.3255210951369831, 'init_value': -6.9067606925964355, 'ave_value': -6.877963840074028} step=4152
2022-04-22 05:32.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:32.58 [info     ] CQL_20220422052926: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00042972192598905175, 'time_algorithm_update': 0.04749608039855957, 'temp_loss': 3.807655261431126, 'temp': 0.753865960016416, 'alpha_loss': -27.569740041832013, 'alpha': 1.5854143614024785, 'critic_loss': 3541.8092972419167, 'actor_loss': 5.552571288423042, 'time_step': 0.04802587197695164, 'td_error': 1.3294394286799822, 'init_value': -7.015352725982666, 'ave_value': -6.995052448885878} step=4498
2022-04-22 05:32.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:33.15 [info     ] CQL_20220422052926: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.000432986055495422, 'time_algorithm_update': 0.047837553685800185, 'temp_loss': 3.7243715786520455, 'temp': 0.7374208867549896, 'alpha_loss': -28.637296081278365, 'alpha': 1.646829164786146, 'critic_loss': 3840.2809134246295, 'actor_loss': 5.7712209128230985, 'time_step': 0.04837025107675894, 'td_error': 1.3350311414929512, 'init_value': -7.2598652839660645, 'ave_value': -7.241259370408617} step=4844
2022-04-22 05:33.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:33.33 [info     ] CQL_20220422052926: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0004257239358273545, 'time_algorithm_update': 0.04706244110372025, 'temp_loss': 3.6431903342980183, 'temp': 0.7213504951813318, 'alpha_loss': -29.750900433931736, 'alpha': 1.7107037371293659, 'critic_loss': 4131.34487826838, 'actor_loss': 6.054701490898353, 'time_step': 0.047592619250964564, 'td_error': 1.3406360575608351, 'init_value': -7.489306926727295, 'ave_value': -7.474214347271402} step=5190
2022-04-22 05:33.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:33.49 [info     ] CQL_20220422052926: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00045113756477488257, 'time_algorithm_update': 0.04673360262302994, 'temp_loss': 3.563849875003616, 'temp': 0.7056419632338375, 'alpha_loss': -30.90172994756974, 'alpha': 1.7771178704465744, 'critic_loss': 4408.321517679733, 'actor_loss': 6.402208758227398, 'time_step': 0.04728462310195658, 'td_error': 1.3515553658438775, 'init_value': -8.021095275878906, 'ave_value': -8.00493129261753} step=5536
2022-04-22 05:33.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:34.06 [info     ] CQL_20220422052926: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00042693876806711183, 'time_algorithm_update': 0.04626889518230637, 'temp_loss': 3.4859137528204505, 'temp': 0.6902834413024043, 'alpha_loss': -32.10439711223448, 'alpha': 1.8461556479420964, 'critic_loss': 4647.214270795701, 'actor_loss': 6.816478780239303, 'time_step': 0.04679485759294102, 'td_error': 1.3605737602334853, 'init_value': -8.406669616699219, 'ave_value': -8.391363590054175} step=5882
2022-04-22 05:34.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:34.23 [info     ] CQL_20220422052926: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00044078978499925204, 'time_algorithm_update': 0.0464675185308291, 'temp_loss': 3.41046035703207, 'temp': 0.6752651728302068, 'alpha_loss': -33.35183634234301, 'alpha': 1.9179123753757146, 'critic_loss': 4878.110590058255, 'actor_loss': 7.255708916338882, 'time_step': 0.047011207982983895, 'td_error': 1.3662125466947936, 'init_value': -8.598670959472656, 'ave_value': -8.590942606014796} step=6228
2022-04-22 05:34.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:34.40 [info     ] CQL_20220422052926: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00042156125768760724, 'time_algorithm_update': 0.04610616554414606, 'temp_loss': 3.335481269511184, 'temp': 0.6605769299358302, 'alpha_loss': -34.64459351997155, 'alpha': 1.9924851876462815, 'critic_loss': 5091.600662143244, 'actor_loss': 7.744132808178146, 'time_step': 0.04662763383347175, 'td_error': 1.3828048133611177, 'init_value': -9.341015815734863, 'ave_value': -9.328988279828307} step=6574
2022-04-22 05:34.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:34.57 [info     ] CQL_20220422052926: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00041526521561462755, 'time_algorithm_update': 0.04598392365295763, 'temp_loss': 3.2633233173734193, 'temp': 0.6462149763038393, 'alpha_loss': -35.99651313517135, 'alpha': 2.0699806743963607, 'critic_loss': 5276.5405541568825, 'actor_loss': 8.271359378891873, 'time_step': 0.04649527706851849, 'td_error': 1.395940218587985, 'init_value': -9.860838890075684, 'ave_value': -9.849605238160684} step=6920
2022-04-22 05:34.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:35.13 [info     ] CQL_20220422052926: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0004426296046703537, 'time_algorithm_update': 0.045535973041732875, 'temp_loss': 3.191922061016105, 'temp': 0.6321673141738583, 'alpha_loss': -37.39554104226173, 'alpha': 2.1505129985037565, 'critic_loss': 5416.09134810784, 'actor_loss': 8.81858480596818, 'time_step': 0.04609777679333108, 'td_error': 1.4095123562634384, 'init_value': -10.376022338867188, 'ave_value': -10.3651713916169} step=7266
2022-04-22 05:35.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:35.30 [info     ] CQL_20220422052926: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00044616729537875664, 'time_algorithm_update': 0.04620995273479837, 'temp_loss': 3.1230887491579002, 'temp': 0.6184252316207555, 'alpha_loss': -38.850472984975475, 'alpha': 2.23418619798098, 'critic_loss': 5570.643433322796, 'actor_loss': 9.431955778529879, 'time_step': 0.046757032416459454, 'td_error': 1.42734393717196, 'init_value': -11.046310424804688, 'ave_value': -11.034262720562937} step=7612
2022-04-22 05:35.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:35.47 [info     ] CQL_20220422052926: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0004270531538593976, 'time_algorithm_update': 0.04614503948674726, 'temp_loss': 3.0555382064312178, 'temp': 0.6049814115714476, 'alpha_loss': -40.361659518556095, 'alpha': 2.321123261672224, 'critic_loss': 5728.617874762916, 'actor_loss': 10.08486218535142, 'time_step': 0.0466669253531219, 'td_error': 1.4383425915245869, 'init_value': -11.369436264038086, 'ave_value': -11.365575712628136} step=7958
2022-04-22 05:35.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:36.04 [info     ] CQL_20220422052926: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0004335793456590245, 'time_algorithm_update': 0.04694957195678887, 'temp_loss': 2.9893538324819136, 'temp': 0.5918311502547623, 'alpha_loss': -41.9315819822984, 'alpha': 2.4114489700063806, 'critic_loss': 5875.908736565209, 'actor_loss': 10.762276853440126, 'time_step': 0.04748098905375927, 'td_error': 1.4612176318382637, 'init_value': -12.178181648254395, 'ave_value': -12.171738550684474} step=8304
2022-04-22 05:36.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:36.20 [info     ] CQL_20220422052926: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00042229236205878286, 'time_algorithm_update': 0.04639354951119836, 'temp_loss': 2.9241369108244175, 'temp': 0.5789673445541734, 'alpha_loss': -43.559369511686995, 'alpha': 2.505292313636383, 'critic_loss': 6022.491076871839, 'actor_loss': 11.436785077773077, 'time_step': 0.04691216849178248, 'td_error': 1.4821741546335807, 'init_value': -12.85565185546875, 'ave_value': -12.84995737234073} step=8650
2022-04-22 05:36.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:36.38 [info     ] CQL_20220422052926: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0004329336860965442, 'time_algorithm_update': 0.047168445035901375, 'temp_loss': 2.8606196807299047, 'temp': 0.5663845170440013, 'alpha_loss': -45.25803432574851, 'alpha': 2.6027989194572316, 'critic_loss': 6161.637612050668, 'actor_loss': 12.14256352496285, 'time_step': 0.04769746140937585, 'td_error': 1.5045122278201848, 'init_value': -13.557957649230957, 'ave_value': -13.551988872276658} step=8996
2022-04-22 05:36.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:36.54 [info     ] CQL_20220422052926: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0004107848757264242, 'time_algorithm_update': 0.04584237261314612, 'temp_loss': 2.7990650675889386, 'temp': 0.5540762448931016, 'alpha_loss': -47.018825839709685, 'alpha': 2.704106420450817, 'critic_loss': 6187.249342372652, 'actor_loss': 12.835737655617597, 'time_step': 0.04633946708172043, 'td_error': 1.5242165768008566, 'init_value': -14.132377624511719, 'ave_value': -14.129205655260298} step=9342
2022-04-22 05:36.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:37.11 [info     ] CQL_20220422052926: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00042621868883254213, 'time_algorithm_update': 0.04667103221650758, 'temp_loss': 2.737429527878072, 'temp': 0.5420323301946497, 'alpha_loss': -48.85769813184793, 'alpha': 2.809360997525254, 'critic_loss': 6090.134624503252, 'actor_loss': 13.503507236524813, 'time_step': 0.04719342661730816, 'td_error': 1.5519394226271437, 'init_value': -14.945990562438965, 'ave_value': -14.940877037174953} step=9688
2022-04-22 05:37.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:37.28 [info     ] CQL_20220422052926: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0004438561511177548, 'time_algorithm_update': 0.047054311443615514, 'temp_loss': 2.678268970092597, 'temp': 0.530254725603699, 'alpha_loss': -50.7540116062054, 'alpha': 2.918723537053676, 'critic_loss': 6088.522659919166, 'actor_loss': 14.2739977643669, 'time_step': 0.04759740278210943, 'td_error': 1.577510053669753, 'init_value': -15.655400276184082, 'ave_value': -15.64978160324567} step=10034
2022-04-22 05:37.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:37.45 [info     ] CQL_20220422052926: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00042672446697433557, 'time_algorithm_update': 0.04660423022474168, 'temp_loss': 2.620225847801032, 'temp': 0.5187319129533161, 'alpha_loss': -52.72781164797744, 'alpha': 3.0323392676480245, 'critic_loss': 5984.42404686371, 'actor_loss': 14.986144688777152, 'time_step': 0.04712805513701687, 'td_error': 1.6026776223683552, 'init_value': -16.319805145263672, 'ave_value': -16.315788516844545} step=10380
2022-04-22 05:37.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:38.02 [info     ] CQL_20220422052926: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00042980323637151994, 'time_algorithm_update': 0.0470516488731252, 'temp_loss': 2.562582399114708, 'temp': 0.5074609897040219, 'alpha_loss': -54.78225735570654, 'alpha': 3.150367821572144, 'critic_loss': 5982.693242243949, 'actor_loss': 15.756509662363571, 'time_step': 0.0475843090542479, 'td_error': 1.6295398868359592, 'init_value': -16.99021339416504, 'ave_value': -16.98860492444027} step=10726
2022-04-22 05:38.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:38.19 [info     ] CQL_20220422052926: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0004356341554939402, 'time_algorithm_update': 0.04708126514633267, 'temp_loss': 2.507675945414284, 'temp': 0.4964333205037034, 'alpha_loss': -56.91798340240655, 'alpha': 3.273014752161985, 'critic_loss': 6089.896144271586, 'actor_loss': 16.5168664331381, 'time_step': 0.04761990990941924, 'td_error': 1.6596027205397115, 'init_value': -17.738845825195312, 'ave_value': -17.7364916039508} step=11072
2022-04-22 05:38.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:38.36 [info     ] CQL_20220422052926: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00042492874784965734, 'time_algorithm_update': 0.046868327725140345, 'temp_loss': 2.4532474479234287, 'temp': 0.4856456775713518, 'alpha_loss': -59.135639168623555, 'alpha': 3.4004255801956087, 'critic_loss': 6193.525085802024, 'actor_loss': 17.25060061085431, 'time_step': 0.04738980841774472, 'td_error': 1.6933586711939403, 'init_value': -18.56171226501465, 'ave_value': -18.556669362476505} step=11418
2022-04-22 05:38.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:38.53 [info     ] CQL_20220422052926: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0004416773084960232, 'time_algorithm_update': 0.04731812987024384, 'temp_loss': 2.3994444746502563, 'temp': 0.47509186968982564, 'alpha_loss': -61.43048921485857, 'alpha': 3.5327925392658037, 'critic_loss': 6337.608507101247, 'actor_loss': 17.967941190466025, 'time_step': 0.047857024077046124, 'td_error': 1.7193253161653017, 'init_value': -19.144548416137695, 'ave_value': -19.142556473201573} step=11764
2022-04-22 05:38.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:39.10 [info     ] CQL_20220422052926: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00041125964567151373, 'time_algorithm_update': 0.04673701076838323, 'temp_loss': 2.3472120086581723, 'temp': 0.4647689795907522, 'alpha_loss': -63.83244192531343, 'alpha': 3.6703131054178137, 'critic_loss': 6508.633956997381, 'actor_loss': 18.659828130909474, 'time_step': 0.04724277857411115, 'td_error': 1.753345049174725, 'init_value': -19.925519943237305, 'ave_value': -19.920607997411345} step=12110
2022-04-22 05:39.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:39.27 [info     ] CQL_20220422052926: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0004287696298147213, 'time_algorithm_update': 0.04665292480777454, 'temp_loss': 2.296445316661989, 'temp': 0.4546698030364307, 'alpha_loss': -66.31416327415863, 'alpha': 3.8131866820285776, 'critic_loss': 6535.494171671785, 'actor_loss': 19.315209212330725, 'time_step': 0.04717853027961157, 'td_error': 1.7826306138489614, 'init_value': -20.556991577148438, 'ave_value': -20.552485073771507} step=12456
2022-04-22 05:39.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:39.44 [info     ] CQL_20220422052926: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0004436790598610233, 'time_algorithm_update': 0.04599592244693999, 'temp_loss': 2.246617347518833, 'temp': 0.4447903083583523, 'alpha_loss': -68.88269547920007, 'alpha': 3.961608418839515, 'critic_loss': 6608.379229418804, 'actor_loss': 19.992766738626997, 'time_step': 0.04653903583570712, 'td_error': 1.8130901900447998, 'init_value': -21.194042205810547, 'ave_value': -21.190419477170654} step=12802
2022-04-22 05:39.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:40.01 [info     ] CQL_20220422052926: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00042346860632041974, 'time_algorithm_update': 0.046472361322083225, 'temp_loss': 2.1973287184114403, 'temp': 0.43512648895296746, 'alpha_loss': -71.57341146744744, 'alpha': 4.115816528397488, 'critic_loss': 6750.8363114726335, 'actor_loss': 20.687667664764934, 'time_step': 0.04699922986113267, 'td_error': 1.8441732353836373, 'init_value': -21.8275203704834, 'ave_value': -21.824685056676454} step=13148
2022-04-22 05:40.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:40.17 [info     ] CQL_20220422052926: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0004296144309071447, 'time_algorithm_update': 0.04310028952670235, 'temp_loss': 2.15019323853399, 'temp': 0.4256703671003353, 'alpha_loss': -74.36659165751728, 'alpha': 4.2760331217264165, 'critic_loss': 6919.220474507767, 'actor_loss': 21.3284090020064, 'time_step': 0.043630678529684254, 'td_error': 1.8697643255369645, 'init_value': -22.33058738708496, 'ave_value': -22.330485831125475} step=13494
2022-04-22 05:40.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:40.32 [info     ] CQL_20220422052926: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.000444327475707655, 'time_algorithm_update': 0.04258773574939353, 'temp_loss': 2.1027263458064525, 'temp': 0.41642182281596124, 'alpha_loss': -77.2448486813231, 'alpha': 4.442486661017974, 'critic_loss': 6940.718606055816, 'actor_loss': 21.93243849759846, 'time_step': 0.04312738448898227, 'td_error': 1.9015225312931512, 'init_value': -22.959102630615234, 'ave_value': -22.958148260103012} step=13840
2022-04-22 05:40.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:40.48 [info     ] CQL_20220422052926: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.000437416782268899, 'time_algorithm_update': 0.043231922767065854, 'temp_loss': 2.057687884810343, 'temp': 0.4073739284380323, 'alpha_loss': -80.26055158493836, 'alpha': 4.6154043715813255, 'critic_loss': 7065.102608212157, 'actor_loss': 22.563273292056397, 'time_step': 0.04377111189627234, 'td_error': 1.9366131288859512, 'init_value': -23.640470504760742, 'ave_value': -23.637288105233466} step=14186
2022-04-22 05:40.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:41.03 [info     ] CQL_20220422052926: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.000431661660960644, 'time_algorithm_update': 0.04317193224250926, 'temp_loss': 2.0124197988151815, 'temp': 0.39852194264100466, 'alpha_loss': -83.36825482142454, 'alpha': 4.7950575269026565, 'critic_loss': 7201.857540417269, 'actor_loss': 23.142205795111686, 'time_step': 0.04370431472800371, 'td_error': 1.9661618715292317, 'init_value': -24.19527244567871, 'ave_value': -24.192238129660552} step=14532
2022-04-22 05:41.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:41.19 [info     ] CQL_20220422052926: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0004314239314525803, 'time_algorithm_update': 0.04311366232833421, 'temp_loss': 1.969423652728858, 'temp': 0.389861689568255, 'alpha_loss': -86.60925570802192, 'alpha': 4.981686225516259, 'critic_loss': 7353.189115844021, 'actor_loss': 23.735111010556967, 'time_step': 0.043645313709457484, 'td_error': 1.9994766189538, 'init_value': -24.80868911743164, 'ave_value': -24.80439673053421} step=14878
2022-04-22 05:41.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:41.35 [info     ] CQL_20220422052926: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00043904781341552734, 'time_algorithm_update': 0.04299783913386351, 'temp_loss': 1.9262880524458912, 'temp': 0.3813890951734058, 'alpha_loss': -89.99066693520959, 'alpha': 5.175584218405575, 'critic_loss': 7448.046268176481, 'actor_loss': 24.28638026617855, 'time_step': 0.04353803017235905, 'td_error': 2.0255291124035355, 'init_value': -25.265043258666992, 'ave_value': -25.262929046905793} step=15224
2022-04-22 05:41.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:41.50 [info     ] CQL_20220422052926: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00043381638609605027, 'time_algorithm_update': 0.043243203549026756, 'temp_loss': 1.8842427771904564, 'temp': 0.3731021273859664, 'alpha_loss': -93.50382477424048, 'alpha': 5.37706402133655, 'critic_loss': 7028.127519023212, 'actor_loss': 24.612705445703053, 'time_step': 0.04377944896675948, 'td_error': 2.0335070241252313, 'init_value': -25.374391555786133, 'ave_value': -25.379806969119453} step=15570
2022-04-22 05:41.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:42.06 [info     ] CQL_20220422052926: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0004411474128679044, 'time_algorithm_update': 0.043985255191780925, 'temp_loss': 1.843581618601187, 'temp': 0.3649950508954208, 'alpha_loss': -97.13525403855164, 'alpha': 5.586389833792096, 'critic_loss': 5765.099315841763, 'actor_loss': 24.72298688833424, 'time_step': 0.044530840278360885, 'td_error': 2.046501867293727, 'init_value': -25.599515914916992, 'ave_value': -25.60444476614636} step=15916
2022-04-22 05:42.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:42.22 [info     ] CQL_20220422052926: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0004257577003082099, 'time_algorithm_update': 0.04420450794903529, 'temp_loss': 1.8033457649925542, 'temp': 0.35706303677806966, 'alpha_loss': -100.91718415717858, 'alpha': 5.803855879458389, 'critic_loss': 5013.459176300578, 'actor_loss': 25.08648158497893, 'time_step': 0.044734701944913476, 'td_error': 2.067257718397905, 'init_value': -25.9685115814209, 'ave_value': -25.972499003077978} step=16262
2022-04-22 05:42.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:42.39 [info     ] CQL_20220422052926: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00042530360249425634, 'time_algorithm_update': 0.04709777666654201, 'temp_loss': 1.7638842759793893, 'temp': 0.3493049395394463, 'alpha_loss': -104.84812305428389, 'alpha': 6.029794400827044, 'critic_loss': 4385.739832883625, 'actor_loss': 25.46928944339642, 'time_step': 0.047627505539469636, 'td_error': 2.093687687179382, 'init_value': -26.441679000854492, 'ave_value': -26.442676765040346} step=16608
2022-04-22 05:42.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:42.57 [info     ] CQL_20220422052926: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0004280454161539243, 'time_algorithm_update': 0.04743879585596868, 'temp_loss': 1.7262143547824353, 'temp': 0.3417141687318769, 'alpha_loss': -108.95075805752263, 'alpha': 6.264533053932851, 'critic_loss': 3904.4170514586344, 'actor_loss': 25.939981636973474, 'time_step': 0.047970235003212285, 'td_error': 2.1155918947334977, 'init_value': -26.79230499267578, 'ave_value': -26.79585679417371} step=16954
2022-04-22 05:42.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:43.14 [info     ] CQL_20220422052926: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0004367284003020711, 'time_algorithm_update': 0.0477546832465023, 'temp_loss': 1.6889922257103671, 'temp': 0.33428585641301434, 'alpha_loss': -113.1829084054583, 'alpha': 6.508424228326434, 'critic_loss': 3476.2437363111903, 'actor_loss': 26.439140849030775, 'time_step': 0.04829259897243081, 'td_error': 2.1516788053206435, 'init_value': -27.428621292114258, 'ave_value': -27.428224854901142} step=17300
2022-04-22 05:43.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422052926/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 05:43.15 [info     ] FQE_20220422054314: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016794865389904343, 'time_algorithm_update': 0.005106723452188882, 'loss': 0.006518412827435567, 'time_step': 0.005347325141171375, 'init_value': -0.10286599397659302, 'ave_value': -0.06809154052018851, 'soft_opc': nan} step=166




2022-04-22 05:43.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.16 [info     ] FQE_20220422054314: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016621365604630435, 'time_algorithm_update': 0.005165869931140578, 'loss': 0.004221480488440538, 'time_step': 0.005403893539704472, 'init_value': -0.18192318081855774, 'ave_value': -0.12326015863747981, 'soft_opc': nan} step=332




2022-04-22 05:43.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.17 [info     ] FQE_20220422054314: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00017406279782214797, 'time_algorithm_update': 0.005325409303228539, 'loss': 0.003633712210793452, 'time_step': 0.00557258042944483, 'init_value': -0.2074296474456787, 'ave_value': -0.1382535474578355, 'soft_opc': nan} step=498




2022-04-22 05:43.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.18 [info     ] FQE_20220422054314: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00016719892800572407, 'time_algorithm_update': 0.004643917083740234, 'loss': 0.003333370453756617, 'time_step': 0.004882022558924663, 'init_value': -0.2692931294441223, 'ave_value': -0.17938648741031568, 'soft_opc': nan} step=664




2022-04-22 05:43.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.19 [info     ] FQE_20220422054314: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016696481819612435, 'time_algorithm_update': 0.005135978560849845, 'loss': 0.0029885000584321656, 'time_step': 0.0053736761391881, 'init_value': -0.30452761054039, 'ave_value': -0.21168723969325962, 'soft_opc': nan} step=830




2022-04-22 05:43.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.20 [info     ] FQE_20220422054314: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00017418487962469998, 'time_algorithm_update': 0.0052844013076230704, 'loss': 0.0026170102043458856, 'time_step': 0.005528207284858428, 'init_value': -0.3416183590888977, 'ave_value': -0.24096391153194616, 'soft_opc': nan} step=996




2022-04-22 05:43.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.21 [info     ] FQE_20220422054314: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001721123614943171, 'time_algorithm_update': 0.005161961877202413, 'loss': 0.002317889910878569, 'time_step': 0.005411113601133048, 'init_value': -0.37987563014030457, 'ave_value': -0.28154460640439577, 'soft_opc': nan} step=1162




2022-04-22 05:43.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.22 [info     ] FQE_20220422054314: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016938778291265648, 'time_algorithm_update': 0.005309160933437118, 'loss': 0.0020457274383579166, 'time_step': 0.005554719143603222, 'init_value': -0.36307090520858765, 'ave_value': -0.2671769981532618, 'soft_opc': nan} step=1328




2022-04-22 05:43.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.23 [info     ] FQE_20220422054314: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001680046679025673, 'time_algorithm_update': 0.005232533776616475, 'loss': 0.001956281017270837, 'time_step': 0.005473131156829466, 'init_value': -0.44754284620285034, 'ave_value': -0.3511436682817451, 'soft_opc': nan} step=1494




2022-04-22 05:43.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.24 [info     ] FQE_20220422054314: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001684642699827631, 'time_algorithm_update': 0.005059022501290563, 'loss': 0.001735118205844234, 'time_step': 0.005298828504171716, 'init_value': -0.4974762797355652, 'ave_value': -0.3990822599686867, 'soft_opc': nan} step=1660




2022-04-22 05:43.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.25 [info     ] FQE_20220422054314: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00017043481390160252, 'time_algorithm_update': 0.005379834807062724, 'loss': 0.0017666548938653152, 'time_step': 0.005625687449811453, 'init_value': -0.582544207572937, 'ave_value': -0.4765042035848842, 'soft_opc': nan} step=1826




2022-04-22 05:43.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.26 [info     ] FQE_20220422054314: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00017311199601874295, 'time_algorithm_update': 0.005273840513574071, 'loss': 0.0016332036908154927, 'time_step': 0.005521454006792551, 'init_value': -0.6218306422233582, 'ave_value': -0.5068312682879477, 'soft_opc': nan} step=1992




2022-04-22 05:43.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.27 [info     ] FQE_20220422054314: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001853876803294722, 'time_algorithm_update': 0.005171811724283609, 'loss': 0.001955704349941709, 'time_step': 0.0054294344890548525, 'init_value': -0.6861010193824768, 'ave_value': -0.5642529266710217, 'soft_opc': nan} step=2158




2022-04-22 05:43.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.28 [info     ] FQE_20220422054314: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00016939640045166016, 'time_algorithm_update': 0.005101422229445124, 'loss': 0.0021474806596076183, 'time_step': 0.005342150308999671, 'init_value': -0.7647284269332886, 'ave_value': -0.6326680112674178, 'soft_opc': nan} step=2324




2022-04-22 05:43.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.29 [info     ] FQE_20220422054314: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00017012601875397097, 'time_algorithm_update': 0.005213769085435982, 'loss': 0.0023727464319223605, 'time_step': 0.005461745951549116, 'init_value': -0.8401871919631958, 'ave_value': -0.6918660476061239, 'soft_opc': nan} step=2490




2022-04-22 05:43.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.30 [info     ] FQE_20220422054314: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001709346311638154, 'time_algorithm_update': 0.005177134490874876, 'loss': 0.0026701014201053, 'time_step': 0.005422442792409874, 'init_value': -0.9154309034347534, 'ave_value': -0.7573822320401166, 'soft_opc': nan} step=2656




2022-04-22 05:43.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.31 [info     ] FQE_20220422054314: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.000167628368699407, 'time_algorithm_update': 0.005394017840006265, 'loss': 0.0030048014787208087, 'time_step': 0.005636746624866164, 'init_value': -1.0221121311187744, 'ave_value': -0.8365273731470166, 'soft_opc': nan} step=2822




2022-04-22 05:43.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.32 [info     ] FQE_20220422054314: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016775906804096267, 'time_algorithm_update': 0.005358918603644313, 'loss': 0.0034749853096520864, 'time_step': 0.0056024588734270575, 'init_value': -1.057215690612793, 'ave_value': -0.8704779453521074, 'soft_opc': nan} step=2988




2022-04-22 05:43.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.33 [info     ] FQE_20220422054314: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016919963331107633, 'time_algorithm_update': 0.0051155205232551295, 'loss': 0.003956854487355541, 'time_step': 0.005363030606005566, 'init_value': -1.1223609447479248, 'ave_value': -0.9094045771558577, 'soft_opc': nan} step=3154




2022-04-22 05:43.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.34 [info     ] FQE_20220422054314: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016871274235736892, 'time_algorithm_update': 0.005460255117301482, 'loss': 0.004648856467501463, 'time_step': 0.0057065486907958984, 'init_value': -1.186478853225708, 'ave_value': -0.9499618414906902, 'soft_opc': nan} step=3320




2022-04-22 05:43.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.35 [info     ] FQE_20220422054314: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00017049944544413002, 'time_algorithm_update': 0.005255334348563689, 'loss': 0.004771623277794065, 'time_step': 0.005502544253705496, 'init_value': -1.2244231700897217, 'ave_value': -0.9756965034867864, 'soft_opc': nan} step=3486




2022-04-22 05:43.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.36 [info     ] FQE_20220422054314: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00015580079641686864, 'time_algorithm_update': 0.0051067780299359055, 'loss': 0.005293090393835488, 'time_step': 0.005331368331449577, 'init_value': -1.2348260879516602, 'ave_value': -0.9599443226849224, 'soft_opc': nan} step=3652




2022-04-22 05:43.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.37 [info     ] FQE_20220422054314: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015197460909923874, 'time_algorithm_update': 0.004202381674065648, 'loss': 0.005656150082709739, 'time_step': 0.004421706659248076, 'init_value': -1.3006908893585205, 'ave_value': -1.018456899805565, 'soft_opc': nan} step=3818




2022-04-22 05:43.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.38 [info     ] FQE_20220422054314: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001619910619345056, 'time_algorithm_update': 0.005009099661585796, 'loss': 0.0058146986624026805, 'time_step': 0.005240051143140678, 'init_value': -1.349928379058838, 'ave_value': -1.0472715219360071, 'soft_opc': nan} step=3984




2022-04-22 05:43.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.39 [info     ] FQE_20220422054314: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015852106622902743, 'time_algorithm_update': 0.005007927676281297, 'loss': 0.006682250091731717, 'time_step': 0.005234380802476263, 'init_value': -1.3991055488586426, 'ave_value': -1.075810651790124, 'soft_opc': nan} step=4150




2022-04-22 05:43.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.40 [info     ] FQE_20220422054314: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00014753657651234823, 'time_algorithm_update': 0.004970145512776202, 'loss': 0.0071596333550861535, 'time_step': 0.005187790077852915, 'init_value': -1.4848856925964355, 'ave_value': -1.1586730437750743, 'soft_opc': nan} step=4316




2022-04-22 05:43.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.41 [info     ] FQE_20220422054314: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001546388649078737, 'time_algorithm_update': 0.005000565425459161, 'loss': 0.007570193198240216, 'time_step': 0.005222412477056664, 'init_value': -1.4995369911193848, 'ave_value': -1.1509038806866712, 'soft_opc': nan} step=4482




2022-04-22 05:43.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.42 [info     ] FQE_20220422054314: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001582596675459161, 'time_algorithm_update': 0.005067789410970297, 'loss': 0.007889055775819312, 'time_step': 0.005296938390616911, 'init_value': -1.6109603643417358, 'ave_value': -1.241284980847257, 'soft_opc': nan} step=4648




2022-04-22 05:43.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.43 [info     ] FQE_20220422054314: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016504597951130695, 'time_algorithm_update': 0.005258563053177063, 'loss': 0.008492749830965984, 'time_step': 0.005499147507081549, 'init_value': -1.61967134475708, 'ave_value': -1.2380671056281138, 'soft_opc': nan} step=4814




2022-04-22 05:43.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.44 [info     ] FQE_20220422054314: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001676527850599174, 'time_algorithm_update': 0.005108228649001524, 'loss': 0.009063800671334233, 'time_step': 0.005350382931261177, 'init_value': -1.6681561470031738, 'ave_value': -1.280815218836237, 'soft_opc': nan} step=4980




2022-04-22 05:43.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.45 [info     ] FQE_20220422054314: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016741867525031767, 'time_algorithm_update': 0.005302127585353622, 'loss': 0.00962697939403041, 'time_step': 0.005544557628861393, 'init_value': -1.7286585569381714, 'ave_value': -1.2932544190468065, 'soft_opc': nan} step=5146




2022-04-22 05:43.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.46 [info     ] FQE_20220422054314: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001694394881466785, 'time_algorithm_update': 0.004606920552540974, 'loss': 0.0102100048849923, 'time_step': 0.004853620586625065, 'init_value': -1.8231711387634277, 'ave_value': -1.3892436130425414, 'soft_opc': nan} step=5312




2022-04-22 05:43.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.47 [info     ] FQE_20220422054314: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001662308911243117, 'time_algorithm_update': 0.0052708861339523135, 'loss': 0.010784911328585852, 'time_step': 0.005510852997561535, 'init_value': -1.8256735801696777, 'ave_value': -1.3778412882731557, 'soft_opc': nan} step=5478




2022-04-22 05:43.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.48 [info     ] FQE_20220422054314: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001797862799770861, 'time_algorithm_update': 0.0052845535508121355, 'loss': 0.011107229904330572, 'time_step': 0.005539224808474621, 'init_value': -1.83906090259552, 'ave_value': -1.3729385647217969, 'soft_opc': nan} step=5644




2022-04-22 05:43.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.49 [info     ] FQE_20220422054314: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001737511301615152, 'time_algorithm_update': 0.0053125792239085735, 'loss': 0.011725910540939454, 'time_step': 0.00555835430880627, 'init_value': -1.9870643615722656, 'ave_value': -1.498910769573224, 'soft_opc': nan} step=5810




2022-04-22 05:43.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.50 [info     ] FQE_20220422054314: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00017071057514971997, 'time_algorithm_update': 0.0052891926593091115, 'loss': 0.012136175716576358, 'time_step': 0.0055361497833068115, 'init_value': -2.0067830085754395, 'ave_value': -1.4957128206880443, 'soft_opc': nan} step=5976




2022-04-22 05:43.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.51 [info     ] FQE_20220422054314: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00017087861716029155, 'time_algorithm_update': 0.0052931409284292935, 'loss': 0.012680095568382047, 'time_step': 0.0055403479610581, 'init_value': -2.0683023929595947, 'ave_value': -1.533145214490454, 'soft_opc': nan} step=6142




2022-04-22 05:43.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.52 [info     ] FQE_20220422054314: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00017017628773149238, 'time_algorithm_update': 0.005295657249818365, 'loss': 0.013177333348809394, 'time_step': 0.005541830177766731, 'init_value': -2.1100497245788574, 'ave_value': -1.540294241633367, 'soft_opc': nan} step=6308




2022-04-22 05:43.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.53 [info     ] FQE_20220422054314: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016665027802249035, 'time_algorithm_update': 0.0051206752478358255, 'loss': 0.013710199560723212, 'time_step': 0.005362681595675917, 'init_value': -2.150662422180176, 'ave_value': -1.5614916558594543, 'soft_opc': nan} step=6474




2022-04-22 05:43.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.54 [info     ] FQE_20220422054314: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00017090016100780074, 'time_algorithm_update': 0.0052648280040327325, 'loss': 0.014094543789516214, 'time_step': 0.0055110985974231395, 'init_value': -2.185476303100586, 'ave_value': -1.5754044454572051, 'soft_opc': nan} step=6640




2022-04-22 05:43.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.55 [info     ] FQE_20220422054314: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016430774366999246, 'time_algorithm_update': 0.004934437303657991, 'loss': 0.014762456570569628, 'time_step': 0.00517404510314206, 'init_value': -2.2581498622894287, 'ave_value': -1.6351164815352752, 'soft_opc': nan} step=6806




2022-04-22 05:43.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.56 [info     ] FQE_20220422054314: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00017172600849565253, 'time_algorithm_update': 0.005411069077181529, 'loss': 0.015636443255149412, 'time_step': 0.005659383463572307, 'init_value': -2.260770082473755, 'ave_value': -1.6069830987308813, 'soft_opc': nan} step=6972




2022-04-22 05:43.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.57 [info     ] FQE_20220422054314: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00017182941896369658, 'time_algorithm_update': 0.00520878671163536, 'loss': 0.01590739034434657, 'time_step': 0.005454817450190165, 'init_value': -2.3007190227508545, 'ave_value': -1.6219912337685667, 'soft_opc': nan} step=7138




2022-04-22 05:43.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.58 [info     ] FQE_20220422054314: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016889658318944723, 'time_algorithm_update': 0.005109764007200678, 'loss': 0.015666246405769003, 'time_step': 0.005354288112686341, 'init_value': -2.3764967918395996, 'ave_value': -1.6773800531918233, 'soft_opc': nan} step=7304




2022-04-22 05:43.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:43.59 [info     ] FQE_20220422054314: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016886211303343256, 'time_algorithm_update': 0.005174040794372559, 'loss': 0.017011574899401874, 'time_step': 0.005422145487314247, 'init_value': -2.3966867923736572, 'ave_value': -1.677541445117855, 'soft_opc': nan} step=7470




2022-04-22 05:43.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:44.00 [info     ] FQE_20220422054314: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00016807073570159544, 'time_algorithm_update': 0.005278965076768255, 'loss': 0.017470263420528428, 'time_step': 0.005520843597779791, 'init_value': -2.4820265769958496, 'ave_value': -1.7637627391212805, 'soft_opc': nan} step=7636




2022-04-22 05:44.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:44.01 [info     ] FQE_20220422054314: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001654036073799593, 'time_algorithm_update': 0.005206666797040457, 'loss': 0.018146274876444185, 'time_step': 0.0054488756570471334, 'init_value': -2.5098206996917725, 'ave_value': -1.7829324539248446, 'soft_opc': nan} step=7802




2022-04-22 05:44.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:44.02 [info     ] FQE_20220422054314: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016616482332528355, 'time_algorithm_update': 0.0051257739584129975, 'loss': 0.01838756177163436, 'time_step': 0.005368211183203272, 'init_value': -2.602436065673828, 'ave_value': -1.8618004651608047, 'soft_opc': nan} step=7968




2022-04-22 05:44.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:44.03 [info     ] FQE_20220422054314: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001661490245037768, 'time_algorithm_update': 0.005276869578533862, 'loss': 0.01919544173519587, 'time_step': 0.0055159359093172, 'init_value': -2.5866920948028564, 'ave_value': -1.8179970707862874, 'soft_opc': nan} step=8134




2022-04-22 05:44.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:44.04 [info     ] FQE_20220422054314: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001837015151977539, 'time_algorithm_update': 0.005190516092691077, 'loss': 0.01990045079609368, 'time_step': 0.00545112052595759, 'init_value': -2.652853012084961, 'ave_value': -1.8866666006376371, 'soft_opc': nan} step=8300




2022-04-22 05:44.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054314/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 05:44.04 [info     ] Directory is created at d3rlpy_logs/FQE_20220422054404
2022-04-22 05:44.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 05:44.04 [debug    ] Building models...
2022-04-22 05:44.04 [debug    ] Models have been built.
2022-04-22 05:44.04 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422054404/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 05:44.06 [info     ] FQE_20220422054404: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00017128018445746842, 'time_algorithm_update': 0.005222182634264924, 'loss': 0.025027040668308388, 'time_step': 0.0054704581582269, 'init_value': -1.1892292499542236, 'ave_value': -1.187895866570709, 'soft_opc': nan} step=344




2022-04-22 05:44.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.08 [info     ] FQE_20220422054404: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001715830592221992, 'time_algorithm_update': 0.00526364944701971, 'loss': 0.023223328276883896, 'time_step': 0.005510043959284938, 'init_value': -2.034574031829834, 'ave_value': -2.0281457707957102, 'soft_opc': nan} step=688




2022-04-22 05:44.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.10 [info     ] FQE_20220422054404: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017791778542274652, 'time_algorithm_update': 0.005296884581100109, 'loss': 0.028066159683539597, 'time_step': 0.00555116146109825, 'init_value': -3.0874977111816406, 'ave_value': -3.084387111623545, 'soft_opc': nan} step=1032




2022-04-22 05:44.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.12 [info     ] FQE_20220422054404: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001724736635075059, 'time_algorithm_update': 0.005131968232088311, 'loss': 0.03155306938407553, 'time_step': 0.0053803144499313, 'init_value': -3.8858447074890137, 'ave_value': -3.860175242281712, 'soft_opc': nan} step=1376




2022-04-22 05:44.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.14 [info     ] FQE_20220422054404: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017357912174490996, 'time_algorithm_update': 0.004854278509006943, 'loss': 0.04048876955206398, 'time_step': 0.005104989506477533, 'init_value': -5.030400276184082, 'ave_value': -4.993287248667833, 'soft_opc': nan} step=1720




2022-04-22 05:44.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.16 [info     ] FQE_20220422054404: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016880728477655456, 'time_algorithm_update': 0.005253373190414074, 'loss': 0.05331973829523249, 'time_step': 0.005498868088389552, 'init_value': -5.7121686935424805, 'ave_value': -5.673455386568565, 'soft_opc': nan} step=2064




2022-04-22 05:44.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.18 [info     ] FQE_20220422054404: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001745120037433713, 'time_algorithm_update': 0.005182882381039996, 'loss': 0.06821365934136042, 'time_step': 0.005436124496681746, 'init_value': -6.68306827545166, 'ave_value': -6.636556354896711, 'soft_opc': nan} step=2408




2022-04-22 05:44.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.20 [info     ] FQE_20220422054404: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017053443332051122, 'time_algorithm_update': 0.005121152068293372, 'loss': 0.09005285338715238, 'time_step': 0.005368080943129783, 'init_value': -7.497742652893066, 'ave_value': -7.346434828884866, 'soft_opc': nan} step=2752




2022-04-22 05:44.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.22 [info     ] FQE_20220422054404: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017016918160194574, 'time_algorithm_update': 0.005220501921897711, 'loss': 0.11633508771029842, 'time_step': 0.0054645676945531095, 'init_value': -8.486530303955078, 'ave_value': -8.235744645496881, 'soft_opc': nan} step=3096




2022-04-22 05:44.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.24 [info     ] FQE_20220422054404: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017144375069196835, 'time_algorithm_update': 0.004889636538749517, 'loss': 0.1556080036395944, 'time_step': 0.005139249701832616, 'init_value': -9.211004257202148, 'ave_value': -8.813832103269728, 'soft_opc': nan} step=3440




2022-04-22 05:44.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.26 [info     ] FQE_20220422054404: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016919194265853528, 'time_algorithm_update': 0.005165911691133366, 'loss': 0.18343835417181253, 'time_step': 0.005412941062173178, 'init_value': -9.937192916870117, 'ave_value': -9.419989098852652, 'soft_opc': nan} step=3784




2022-04-22 05:44.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.28 [info     ] FQE_20220422054404: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017331644546153934, 'time_algorithm_update': 0.005343847496564998, 'loss': 0.233472321691468, 'time_step': 0.005595751280008361, 'init_value': -11.087940216064453, 'ave_value': -10.280203537259519, 'soft_opc': nan} step=4128




2022-04-22 05:44.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.30 [info     ] FQE_20220422054404: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001760977645253026, 'time_algorithm_update': 0.005265518676402957, 'loss': 0.2735518590373875, 'time_step': 0.0055180822694024375, 'init_value': -11.657430648803711, 'ave_value': -10.64530558077501, 'soft_opc': nan} step=4472




2022-04-22 05:44.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.32 [info     ] FQE_20220422054404: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001737669456836789, 'time_algorithm_update': 0.0049918481083803395, 'loss': 0.3298427569355036, 'time_step': 0.0052410655243452205, 'init_value': -12.573389053344727, 'ave_value': -11.540460678111543, 'soft_opc': nan} step=4816




2022-04-22 05:44.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.34 [info     ] FQE_20220422054404: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00018004483954851017, 'time_algorithm_update': 0.005313499722369882, 'loss': 0.3857998238161726, 'time_step': 0.005569891181103018, 'init_value': -12.889680862426758, 'ave_value': -11.806849354726204, 'soft_opc': nan} step=5160




2022-04-22 05:44.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.36 [info     ] FQE_20220422054404: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001748245815898097, 'time_algorithm_update': 0.0053281797919162485, 'loss': 0.4466075876099599, 'time_step': 0.005580581897913024, 'init_value': -13.502960205078125, 'ave_value': -12.463553761057808, 'soft_opc': nan} step=5504




2022-04-22 05:44.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.38 [info     ] FQE_20220422054404: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017281812290812648, 'time_algorithm_update': 0.005256393621134204, 'loss': 0.5047463082036999, 'time_step': 0.005506963230842768, 'init_value': -13.585882186889648, 'ave_value': -12.569196227285172, 'soft_opc': nan} step=5848




2022-04-22 05:44.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.41 [info     ] FQE_20220422054404: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017435883366784385, 'time_algorithm_update': 0.005287072686261909, 'loss': 0.5608209514838838, 'time_step': 0.005540536586628403, 'init_value': -14.074714660644531, 'ave_value': -13.206348355366169, 'soft_opc': nan} step=6192




2022-04-22 05:44.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.42 [info     ] FQE_20220422054404: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017231148342753566, 'time_algorithm_update': 0.004964423041011012, 'loss': 0.6140881369356066, 'time_step': 0.005216680986936702, 'init_value': -14.467718124389648, 'ave_value': -13.838113002291134, 'soft_opc': nan} step=6536




2022-04-22 05:44.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.45 [info     ] FQE_20220422054404: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017370318257531455, 'time_algorithm_update': 0.005201933688895647, 'loss': 0.7003048012888622, 'time_step': 0.00545273270717887, 'init_value': -14.866100311279297, 'ave_value': -14.467347306850877, 'soft_opc': nan} step=6880




2022-04-22 05:44.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.47 [info     ] FQE_20220422054404: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00017892136130222055, 'time_algorithm_update': 0.00528606841730517, 'loss': 0.7642131788482847, 'time_step': 0.00554242730140686, 'init_value': -15.17490005493164, 'ave_value': -15.235232615465433, 'soft_opc': nan} step=7224




2022-04-22 05:44.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.49 [info     ] FQE_20220422054404: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00018187040506407272, 'time_algorithm_update': 0.005300317392792813, 'loss': 0.8332307268757113, 'time_step': 0.005562071190323941, 'init_value': -15.033443450927734, 'ave_value': -15.53948679744635, 'soft_opc': nan} step=7568




2022-04-22 05:44.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.51 [info     ] FQE_20220422054404: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017408160276191178, 'time_algorithm_update': 0.005261719919914423, 'loss': 0.9054971910948151, 'time_step': 0.005511986654858256, 'init_value': -15.184967041015625, 'ave_value': -16.09791282049028, 'soft_opc': nan} step=7912




2022-04-22 05:44.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.53 [info     ] FQE_20220422054404: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017157474229502123, 'time_algorithm_update': 0.004999954340069793, 'loss': 0.9583317483991904, 'time_step': 0.005250152460364408, 'init_value': -15.17455768585205, 'ave_value': -16.481411556014372, 'soft_opc': nan} step=8256




2022-04-22 05:44.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.55 [info     ] FQE_20220422054404: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017593974290892135, 'time_algorithm_update': 0.0053069175675857895, 'loss': 1.0137536540237624, 'time_step': 0.005559410466704258, 'init_value': -14.87583065032959, 'ave_value': -16.423517444390836, 'soft_opc': nan} step=8600




2022-04-22 05:44.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.57 [info     ] FQE_20220422054404: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001741599204928376, 'time_algorithm_update': 0.005303158316501352, 'loss': 1.085745236056662, 'time_step': 0.005556410828302073, 'init_value': -15.216354370117188, 'ave_value': -17.28383843116798, 'soft_opc': nan} step=8944




2022-04-22 05:44.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:44.59 [info     ] FQE_20220422054404: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00017472963000452794, 'time_algorithm_update': 0.00519939148148825, 'loss': 1.1226702808066769, 'time_step': 0.005453675292259039, 'init_value': -15.163898468017578, 'ave_value': -17.618099804681954, 'soft_opc': nan} step=9288




2022-04-22 05:44.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.01 [info     ] FQE_20220422054404: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017101681509683298, 'time_algorithm_update': 0.005052225534306016, 'loss': 1.1669069038227546, 'time_step': 0.005301498396452083, 'init_value': -15.000185012817383, 'ave_value': -17.786056402945672, 'soft_opc': nan} step=9632




2022-04-22 05:45.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.03 [info     ] FQE_20220422054404: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017411972201147744, 'time_algorithm_update': 0.00537912928780844, 'loss': 1.2105464416631866, 'time_step': 0.005628716807032741, 'init_value': -16.161226272583008, 'ave_value': -19.288809540249442, 'soft_opc': nan} step=9976




2022-04-22 05:45.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.05 [info     ] FQE_20220422054404: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017478022464486055, 'time_algorithm_update': 0.0052142067011012585, 'loss': 1.2262471975585403, 'time_step': 0.005467026732688726, 'init_value': -16.488523483276367, 'ave_value': -19.81793232935036, 'soft_opc': nan} step=10320




2022-04-22 05:45.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.07 [info     ] FQE_20220422054404: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.000178784825081049, 'time_algorithm_update': 0.005307434603225353, 'loss': 1.2618460891995753, 'time_step': 0.00556423636369927, 'init_value': -17.0679874420166, 'ave_value': -20.509764017720144, 'soft_opc': nan} step=10664




2022-04-22 05:45.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.09 [info     ] FQE_20220422054404: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017796491467675498, 'time_algorithm_update': 0.005426320918770724, 'loss': 1.2777877425190147, 'time_step': 0.005681733059328656, 'init_value': -17.070892333984375, 'ave_value': -20.949118332230896, 'soft_opc': nan} step=11008




2022-04-22 05:45.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.11 [info     ] FQE_20220422054404: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.000172199897987898, 'time_algorithm_update': 0.005038474188294522, 'loss': 1.295632939200935, 'time_step': 0.005287781011226566, 'init_value': -16.841197967529297, 'ave_value': -21.062083102620903, 'soft_opc': nan} step=11352




2022-04-22 05:45.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.13 [info     ] FQE_20220422054404: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001741668512654859, 'time_algorithm_update': 0.005254556273305139, 'loss': 1.3455513442844846, 'time_step': 0.005505278360011966, 'init_value': -16.998689651489258, 'ave_value': -21.32320986496335, 'soft_opc': nan} step=11696




2022-04-22 05:45.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.15 [info     ] FQE_20220422054404: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00017076869343602381, 'time_algorithm_update': 0.00520537065905194, 'loss': 1.3795510634000219, 'time_step': 0.005451525366583536, 'init_value': -17.155006408691406, 'ave_value': -21.599503317195083, 'soft_opc': nan} step=12040




2022-04-22 05:45.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.17 [info     ] FQE_20220422054404: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017440526984458747, 'time_algorithm_update': 0.005297915186992911, 'loss': 1.3578790094010358, 'time_step': 0.0055509161117464996, 'init_value': -17.07642364501953, 'ave_value': -21.870937811319827, 'soft_opc': nan} step=12384




2022-04-22 05:45.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.19 [info     ] FQE_20220422054404: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017646648163019226, 'time_algorithm_update': 0.005283784727717555, 'loss': 1.370788620893172, 'time_step': 0.005537823882213858, 'init_value': -17.69814682006836, 'ave_value': -22.620264415489928, 'soft_opc': nan} step=12728




2022-04-22 05:45.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.21 [info     ] FQE_20220422054404: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00016951560974121094, 'time_algorithm_update': 0.005002662192943485, 'loss': 1.409592314222611, 'time_step': 0.005246048056802084, 'init_value': -17.7524471282959, 'ave_value': -22.81404244333405, 'soft_opc': nan} step=13072




2022-04-22 05:45.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.23 [info     ] FQE_20220422054404: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00018032692199529602, 'time_algorithm_update': 0.005307411038598349, 'loss': 1.4282468665109644, 'time_step': 0.005563557841057001, 'init_value': -18.230792999267578, 'ave_value': -23.39688035961878, 'soft_opc': nan} step=13416




2022-04-22 05:45.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.25 [info     ] FQE_20220422054404: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017359021098114724, 'time_algorithm_update': 0.005209789719692496, 'loss': 1.4038356420757292, 'time_step': 0.005460542301798976, 'init_value': -18.034839630126953, 'ave_value': -23.450761869882847, 'soft_opc': nan} step=13760




2022-04-22 05:45.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.27 [info     ] FQE_20220422054404: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001746388368828352, 'time_algorithm_update': 0.005235131396803745, 'loss': 1.390909502456008, 'time_step': 0.005485570707986521, 'init_value': -17.897428512573242, 'ave_value': -23.703395017390015, 'soft_opc': nan} step=14104




2022-04-22 05:45.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.29 [info     ] FQE_20220422054404: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00017344813014185705, 'time_algorithm_update': 0.0049574305844861406, 'loss': 1.382762267644149, 'time_step': 0.005207902470300364, 'init_value': -18.426502227783203, 'ave_value': -24.32347375323308, 'soft_opc': nan} step=14448




2022-04-22 05:45.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.31 [info     ] FQE_20220422054404: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00017565142276675203, 'time_algorithm_update': 0.005345060381778451, 'loss': 1.3966832491460928, 'time_step': 0.005597024462943853, 'init_value': -18.55379867553711, 'ave_value': -24.707275034036268, 'soft_opc': nan} step=14792




2022-04-22 05:45.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.33 [info     ] FQE_20220422054404: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001734723878461261, 'time_algorithm_update': 0.005252094462860462, 'loss': 1.41752540303914, 'time_step': 0.00550511964531832, 'init_value': -18.528926849365234, 'ave_value': -24.810145366900006, 'soft_opc': nan} step=15136




2022-04-22 05:45.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.35 [info     ] FQE_20220422054404: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017639994621276855, 'time_algorithm_update': 0.005193552998609321, 'loss': 1.4035972454368548, 'time_step': 0.005446011243864547, 'init_value': -18.563581466674805, 'ave_value': -24.958380777075014, 'soft_opc': nan} step=15480




2022-04-22 05:45.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.37 [info     ] FQE_20220422054404: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00017378635184709415, 'time_algorithm_update': 0.005209172880926797, 'loss': 1.374208875953458, 'time_step': 0.005458452673845513, 'init_value': -18.34021759033203, 'ave_value': -25.032510626181825, 'soft_opc': nan} step=15824




2022-04-22 05:45.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.39 [info     ] FQE_20220422054404: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017334208932033804, 'time_algorithm_update': 0.004999251559723255, 'loss': 1.4279638905792909, 'time_step': 0.005248823138170464, 'init_value': -18.372291564941406, 'ave_value': -25.1916540146067, 'soft_opc': nan} step=16168




2022-04-22 05:45.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.42 [info     ] FQE_20220422054404: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001704145309536956, 'time_algorithm_update': 0.005310582560162211, 'loss': 1.4184902413033469, 'time_step': 0.005555342103159705, 'init_value': -18.38697052001953, 'ave_value': -25.344672204387532, 'soft_opc': nan} step=16512




2022-04-22 05:45.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.44 [info     ] FQE_20220422054404: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00017395407654518304, 'time_algorithm_update': 0.005226526842560879, 'loss': 1.4293988313374304, 'time_step': 0.005471991938213969, 'init_value': -18.86294174194336, 'ave_value': -25.861367854804875, 'soft_opc': nan} step=16856




2022-04-22 05:45.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 05:45.46 [info     ] FQE_20220422054404: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017059265181075696, 'time_algorithm_update': 0.005132731310156889, 'loss': 1.4395015572825836, 'time_step': 0.005376115094783695, 'init_value': -19.12302589416504, 'ave_value': -26.055331087194652, 'soft_opc': nan} step=17200




2022-04-22 05:45.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422054404/model_17200.pt
search iteration:  23
using hyper params:  [0.0072414049990309134, 0.00984688546817252, 6.004164267478865e-05, 5]
2022-04-22 05:45.46 [debug    ] RoundIterator is selected.
2022-04-22 05:45.46 [info     ] Directory is created at d3rlpy_logs/CQL_20220422054546
2022-04-22 05:45.46 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 05:45.46 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 05:45.46 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422054546/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0072414049990309134, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:46.02 [info     ] CQL_20220422054546: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003990472396674184, 'time_algorithm_update': 0.046472517052137785, 'temp_loss': 4.94133940528583, 'temp': 0.9893932766307986, 'alpha_loss': -17.734539825792258, 'alpha': 1.017711813394734, 'critic_loss': 99.1302101377807, 'actor_loss': 4.575943178308837, 'time_step': 0.0469745397567749, 'td_error': 1.3113624598337863, 'init_value': -7.464409351348877, 'ave_value': -6.883035286527609} step=346
2022-04-22 05:46.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:46.19 [info     ] CQL_20220422054546: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00040844754676598344, 'time_algorithm_update': 0.04681857472899332, 'temp_loss': 4.88927454479857, 'temp': 0.9690278758203363, 'alpha_loss': -18.372180668604855, 'alpha': 1.054141586915606, 'critic_loss': 204.6493181879121, 'actor_loss': 7.544627163451531, 'time_step': 0.04733214970958026, 'td_error': 1.3758161722951703, 'init_value': -9.68503189086914, 'ave_value': -9.024697748865206} step=692
2022-04-22 05:46.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:46.37 [info     ] CQL_20220422054546: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00040261042600422237, 'time_algorithm_update': 0.04718583374354192, 'temp_loss': 4.790991201566134, 'temp': 0.9493805578333794, 'alpha_loss': -19.020132627101304, 'alpha': 1.0923843838575948, 'critic_loss': 455.0808623209165, 'actor_loss': 7.196916906819867, 'time_step': 0.04769068026129221, 'td_error': 1.2871491626301679, 'init_value': -7.199850082397461, 'ave_value': -6.888389218051381} step=1038
2022-04-22 05:46.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:46.53 [info     ] CQL_20220422054546: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00042191612927210815, 'time_algorithm_update': 0.04652575950402056, 'temp_loss': 4.6965220401741865, 'temp': 0.9302968959932383, 'alpha_loss': -19.697155324020827, 'alpha': 1.1324954859783194, 'critic_loss': 847.0166962904738, 'actor_loss': 4.775867518662028, 'time_step': 0.04704981870044862, 'td_error': 1.2904424438736721, 'init_value': -5.975508213043213, 'ave_value': -5.862948618798868} step=1384
2022-04-22 05:46.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:47.10 [info     ] CQL_20220422054546: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0004032767576978386, 'time_algorithm_update': 0.046835210281989476, 'temp_loss': 4.6038311905943585, 'temp': 0.9117265614126459, 'alpha_loss': -20.427645319459067, 'alpha': 1.1745442079670856, 'critic_loss': 1275.2721518808708, 'actor_loss': 4.675426867655936, 'time_step': 0.04734019254673423, 'td_error': 1.3099840658179986, 'init_value': -6.603440761566162, 'ave_value': -6.529113795645376} step=1730
2022-04-22 05:47.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:47.28 [info     ] CQL_20220422054546: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00040672693638443255, 'time_algorithm_update': 0.047109441261071, 'temp_loss': 4.51367321593224, 'temp': 0.8936304928939467, 'alpha_loss': -21.191938290017188, 'alpha': 1.2185800196118437, 'critic_loss': 1694.9706265664513, 'actor_loss': 5.347691840519106, 'time_step': 0.04761455031488672, 'td_error': 1.3302860052837302, 'init_value': -7.5552825927734375, 'ave_value': -7.490733520766467} step=2076
2022-04-22 05:47.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:47.45 [info     ] CQL_20220422054546: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0004112279484037719, 'time_algorithm_update': 0.04715822404519671, 'temp_loss': 4.424766426141551, 'temp': 0.8759735300706301, 'alpha_loss': -21.994021465323563, 'alpha': 1.2646270883565693, 'critic_loss': 2125.2396480141347, 'actor_loss': 6.1861294101428435, 'time_step': 0.0476706572350739, 'td_error': 1.3512623027264536, 'init_value': -8.393492698669434, 'ave_value': -8.341255066551943} step=2422
2022-04-22 05:47.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:48.02 [info     ] CQL_20220422054546: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00040920001233933287, 'time_algorithm_update': 0.047100031996048945, 'temp_loss': 4.337111033456174, 'temp': 0.8587371527804115, 'alpha_loss': -22.82707996588911, 'alpha': 1.3127267484720042, 'critic_loss': 2583.1406334673047, 'actor_loss': 7.138558405672194, 'time_step': 0.04761149152854963, 'td_error': 1.3762562198196966, 'init_value': -9.428051948547363, 'ave_value': -9.379642849863847} step=2768
2022-04-22 05:48.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:48.19 [info     ] CQL_20220422054546: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0004040554079706269, 'time_algorithm_update': 0.04717216464136377, 'temp_loss': 4.252066759704855, 'temp': 0.8418936796615578, 'alpha_loss': -23.702477372450634, 'alpha': 1.3629141215644132, 'critic_loss': 3062.87850758106, 'actor_loss': 8.229721533769817, 'time_step': 0.04768067701703551, 'td_error': 1.4035858340226648, 'init_value': -10.321905136108398, 'ave_value': -10.294237933332969} step=3114
2022-04-22 05:48.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:48.36 [info     ] CQL_20220422054546: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003980398178100586, 'time_algorithm_update': 0.04714166704629887, 'temp_loss': 4.168929834586347, 'temp': 0.8254221210934524, 'alpha_loss': -24.608317871314252, 'alpha': 1.4152362267406, 'critic_loss': 3569.851060106575, 'actor_loss': 9.367047833569478, 'time_step': 0.04764238671760339, 'td_error': 1.4378670224506813, 'init_value': -11.54821491241455, 'ave_value': -11.522689291717544} step=3460
2022-04-22 05:48.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:48.53 [info     ] CQL_20220422054546: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0004032926063317095, 'time_algorithm_update': 0.04710889827309316, 'temp_loss': 4.086646431443319, 'temp': 0.8093072789252838, 'alpha_loss': -25.556233924248314, 'alpha': 1.4697434195893349, 'critic_loss': 4082.852977951138, 'actor_loss': 10.59175901743718, 'time_step': 0.047610146461883725, 'td_error': 1.4761304676461735, 'init_value': -12.84949016571045, 'ave_value': -12.820706169070537} step=3806
2022-04-22 05:48.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:49.10 [info     ] CQL_20220422054546: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00040475757135821215, 'time_algorithm_update': 0.047027564462209716, 'temp_loss': 4.0073636839155515, 'temp': 0.793532769804056, 'alpha_loss': -26.54672568106238, 'alpha': 1.526497701688998, 'critic_loss': 4626.080230536489, 'actor_loss': 11.916148998833805, 'time_step': 0.04753304974881211, 'td_error': 1.5172063866353431, 'init_value': -14.017509460449219, 'ave_value': -14.002700313271477} step=4152
2022-04-22 05:49.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:49.27 [info     ] CQL_20220422054546: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0004055100369315616, 'time_algorithm_update': 0.046811668169980794, 'temp_loss': 3.9300349447768546, 'temp': 0.7780830760222639, 'alpha_loss': -27.57136075345078, 'alpha': 1.5855616603283524, 'critic_loss': 5186.18551300578, 'actor_loss': 13.30215119764295, 'time_step': 0.04732218298608857, 'td_error': 1.5645737832410802, 'init_value': -15.355100631713867, 'ave_value': -15.342827484561347} step=4498
2022-04-22 05:49.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:49.44 [info     ] CQL_20220422054546: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0004082284221759421, 'time_algorithm_update': 0.046929601299969446, 'temp_loss': 3.8526619087064886, 'temp': 0.76295002123524, 'alpha_loss': -28.63926299872426, 'alpha': 1.6469993932398757, 'critic_loss': 5770.153444499639, 'actor_loss': 14.68837625580716, 'time_step': 0.04743987425214293, 'td_error': 1.6298529355936213, 'init_value': -17.189428329467773, 'ave_value': -17.159233528994566} step=4844
2022-04-22 05:49.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:50.01 [info     ] CQL_20220422054546: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0004084316981321125, 'time_algorithm_update': 0.04740663277620525, 'temp_loss': 3.7782846895945554, 'temp': 0.7481245569066505, 'alpha_loss': -29.749892108013174, 'alpha': 1.7108911579054904, 'critic_loss': 6375.774120529263, 'actor_loss': 16.173974337605383, 'time_step': 0.047912754075375596, 'td_error': 1.6871798016851565, 'init_value': -18.524438858032227, 'ave_value': -18.502244397937094} step=5190
2022-04-22 05:50.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:50.18 [info     ] CQL_20220422054546: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003891597593450822, 'time_algorithm_update': 0.04640530161774917, 'temp_loss': 3.7054167083233076, 'temp': 0.733593895083907, 'alpha_loss': -30.899207886932903, 'alpha': 1.7773158770765183, 'critic_loss': 6874.790130791636, 'actor_loss': 17.621658937090395, 'time_step': 0.04689190360162988, 'td_error': 1.7536746634592022, 'init_value': -20.05282211303711, 'ave_value': -20.028324081978} step=5536
2022-04-22 05:50.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:50.35 [info     ] CQL_20220422054546: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0004124420915724914, 'time_algorithm_update': 0.04740375866090631, 'temp_loss': 3.632802130169951, 'temp': 0.7193517869263026, 'alpha_loss': -32.10806360410128, 'alpha': 1.8463642762575536, 'critic_loss': 7273.803787143244, 'actor_loss': 19.078519595151693, 'time_step': 0.047921804334386926, 'td_error': 1.8082527548168916, 'init_value': -21.11256217956543, 'ave_value': -21.104676887761816} step=5882
2022-04-22 05:50.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:50.52 [info     ] CQL_20220422054546: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0004179105593289943, 'time_algorithm_update': 0.04733428651886868, 'temp_loss': 3.5622216559558932, 'temp': 0.705392403581928, 'alpha_loss': -33.355928817925424, 'alpha': 1.918138698346353, 'critic_loss': 7665.264170034772, 'actor_loss': 20.52458813838187, 'time_step': 0.04784824048852645, 'td_error': 1.873314390515221, 'init_value': -22.401453018188477, 'ave_value': -22.401281972517047} step=6228
2022-04-22 05:50.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:51.09 [info     ] CQL_20220422054546: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003987605861156662, 'time_algorithm_update': 0.044912389937163774, 'temp_loss': 3.4930116055328724, 'temp': 0.6917089251424536, 'alpha_loss': -34.65405310922964, 'alpha': 1.9927319171111708, 'critic_loss': 7996.458147523031, 'actor_loss': 21.936639052595016, 'time_step': 0.045410047376776015, 'td_error': 1.9489880507317159, 'init_value': -23.868982315063477, 'ave_value': -23.86673503050933} step=6574
2022-04-22 05:51.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:51.25 [info     ] CQL_20220422054546: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0004041608358394204, 'time_algorithm_update': 0.04371290744384589, 'temp_loss': 3.425440825478879, 'temp': 0.6782916165845243, 'alpha_loss': -36.00172323987663, 'alpha': 2.0702476322306373, 'critic_loss': 8569.218645569907, 'actor_loss': 23.52087681830963, 'time_step': 0.044222256351757604, 'td_error': 2.022755594159695, 'init_value': -25.148130416870117, 'ave_value': -25.15939634376479} step=6920
2022-04-22 05:51.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:51.40 [info     ] CQL_20220422054546: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0004087624522302881, 'time_algorithm_update': 0.043571780182722675, 'temp_loss': 3.359710295765386, 'temp': 0.6651364368510384, 'alpha_loss': -37.399542869170965, 'alpha': 2.150785243580107, 'critic_loss': 9199.472258286669, 'actor_loss': 25.045091430575862, 'time_step': 0.04408055922888607, 'td_error': 2.130006476623113, 'init_value': -27.102603912353516, 'ave_value': -27.093481980437293} step=7266
2022-04-22 05:51.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:51.56 [info     ] CQL_20220422054546: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00041732347080473264, 'time_algorithm_update': 0.04352723311826673, 'temp_loss': 3.293486767421568, 'temp': 0.6522384239414524, 'alpha_loss': -38.857723710164855, 'alpha': 2.234472777802131, 'critic_loss': 9882.664274182624, 'actor_loss': 26.684712972255113, 'time_step': 0.04404804885731956, 'td_error': 2.2354337948549046, 'init_value': -28.828771591186523, 'ave_value': -28.81663347102274} step=7612
2022-04-22 05:51.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:52.12 [info     ] CQL_20220422054546: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00039899142491335127, 'time_algorithm_update': 0.0441192553911595, 'temp_loss': 3.2300880072433826, 'temp': 0.6395931409273533, 'alpha_loss': -40.36125598753119, 'alpha': 2.3214201182988337, 'critic_loss': 10463.18989060242, 'actor_loss': 28.289247606531045, 'time_step': 0.04461055615044743, 'td_error': 2.3367941530188974, 'init_value': -30.413673400878906, 'ave_value': -30.39627183187963} step=7958
2022-04-22 05:52.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:52.28 [info     ] CQL_20220422054546: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0004034352440365477, 'time_algorithm_update': 0.04415719977693062, 'temp_loss': 3.1671813609283093, 'temp': 0.6271922262762323, 'alpha_loss': -41.94589494694175, 'alpha': 2.4117696988100263, 'critic_loss': 10469.62159332099, 'actor_loss': 29.6377306866508, 'time_step': 0.04465364031708999, 'td_error': 2.4131107703643555, 'init_value': -31.46861457824707, 'ave_value': -31.46921294959816} step=8304
2022-04-22 05:52.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:52.44 [info     ] CQL_20220422054546: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0004085550418478905, 'time_algorithm_update': 0.044966649457898446, 'temp_loss': 3.105448165380886, 'temp': 0.6150339647180083, 'alpha_loss': -43.56531232492083, 'alpha': 2.5056242295083284, 'critic_loss': 10534.44621567919, 'actor_loss': 31.164493467077353, 'time_step': 0.045467454574011655, 'td_error': 2.5072976068027284, 'init_value': -32.78300476074219, 'ave_value': -32.79222533875814} step=8650
2022-04-22 05:52.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:53.00 [info     ] CQL_20220422054546: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00045513762214969346, 'time_algorithm_update': 0.044309177150616066, 'temp_loss': 3.045794092161807, 'temp': 0.6031105935229042, 'alpha_loss': -45.28666036528659, 'alpha': 2.6031649415892675, 'critic_loss': 10842.145688448338, 'actor_loss': 32.62799867729231, 'time_step': 0.044855753810419514, 'td_error': 2.640995207110928, 'init_value': -34.71928024291992, 'ave_value': -34.70146612454049} step=8996
2022-04-22 05:53.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:53.17 [info     ] CQL_20220422054546: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0004072141096082037, 'time_algorithm_update': 0.04421510379438456, 'temp_loss': 2.986776166568602, 'temp': 0.5914187510578619, 'alpha_loss': -47.03326317891909, 'alpha': 2.704498479132018, 'critic_loss': 11202.266810422689, 'actor_loss': 34.044194337260514, 'time_step': 0.044711914365691253, 'td_error': 2.69947723152049, 'init_value': -35.344600677490234, 'ave_value': -35.3645248680784} step=9342
2022-04-22 05:53.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:53.34 [info     ] CQL_20220422054546: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0004220318932064696, 'time_algorithm_update': 0.047737084372195204, 'temp_loss': 2.9285740521601857, 'temp': 0.5799546053988396, 'alpha_loss': -48.85523604244166, 'alpha': 2.809759185493337, 'critic_loss': 11574.578599169075, 'actor_loss': 35.405321462995055, 'time_step': 0.048248940809613707, 'td_error': 2.8273774896709827, 'init_value': -37.05418014526367, 'ave_value': -37.056190113955736} step=9688
2022-04-22 05:53.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:53.51 [info     ] CQL_20220422054546: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0004173282943019977, 'time_algorithm_update': 0.047608605009972015, 'temp_loss': 2.872207753231071, 'temp': 0.5687108813338198, 'alpha_loss': -50.75959180269627, 'alpha': 2.9191190073255857, 'critic_loss': 11493.800394576409, 'actor_loss': 36.577650797849444, 'time_step': 0.048119820611325304, 'td_error': 2.9082515256483217, 'init_value': -38.02216720581055, 'ave_value': -38.031490006860594} step=10034
2022-04-22 05:53.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:54.08 [info     ] CQL_20220422054546: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0004060757642536494, 'time_algorithm_update': 0.04745509858765354, 'temp_loss': 2.81616262066571, 'temp': 0.5576860096757812, 'alpha_loss': -52.73230647489515, 'alpha': 3.0327311650866027, 'critic_loss': 9089.44508811642, 'actor_loss': 37.26802974767079, 'time_step': 0.04796448608354337, 'td_error': 2.977330226909624, 'init_value': -38.86989212036133, 'ave_value': -38.877316431594494} step=10380
2022-04-22 05:54.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:54.25 [info     ] CQL_20220422054546: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0004060440669859076, 'time_algorithm_update': 0.04739471253632121, 'temp_loss': 2.761077825044621, 'temp': 0.5468768486053268, 'alpha_loss': -54.78780341837447, 'alpha': 3.1507683799445974, 'critic_loss': 7369.895658812771, 'actor_loss': 38.369330555028306, 'time_step': 0.04790244763986224, 'td_error': 3.075355460813082, 'init_value': -40.0639762878418, 'ave_value': -40.06187183168501} step=10726
2022-04-22 05:54.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:54.43 [info     ] CQL_20220422054546: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00040130532545850457, 'time_algorithm_update': 0.0473158724735238, 'temp_loss': 2.7095513205996826, 'temp': 0.5362741552680903, 'alpha_loss': -56.92115957888564, 'alpha': 3.2734263342929024, 'critic_loss': 6069.712042483291, 'actor_loss': 39.37300553073773, 'time_step': 0.047819136195100115, 'td_error': 3.1581983902479625, 'init_value': -41.031524658203125, 'ave_value': -41.027902492452654} step=11072
2022-04-22 05:54.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:55.00 [info     ] CQL_20220422054546: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0004081422882962089, 'time_algorithm_update': 0.046933620651333316, 'temp_loss': 2.6554319148807854, 'temp': 0.52587723318552, 'alpha_loss': -59.13016157205394, 'alpha': 3.400834155909588, 'critic_loss': 6206.210778032424, 'actor_loss': 40.920545500826975, 'time_step': 0.04744096574066691, 'td_error': 3.2862780246135044, 'init_value': -42.44266128540039, 'ave_value': -42.446052073573206} step=11418
2022-04-22 05:55.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:55.17 [info     ] CQL_20220422054546: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.000420391904136349, 'time_algorithm_update': 0.047004504699927535, 'temp_loss': 2.604224514409986, 'temp': 0.5156845367712781, 'alpha_loss': -61.43680845933154, 'alpha': 3.5332266299021726, 'critic_loss': 6460.057461953576, 'actor_loss': 42.15138006485956, 'time_step': 0.04752153192641418, 'td_error': 3.3938959387870486, 'init_value': -43.62300491333008, 'ave_value': -43.624009811849945} step=11764
2022-04-22 05:55.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:55.34 [info     ] CQL_20220422054546: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00040162987791733933, 'time_algorithm_update': 0.04724672764022916, 'temp_loss': 2.5538332930879095, 'temp': 0.5056876019935388, 'alpha_loss': -63.83141070018614, 'alpha': 3.670751999568388, 'critic_loss': 6276.507932453486, 'actor_loss': 43.325531182261564, 'time_step': 0.047751742980383725, 'td_error': 3.5213109453594296, 'init_value': -44.99700927734375, 'ave_value': -44.990615571059564} step=12110
2022-04-22 05:55.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:55.51 [info     ] CQL_20220422054546: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0004094577249074947, 'time_algorithm_update': 0.0472622620577068, 'temp_loss': 2.504406069744529, 'temp': 0.49588531953406473, 'alpha_loss': -66.32190144268763, 'alpha': 3.8136476719310517, 'critic_loss': 6593.647396021495, 'actor_loss': 44.713525926446636, 'time_step': 0.04777274862190203, 'td_error': 3.6516640899931425, 'init_value': -46.357460021972656, 'ave_value': -46.3464218798041} step=12456
2022-04-22 05:55.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:56.08 [info     ] CQL_20220422054546: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00040917244949781827, 'time_algorithm_update': 0.047387298131953776, 'temp_loss': 2.4559202304465235, 'temp': 0.48627190576123364, 'alpha_loss': -68.90275033498776, 'alpha': 3.9621152133610895, 'critic_loss': 6790.264520016709, 'actor_loss': 45.88437380267016, 'time_step': 0.047895658222926143, 'td_error': 3.7420570149676142, 'init_value': -47.2088508605957, 'ave_value': -47.214411121004844} step=12802
2022-04-22 05:56.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:56.25 [info     ] CQL_20220422054546: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00043167268609724984, 'time_algorithm_update': 0.04706169277257313, 'temp_loss': 2.408392729097708, 'temp': 0.4768453483292133, 'alpha_loss': -71.58432758750254, 'alpha': 4.11633231736332, 'critic_loss': 7121.368975512554, 'actor_loss': 47.09915117032266, 'time_step': 0.047591492619817655, 'td_error': 3.8847045563588547, 'init_value': -48.67389678955078, 'ave_value': -48.664002899560614} step=13148
2022-04-22 05:56.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:56.42 [info     ] CQL_20220422054546: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0004010551926717593, 'time_algorithm_update': 0.04701687972669657, 'temp_loss': 2.361544092266546, 'temp': 0.46760183658903043, 'alpha_loss': -74.35861241335125, 'alpha': 4.276567839473659, 'critic_loss': 7390.459979283327, 'actor_loss': 48.20429159175454, 'time_step': 0.04751358969363174, 'td_error': 3.978214089055959, 'init_value': -49.53916931152344, 'ave_value': -49.53703394438091} step=13494
2022-04-22 05:56.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:57.00 [info     ] CQL_20220422054546: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0004403157041252004, 'time_algorithm_update': 0.047650032649839545, 'temp_loss': 2.3158414012434854, 'temp': 0.45853755231193033, 'alpha_loss': -77.25819650550798, 'alpha': 4.443000712146649, 'critic_loss': 7374.338988552204, 'actor_loss': 49.03085139721115, 'time_step': 0.048190960994345604, 'td_error': 4.067216830200122, 'init_value': -50.396751403808594, 'ave_value': -50.39166611571513} step=13840
2022-04-22 05:57.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:57.17 [info     ] CQL_20220422054546: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0004050690314673275, 'time_algorithm_update': 0.04700692885183875, 'temp_loss': 2.2708732647702874, 'temp': 0.4496498902925866, 'alpha_loss': -80.25853012614168, 'alpha': 4.615949455713261, 'critic_loss': 7672.204599722272, 'actor_loss': 50.10485241178832, 'time_step': 0.04751299089089984, 'td_error': 4.197697692409131, 'init_value': -51.67231750488281, 'ave_value': -51.6498308435895} step=14186
2022-04-22 05:57.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:57.34 [info     ] CQL_20220422054546: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0004019571866603256, 'time_algorithm_update': 0.04779138041369488, 'temp_loss': 2.22661289245407, 'temp': 0.44093328171727286, 'alpha_loss': -83.3823694879609, 'alpha': 4.795610094346063, 'critic_loss': 6752.801189091853, 'actor_loss': 50.5867266351777, 'time_step': 0.04829770981231866, 'td_error': 4.200918627703377, 'init_value': -51.616294860839844, 'ave_value': -51.61750141268604} step=14532
2022-04-22 05:57.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:57.51 [info     ] CQL_20220422054546: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00042869314292951817, 'time_algorithm_update': 0.04709455663758206, 'temp_loss': 2.183767084441433, 'temp': 0.43238646084862636, 'alpha_loss': -86.63150776328379, 'alpha': 4.982268308628501, 'critic_loss': 5640.094489477962, 'actor_loss': 51.17717233558611, 'time_step': 0.04762483676733998, 'td_error': 4.311642949993328, 'init_value': -52.67591094970703, 'ave_value': -52.66200938832991} step=14878
2022-04-22 05:57.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:58.08 [info     ] CQL_20220422054546: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00040199990906467325, 'time_algorithm_update': 0.047298501681730235, 'temp_loss': 2.1415474139197026, 'temp': 0.42400421813733313, 'alpha_loss': -90.01171407534208, 'alpha': 5.176213570412873, 'critic_loss': 6025.7824975162575, 'actor_loss': 52.267554178403294, 'time_step': 0.04780020879183201, 'td_error': 4.392424846277031, 'init_value': -53.333797454833984, 'ave_value': -53.33804401055848} step=15224
2022-04-22 05:58.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:58.25 [info     ] CQL_20220422054546: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0004196993877432939, 'time_algorithm_update': 0.04712736399876589, 'temp_loss': 2.0998780017643304, 'temp': 0.41578483478182315, 'alpha_loss': -93.51884605981022, 'alpha': 5.377729211928528, 'critic_loss': 6021.75348570719, 'actor_loss': 52.985340052257385, 'time_step': 0.04764892462361066, 'td_error': 4.4587724608408195, 'init_value': -53.9539680480957, 'ave_value': -53.950701579622546} step=15570
2022-04-22 05:58.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:58.42 [info     ] CQL_20220422054546: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00040483888174068034, 'time_algorithm_update': 0.04699699244747272, 'temp_loss': 2.0593620166613187, 'temp': 0.40772413311666145, 'alpha_loss': -97.15940133684633, 'alpha': 5.587075232081331, 'critic_loss': 5636.246769723176, 'actor_loss': 53.50103765278193, 'time_step': 0.04750451600620512, 'td_error': 4.530378581455381, 'init_value': -54.588531494140625, 'ave_value': -54.58237757567491} step=15916
2022-04-22 05:58.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:58.59 [info     ] CQL_20220422054546: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0004261160172479001, 'time_algorithm_update': 0.04717901607469327, 'temp_loss': 2.0189442720716397, 'temp': 0.3998216717918484, 'alpha_loss': -100.92574054519565, 'alpha': 5.804549436348712, 'critic_loss': 5837.246431030979, 'actor_loss': 54.25953802207991, 'time_step': 0.04770594525199405, 'td_error': 4.61589932058027, 'init_value': -55.3033561706543, 'ave_value': -55.30325925003131} step=16262
2022-04-22 05:58.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:59.16 [info     ] CQL_20220422054546: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0004277263762633925, 'time_algorithm_update': 0.046562019111104094, 'temp_loss': 1.979765990565967, 'temp': 0.39207249673115724, 'alpha_loss': -104.87329659434413, 'alpha': 6.0305116824332, 'critic_loss': 5593.073635917179, 'actor_loss': 54.68070313558413, 'time_step': 0.047092942144140344, 'td_error': 4.648371409384994, 'init_value': -55.591270446777344, 'ave_value': -55.59008000040574} step=16608
2022-04-22 05:59.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:59.33 [info     ] CQL_20220422054546: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0004053605085163447, 'time_algorithm_update': 0.04633115895221688, 'temp_loss': 1.9423671526715935, 'temp': 0.3844720050261889, 'alpha_loss': -108.9351967166614, 'alpha': 6.265233016427541, 'critic_loss': 5504.824690096641, 'actor_loss': 55.181726753367165, 'time_step': 0.04684068495138532, 'td_error': 4.72467302093164, 'init_value': -56.24472427368164, 'ave_value': -56.24199972591337} step=16954
2022-04-22 05:59.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 05:59.50 [info     ] CQL_20220422054546: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00040976160523519353, 'time_algorithm_update': 0.04685263551039503, 'temp_loss': 1.9036130533053006, 'temp': 0.3770186470423131, 'alpha_loss': -113.1807607749983, 'alpha': 6.509097905517313, 'critic_loss': 5711.799138592847, 'actor_loss': 55.765248458509504, 'time_step': 0.04735122388497943, 'td_error': 4.787284487559549, 'init_value': -56.7861442565918, 'ave_value': -56.77823256686605} step=17300
2022-04-22 05:59.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422054546/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 05:59.51 [info     ] FQE_20220422055950: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00015966432640351444, 'time_algorithm_update': 0.005182622426963714, 'loss': 0.0072147072971719935, 'time_step': 0.00541602416210864, 'init_value': -0.16413842141628265, 'ave_value': -0.14051888752309186, 'soft_opc': nan} step=166




2022-04-22 05:59.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:59.52 [info     ] FQE_20220422055950: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001619666455739952, 'time_algorithm_update': 0.005256165941077542, 'loss': 0.004639462225632854, 'time_step': 0.0054942843425704775, 'init_value': -0.22601178288459778, 'ave_value': -0.1801091153464104, 'soft_opc': nan} step=332




2022-04-22 05:59.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:59.53 [info     ] FQE_20220422055950: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00015995875898613986, 'time_algorithm_update': 0.005023673356297505, 'loss': 0.003834093347225473, 'time_step': 0.0052531153322702435, 'init_value': -0.23104916512966156, 'ave_value': -0.17643392213058096, 'soft_opc': nan} step=498




2022-04-22 05:59.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:59.54 [info     ] FQE_20220422055950: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00015795087239828454, 'time_algorithm_update': 0.004379169050469456, 'loss': 0.003516935972014375, 'time_step': 0.004600142858114587, 'init_value': -0.2566562294960022, 'ave_value': -0.20038327717700521, 'soft_opc': nan} step=664




2022-04-22 05:59.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:59.55 [info     ] FQE_20220422055950: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016162481652684958, 'time_algorithm_update': 0.005286063056394279, 'loss': 0.0031625132689375923, 'time_step': 0.00552088668547481, 'init_value': -0.2536240518093109, 'ave_value': -0.20510218994071086, 'soft_opc': nan} step=830




2022-04-22 05:59.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:59.56 [info     ] FQE_20220422055950: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00015861155038856598, 'time_algorithm_update': 0.005250518580517137, 'loss': 0.0027559928102306574, 'time_step': 0.005482280110738364, 'init_value': -0.27357929944992065, 'ave_value': -0.2198393335205142, 'soft_opc': nan} step=996




2022-04-22 05:59.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:59.57 [info     ] FQE_20220422055950: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00015939718269440066, 'time_algorithm_update': 0.0051877283188233895, 'loss': 0.002527633826711204, 'time_step': 0.00541926579303052, 'init_value': -0.2692147493362427, 'ave_value': -0.2290837518426145, 'soft_opc': nan} step=1162




2022-04-22 05:59.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:59.58 [info     ] FQE_20220422055950: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001661691320947854, 'time_algorithm_update': 0.005368193948125264, 'loss': 0.0022787835234383412, 'time_step': 0.005607639450624764, 'init_value': -0.27497830986976624, 'ave_value': -0.25073110626439926, 'soft_opc': nan} step=1328




2022-04-22 05:59.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 05:59.59 [info     ] FQE_20220422055950: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00015730742948601045, 'time_algorithm_update': 0.005128376455192107, 'loss': 0.0020307817570696845, 'time_step': 0.00535973152482366, 'init_value': -0.2838228940963745, 'ave_value': -0.27065811721096233, 'soft_opc': nan} step=1494




2022-04-22 05:59.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.00 [info     ] FQE_20220422055950: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00015973613922854504, 'time_algorithm_update': 0.005174405603523714, 'loss': 0.001989729989653857, 'time_step': 0.0054069513297942745, 'init_value': -0.31110823154449463, 'ave_value': -0.3069564522697112, 'soft_opc': nan} step=1660




2022-04-22 06:00.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.01 [info     ] FQE_20220422055950: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001580557191228292, 'time_algorithm_update': 0.005215967994138419, 'loss': 0.0020259428337701677, 'time_step': 0.005449023591466697, 'init_value': -0.3167228400707245, 'ave_value': -0.3255250186606956, 'soft_opc': nan} step=1826




2022-04-22 06:00.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.02 [info     ] FQE_20220422055950: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00015796379870679006, 'time_algorithm_update': 0.004992308386837144, 'loss': 0.0020095976043270946, 'time_step': 0.005220451986933329, 'init_value': -0.33950918912887573, 'ave_value': -0.3563281851862599, 'soft_opc': nan} step=1992




2022-04-22 06:00.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.03 [info     ] FQE_20220422055950: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016754937459187335, 'time_algorithm_update': 0.005122833941356245, 'loss': 0.002276575229627204, 'time_step': 0.00536630383457046, 'init_value': -0.3128161132335663, 'ave_value': -0.34359471589717006, 'soft_opc': nan} step=2158




2022-04-22 06:00.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.04 [info     ] FQE_20220422055950: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001561699143375259, 'time_algorithm_update': 0.004592845238834979, 'loss': 0.0024018368295587443, 'time_step': 0.004819566944995558, 'init_value': -0.330593466758728, 'ave_value': -0.372773979475663, 'soft_opc': nan} step=2324




2022-04-22 06:00.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.05 [info     ] FQE_20220422055950: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016005067940217903, 'time_algorithm_update': 0.005031887307224503, 'loss': 0.0024473620757477425, 'time_step': 0.005266702318766031, 'init_value': -0.34103894233703613, 'ave_value': -0.38817638805438137, 'soft_opc': nan} step=2490




2022-04-22 06:00.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.06 [info     ] FQE_20220422055950: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.0001596341650170016, 'time_algorithm_update': 0.005106447690940765, 'loss': 0.0028281727478227927, 'time_step': 0.005343727318637343, 'init_value': -0.3538050651550293, 'ave_value': -0.40345322800179323, 'soft_opc': nan} step=2656




2022-04-22 06:00.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.07 [info     ] FQE_20220422055950: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001637016434267343, 'time_algorithm_update': 0.0051582089389663145, 'loss': 0.0030081394133410497, 'time_step': 0.0053960817405976445, 'init_value': -0.39503076672554016, 'ave_value': -0.4536814051797731, 'soft_opc': nan} step=2822




2022-04-22 06:00.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.08 [info     ] FQE_20220422055950: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016175551586840526, 'time_algorithm_update': 0.004986333559794599, 'loss': 0.003322745847806099, 'time_step': 0.0052194925675909205, 'init_value': -0.4154547154903412, 'ave_value': -0.4796743435600588, 'soft_opc': nan} step=2988




2022-04-22 06:00.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.09 [info     ] FQE_20220422055950: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00015951782824045205, 'time_algorithm_update': 0.005146869693893984, 'loss': 0.0037621565230460054, 'time_step': 0.005383541785090803, 'init_value': -0.47876349091529846, 'ave_value': -0.5429467247784356, 'soft_opc': nan} step=3154




2022-04-22 06:00.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.10 [info     ] FQE_20220422055950: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00015963560127350222, 'time_algorithm_update': 0.005025757364479892, 'loss': 0.0039024235563582727, 'time_step': 0.0052562406264155745, 'init_value': -0.43704235553741455, 'ave_value': -0.5080891844671773, 'soft_opc': nan} step=3320




2022-04-22 06:00.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.11 [info     ] FQE_20220422055950: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001595508621399661, 'time_algorithm_update': 0.0052249776311667565, 'loss': 0.004371661465637475, 'time_step': 0.005457237542393696, 'init_value': -0.5426517724990845, 'ave_value': -0.6084601365403179, 'soft_opc': nan} step=3486




2022-04-22 06:00.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.12 [info     ] FQE_20220422055950: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.000164590686200613, 'time_algorithm_update': 0.005148400743323636, 'loss': 0.004474533847336803, 'time_step': 0.005389081426413663, 'init_value': -0.5729963779449463, 'ave_value': -0.6356467122895861, 'soft_opc': nan} step=3652




2022-04-22 06:00.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.13 [info     ] FQE_20220422055950: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015679037714579018, 'time_algorithm_update': 0.004835477794509336, 'loss': 0.0048019027138263524, 'time_step': 0.0050666159894092976, 'init_value': -0.6202000379562378, 'ave_value': -0.6941470360964419, 'soft_opc': nan} step=3818




2022-04-22 06:00.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.14 [info     ] FQE_20220422055950: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00016490235386124575, 'time_algorithm_update': 0.00516033316233072, 'loss': 0.005386029082947938, 'time_step': 0.005401035389268255, 'init_value': -0.6780728101730347, 'ave_value': -0.7454923516928076, 'soft_opc': nan} step=3984




2022-04-22 06:00.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.15 [info     ] FQE_20220422055950: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00015953075454895756, 'time_algorithm_update': 0.0051945720810488046, 'loss': 0.005431473207995239, 'time_step': 0.005428121750613293, 'init_value': -0.7336127161979675, 'ave_value': -0.7861301909911924, 'soft_opc': nan} step=4150




2022-04-22 06:00.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.16 [info     ] FQE_20220422055950: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00015881837132465407, 'time_algorithm_update': 0.005071655813469945, 'loss': 0.005893565609866013, 'time_step': 0.005303815186741841, 'init_value': -0.7837277054786682, 'ave_value': -0.8244541911177572, 'soft_opc': nan} step=4316




2022-04-22 06:00.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.17 [info     ] FQE_20220422055950: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001597045415855316, 'time_algorithm_update': 0.005220291126205261, 'loss': 0.006191719015877896, 'time_step': 0.0054549869284572375, 'init_value': -0.8147715330123901, 'ave_value': -0.8494500517694129, 'soft_opc': nan} step=4482




2022-04-22 06:00.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.18 [info     ] FQE_20220422055950: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001604987914303699, 'time_algorithm_update': 0.005136678017765643, 'loss': 0.006547611861430804, 'time_step': 0.005365957696753812, 'init_value': -0.8252649307250977, 'ave_value': -0.8594299344664759, 'soft_opc': nan} step=4648




2022-04-22 06:00.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.19 [info     ] FQE_20220422055950: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001607156661619623, 'time_algorithm_update': 0.0050667150911078394, 'loss': 0.0070232290803336995, 'time_step': 0.005299625626529555, 'init_value': -0.9138078093528748, 'ave_value': -0.9152927045613897, 'soft_opc': nan} step=4814




2022-04-22 06:00.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.19 [info     ] FQE_20220422055950: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016196090054799276, 'time_algorithm_update': 0.005200215132839708, 'loss': 0.007446677901596953, 'time_step': 0.005436108773013195, 'init_value': -0.9184558391571045, 'ave_value': -0.9392512154136156, 'soft_opc': nan} step=4980




2022-04-22 06:00.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.21 [info     ] FQE_20220422055950: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001631932086255177, 'time_algorithm_update': 0.005302272647260183, 'loss': 0.007958800983909765, 'time_step': 0.005541891936796257, 'init_value': -1.0322699546813965, 'ave_value': -1.0136564936211987, 'soft_opc': nan} step=5146




2022-04-22 06:00.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.21 [info     ] FQE_20220422055950: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016267472002879683, 'time_algorithm_update': 0.005108918052121818, 'loss': 0.0085414727427085, 'time_step': 0.005346621375486076, 'init_value': -1.08209228515625, 'ave_value': -1.05309256114955, 'soft_opc': nan} step=5312




2022-04-22 06:00.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.22 [info     ] FQE_20220422055950: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016185174505394627, 'time_algorithm_update': 0.004569088120058358, 'loss': 0.009128662638004353, 'time_step': 0.004805065063108881, 'init_value': -1.1148024797439575, 'ave_value': -1.071391767323403, 'soft_opc': nan} step=5478




2022-04-22 06:00.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.23 [info     ] FQE_20220422055950: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00015910562262477646, 'time_algorithm_update': 0.005136362041335508, 'loss': 0.009530585954885885, 'time_step': 0.005373880087611187, 'init_value': -1.2074344158172607, 'ave_value': -1.1443638466726485, 'soft_opc': nan} step=5644




2022-04-22 06:00.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.24 [info     ] FQE_20220422055950: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001604370324008436, 'time_algorithm_update': 0.00518409171736384, 'loss': 0.010376894847961166, 'time_step': 0.005414651100894055, 'init_value': -1.2511087656021118, 'ave_value': -1.1607530693708172, 'soft_opc': nan} step=5810




2022-04-22 06:00.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.25 [info     ] FQE_20220422055950: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00017088723469929523, 'time_algorithm_update': 0.00523261420698051, 'loss': 0.010781664577669027, 'time_step': 0.005478959485708949, 'init_value': -1.28757905960083, 'ave_value': -1.1726326608722022, 'soft_opc': nan} step=5976




2022-04-22 06:00.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.26 [info     ] FQE_20220422055950: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001621734665100833, 'time_algorithm_update': 0.005169311201716044, 'loss': 0.011466294599988734, 'time_step': 0.0054035933620958446, 'init_value': -1.375115156173706, 'ave_value': -1.2484642343614503, 'soft_opc': nan} step=6142




2022-04-22 06:00.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.27 [info     ] FQE_20220422055950: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001669490193746176, 'time_algorithm_update': 0.005139656813747911, 'loss': 0.012034836889339992, 'time_step': 0.005379503031811082, 'init_value': -1.4244712591171265, 'ave_value': -1.2990560095505537, 'soft_opc': nan} step=6308




2022-04-22 06:00.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.28 [info     ] FQE_20220422055950: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00015915732785879848, 'time_algorithm_update': 0.005131991512804146, 'loss': 0.011593857372645289, 'time_step': 0.005361219486558294, 'init_value': -1.4604054689407349, 'ave_value': -1.3277505678774975, 'soft_opc': nan} step=6474




2022-04-22 06:00.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.29 [info     ] FQE_20220422055950: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016036665583231362, 'time_algorithm_update': 0.0050626117062855915, 'loss': 0.012634120493389508, 'time_step': 0.0052961699933890835, 'init_value': -1.4564051628112793, 'ave_value': -1.2940333493351768, 'soft_opc': nan} step=6640




2022-04-22 06:00.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.30 [info     ] FQE_20220422055950: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00015749701534409122, 'time_algorithm_update': 0.005056679966938065, 'loss': 0.013090394993792344, 'time_step': 0.005285265934036438, 'init_value': -1.5019428730010986, 'ave_value': -1.3241244731699577, 'soft_opc': nan} step=6806




2022-04-22 06:00.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.31 [info     ] FQE_20220422055950: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.000164276146026979, 'time_algorithm_update': 0.0050978445145021005, 'loss': 0.013405304578138539, 'time_step': 0.005334161850343268, 'init_value': -1.5014762878417969, 'ave_value': -1.2959701338808078, 'soft_opc': nan} step=6972




2022-04-22 06:00.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.32 [info     ] FQE_20220422055950: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00015787762331675333, 'time_algorithm_update': 0.004648890840001853, 'loss': 0.014208809803632727, 'time_step': 0.004875741809247488, 'init_value': -1.5243852138519287, 'ave_value': -1.3108131067288267, 'soft_opc': nan} step=7138




2022-04-22 06:00.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.33 [info     ] FQE_20220422055950: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001618718526449548, 'time_algorithm_update': 0.00526928183544113, 'loss': 0.01421094303095085, 'time_step': 0.005504701510969415, 'init_value': -1.5776562690734863, 'ave_value': -1.368416945564116, 'soft_opc': nan} step=7304




2022-04-22 06:00.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.34 [info     ] FQE_20220422055950: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015841621950448277, 'time_algorithm_update': 0.005045247365193194, 'loss': 0.015008205790577614, 'time_step': 0.005274147872465202, 'init_value': -1.5847350358963013, 'ave_value': -1.3580796551810963, 'soft_opc': nan} step=7470




2022-04-22 06:00.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.35 [info     ] FQE_20220422055950: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015577350754335703, 'time_algorithm_update': 0.004999259868300104, 'loss': 0.01561549623007325, 'time_step': 0.005226162542779762, 'init_value': -1.594977855682373, 'ave_value': -1.3569489029085113, 'soft_opc': nan} step=7636




2022-04-22 06:00.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.36 [info     ] FQE_20220422055950: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001579092209597668, 'time_algorithm_update': 0.00502333583601986, 'loss': 0.01630997935559668, 'time_step': 0.00525306219077972, 'init_value': -1.7350449562072754, 'ave_value': -1.4975256083088482, 'soft_opc': nan} step=7802




2022-04-22 06:00.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.37 [info     ] FQE_20220422055950: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00016363844814070737, 'time_algorithm_update': 0.005199620522648455, 'loss': 0.017002239928185288, 'time_step': 0.005435050251972245, 'init_value': -1.7231428623199463, 'ave_value': -1.4750665703536692, 'soft_opc': nan} step=7968




2022-04-22 06:00.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.38 [info     ] FQE_20220422055950: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00016062805451542498, 'time_algorithm_update': 0.005033783165805311, 'loss': 0.017228951599134738, 'time_step': 0.005265016153634313, 'init_value': -1.8112807273864746, 'ave_value': -1.5407537675546565, 'soft_opc': nan} step=8134




2022-04-22 06:00.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:00.39 [info     ] FQE_20220422055950: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001617483345859022, 'time_algorithm_update': 0.005141354468931635, 'loss': 0.0177170156086767, 'time_step': 0.005375154047127229, 'init_value': -1.906883955001831, 'ave_value': -1.6212635655803522, 'soft_opc': nan} step=8300




2022-04-22 06:00.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422055950/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 06:00.39 [info     ] Directory is created at d3rlpy_logs/FQE_20220422060039
2022-04-22 06:00.39 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 06:00.39 [debug    ] Building models...
2022-04-22 06:00.39 [debug    ] Models have been built.
2022-04-22 06:00.39 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422060039/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 06:00.41 [info     ] FQE_20220422060039: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.000162840582603632, 'time_algorithm_update': 0.0048895221810008205, 'loss': 0.025430102488784075, 'time_step': 0.005127316990564036, 'init_value': -0.615493893623352, 'ave_value': -0.6062127256296105, 'soft_opc': nan} step=344




2022-04-22 06:00.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.43 [info     ] FQE_20220422060039: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001668666684350302, 'time_algorithm_update': 0.005283901164698047, 'loss': 0.02206722917684982, 'time_step': 0.005528756352358086, 'init_value': -1.3504618406295776, 'ave_value': -1.3150517958375785, 'soft_opc': nan} step=688




2022-04-22 06:00.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.45 [info     ] FQE_20220422060039: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001720314802125443, 'time_algorithm_update': 0.005089250414870506, 'loss': 0.024604281862699535, 'time_step': 0.0053340314432632095, 'init_value': -2.2585463523864746, 'ave_value': -2.2112690451647246, 'soft_opc': nan} step=1032




2022-04-22 06:00.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.47 [info     ] FQE_20220422060039: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.000165251798407976, 'time_algorithm_update': 0.005185019138247468, 'loss': 0.02661397940814928, 'time_step': 0.005423559698947641, 'init_value': -2.778059720993042, 'ave_value': -2.7353425010870973, 'soft_opc': nan} step=1376




2022-04-22 06:00.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.49 [info     ] FQE_20220422060039: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017397694809492245, 'time_algorithm_update': 0.0052325711693874625, 'loss': 0.0333777770324242, 'time_step': 0.0054814572944197545, 'init_value': -3.5647549629211426, 'ave_value': -3.538340114845752, 'soft_opc': nan} step=1720




2022-04-22 06:00.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.51 [info     ] FQE_20220422060039: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00016534605691599291, 'time_algorithm_update': 0.004818963450054789, 'loss': 0.04015965139774909, 'time_step': 0.005061293757239053, 'init_value': -4.071572780609131, 'ave_value': -4.103643716026836, 'soft_opc': nan} step=2064




2022-04-22 06:00.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.53 [info     ] FQE_20220422060039: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00016558586164962415, 'time_algorithm_update': 0.005244600911473119, 'loss': 0.04740293741193708, 'time_step': 0.005484952483066293, 'init_value': -4.699140548706055, 'ave_value': -4.838191178100402, 'soft_opc': nan} step=2408




2022-04-22 06:00.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.55 [info     ] FQE_20220422060039: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00016606616419415142, 'time_algorithm_update': 0.005169084598851758, 'loss': 0.056639019594809346, 'time_step': 0.0054097965706226435, 'init_value': -5.081761360168457, 'ave_value': -5.35056591607496, 'soft_opc': nan} step=2752




2022-04-22 06:00.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.57 [info     ] FQE_20220422060039: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00016760756803113361, 'time_algorithm_update': 0.005277707826259525, 'loss': 0.06513241106416857, 'time_step': 0.005521790925846543, 'init_value': -5.299174785614014, 'ave_value': -5.706096580335116, 'soft_opc': nan} step=3096




2022-04-22 06:00.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:00.59 [info     ] FQE_20220422060039: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017364773639412813, 'time_algorithm_update': 0.005101610754811486, 'loss': 0.0768805333862601, 'time_step': 0.0053497116233027255, 'init_value': -5.802532196044922, 'ave_value': -6.410898328481896, 'soft_opc': nan} step=3440




2022-04-22 06:00.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.01 [info     ] FQE_20220422060039: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016626992891001147, 'time_algorithm_update': 0.00491267719934153, 'loss': 0.08715335874233482, 'time_step': 0.005154274230779603, 'init_value': -5.949902534484863, 'ave_value': -6.823384586146924, 'soft_opc': nan} step=3784




2022-04-22 06:01.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.03 [info     ] FQE_20220422060039: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00016952669897744822, 'time_algorithm_update': 0.005414183749709018, 'loss': 0.09947745610829876, 'time_step': 0.005660333605699761, 'init_value': -6.276576042175293, 'ave_value': -7.4298047485367, 'soft_opc': nan} step=4128




2022-04-22 06:01.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.05 [info     ] FQE_20220422060039: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016699003618816997, 'time_algorithm_update': 0.005255159943602806, 'loss': 0.10963599131507583, 'time_step': 0.005496075680089551, 'init_value': -6.242849349975586, 'ave_value': -7.658662641529615, 'soft_opc': nan} step=4472




2022-04-22 06:01.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.07 [info     ] FQE_20220422060039: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017204950022142986, 'time_algorithm_update': 0.005429869474366654, 'loss': 0.12520208615852996, 'time_step': 0.005681062853613565, 'init_value': -6.4436750411987305, 'ave_value': -8.27218897897532, 'soft_opc': nan} step=4816




2022-04-22 06:01.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.09 [info     ] FQE_20220422060039: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001704138378764308, 'time_algorithm_update': 0.004995411218598832, 'loss': 0.13508955006715084, 'time_step': 0.005243872487267783, 'init_value': -6.615421295166016, 'ave_value': -8.757317653422554, 'soft_opc': nan} step=5160




2022-04-22 06:01.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.11 [info     ] FQE_20220422060039: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017218950182892556, 'time_algorithm_update': 0.005286848822305369, 'loss': 0.15163343898453857, 'time_step': 0.00553771645523781, 'init_value': -7.034084796905518, 'ave_value': -9.512256977790212, 'soft_opc': nan} step=5504




2022-04-22 06:01.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.13 [info     ] FQE_20220422060039: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00017326585082120673, 'time_algorithm_update': 0.0052089407000430795, 'loss': 0.16944239056820787, 'time_step': 0.005456793446873509, 'init_value': -7.13626766204834, 'ave_value': -10.022527398979543, 'soft_opc': nan} step=5848




2022-04-22 06:01.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.15 [info     ] FQE_20220422060039: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017304822456005008, 'time_algorithm_update': 0.005303907533024632, 'loss': 0.18017307070603725, 'time_step': 0.005557500345762386, 'init_value': -7.1585235595703125, 'ave_value': -10.276300592118085, 'soft_opc': nan} step=6192




2022-04-22 06:01.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.18 [info     ] FQE_20220422060039: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017188038936881132, 'time_algorithm_update': 0.005314926075380902, 'loss': 0.19741341700712436, 'time_step': 0.005562864763792171, 'init_value': -7.300550937652588, 'ave_value': -10.85040797289872, 'soft_opc': nan} step=6536




2022-04-22 06:01.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.19 [info     ] FQE_20220422060039: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017008601233016614, 'time_algorithm_update': 0.005064796569735505, 'loss': 0.2113835897654035, 'time_step': 0.005313709031703861, 'init_value': -7.532116889953613, 'ave_value': -11.322552850331506, 'soft_opc': nan} step=6880




2022-04-22 06:01.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.22 [info     ] FQE_20220422060039: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00016872966012289358, 'time_algorithm_update': 0.0053719282150268555, 'loss': 0.2396537127287322, 'time_step': 0.005620409582936486, 'init_value': -7.760714530944824, 'ave_value': -11.877240795887134, 'soft_opc': nan} step=7224




2022-04-22 06:01.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.24 [info     ] FQE_20220422060039: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00017434566519981207, 'time_algorithm_update': 0.005381193964980369, 'loss': 0.2518623105282787, 'time_step': 0.005635373121084169, 'init_value': -7.925849437713623, 'ave_value': -12.281561006336297, 'soft_opc': nan} step=7568




2022-04-22 06:01.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.26 [info     ] FQE_20220422060039: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001706328502921171, 'time_algorithm_update': 0.005432094252386758, 'loss': 0.2778761235964593, 'time_step': 0.0056829840637916745, 'init_value': -8.313692092895508, 'ave_value': -13.036281690321157, 'soft_opc': nan} step=7912




2022-04-22 06:01.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.28 [info     ] FQE_20220422060039: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00017375447029291199, 'time_algorithm_update': 0.0052880942821502686, 'loss': 0.29812079740060105, 'time_step': 0.00553914419440336, 'init_value': -8.511953353881836, 'ave_value': -13.449507436240596, 'soft_opc': nan} step=8256




2022-04-22 06:01.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.30 [info     ] FQE_20220422060039: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001710979051368181, 'time_algorithm_update': 0.005043535731559576, 'loss': 0.3229541225537528, 'time_step': 0.005290574112603831, 'init_value': -8.74036693572998, 'ave_value': -13.98197199418671, 'soft_opc': nan} step=8600




2022-04-22 06:01.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.32 [info     ] FQE_20220422060039: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001710999843686126, 'time_algorithm_update': 0.005350829556930897, 'loss': 0.35179567211957347, 'time_step': 0.005601298670436061, 'init_value': -9.082008361816406, 'ave_value': -14.616495604953217, 'soft_opc': nan} step=8944




2022-04-22 06:01.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.34 [info     ] FQE_20220422060039: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.000172163164892862, 'time_algorithm_update': 0.005257932252662126, 'loss': 0.3577263240355912, 'time_step': 0.005507794923560564, 'init_value': -9.447315216064453, 'ave_value': -15.132979368991931, 'soft_opc': nan} step=9288




2022-04-22 06:01.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.36 [info     ] FQE_20220422060039: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017517181329948958, 'time_algorithm_update': 0.005465445823447649, 'loss': 0.36883831427562547, 'time_step': 0.005720506573832313, 'init_value': -9.749385833740234, 'ave_value': -15.768645989633928, 'soft_opc': nan} step=9632




2022-04-22 06:01.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.38 [info     ] FQE_20220422060039: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00017383694648742676, 'time_algorithm_update': 0.005307870548824931, 'loss': 0.3822842603928388, 'time_step': 0.005561102268307708, 'init_value': -9.981844902038574, 'ave_value': -16.376045496244956, 'soft_opc': nan} step=9976




2022-04-22 06:01.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.40 [info     ] FQE_20220422060039: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017354724019072776, 'time_algorithm_update': 0.005034374636273051, 'loss': 0.3981702607666511, 'time_step': 0.00528470236201619, 'init_value': -10.326847076416016, 'ave_value': -16.92689438118591, 'soft_opc': nan} step=10320




2022-04-22 06:01.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.42 [info     ] FQE_20220422060039: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00017249584197998047, 'time_algorithm_update': 0.005185209734495296, 'loss': 0.42103175516781766, 'time_step': 0.005434969136881274, 'init_value': -10.736586570739746, 'ave_value': -17.635542199458627, 'soft_opc': nan} step=10664




2022-04-22 06:01.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.44 [info     ] FQE_20220422060039: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017270515131395916, 'time_algorithm_update': 0.005273807880490325, 'loss': 0.43975046502296317, 'time_step': 0.005525909190954164, 'init_value': -11.10558032989502, 'ave_value': -18.178125906581275, 'soft_opc': nan} step=11008




2022-04-22 06:01.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.46 [info     ] FQE_20220422060039: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.000174758046172386, 'time_algorithm_update': 0.005287542592647464, 'loss': 0.4663974885601377, 'time_step': 0.005539210729820784, 'init_value': -11.452634811401367, 'ave_value': -18.829793548409466, 'soft_opc': nan} step=11352




2022-04-22 06:01.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.48 [info     ] FQE_20220422060039: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001728070336718892, 'time_algorithm_update': 0.005127742540004642, 'loss': 0.4805493648380561, 'time_step': 0.005377277385356815, 'init_value': -11.814240455627441, 'ave_value': -19.400895420065872, 'soft_opc': nan} step=11696




2022-04-22 06:01.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.50 [info     ] FQE_20220422060039: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00016913441724555436, 'time_algorithm_update': 0.005231544028880985, 'loss': 0.5002649900785019, 'time_step': 0.005478854789290317, 'init_value': -12.075105667114258, 'ave_value': -19.742402683856252, 'soft_opc': nan} step=12040




2022-04-22 06:01.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.52 [info     ] FQE_20220422060039: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001744108144627061, 'time_algorithm_update': 0.005331945973773336, 'loss': 0.5036765247919098, 'time_step': 0.005586152159890463, 'init_value': -12.523499488830566, 'ave_value': -20.184308183783884, 'soft_opc': nan} step=12384




2022-04-22 06:01.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.54 [info     ] FQE_20220422060039: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001736650633257489, 'time_algorithm_update': 0.005252673182376596, 'loss': 0.5123785967115573, 'time_step': 0.005505021228346714, 'init_value': -12.832809448242188, 'ave_value': -20.53796704196715, 'soft_opc': nan} step=12728




2022-04-22 06:01.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.56 [info     ] FQE_20220422060039: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.000172383563463078, 'time_algorithm_update': 0.005270853292110355, 'loss': 0.5250463596176963, 'time_step': 0.005521589240362478, 'init_value': -13.523951530456543, 'ave_value': -21.31451106428563, 'soft_opc': nan} step=13072




2022-04-22 06:01.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:01.58 [info     ] FQE_20220422060039: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016861391621966694, 'time_algorithm_update': 0.004474389691685521, 'loss': 0.5444013168439702, 'time_step': 0.004718436058177505, 'init_value': -13.906211853027344, 'ave_value': -21.687311785178142, 'soft_opc': nan} step=13416




2022-04-22 06:01.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.00 [info     ] FQE_20220422060039: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017114226208176723, 'time_algorithm_update': 0.003986147947089617, 'loss': 0.5714283583274241, 'time_step': 0.0042332341504651445, 'init_value': -14.600076675415039, 'ave_value': -22.341339177403363, 'soft_opc': nan} step=13760




2022-04-22 06:02.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.01 [info     ] FQE_20220422060039: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016680359840393066, 'time_algorithm_update': 0.004022958666779274, 'loss': 0.5861445916220979, 'time_step': 0.004267751477485479, 'init_value': -14.994302749633789, 'ave_value': -22.75446570502208, 'soft_opc': nan} step=14104




2022-04-22 06:02.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.03 [info     ] FQE_20220422060039: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001732214938762576, 'time_algorithm_update': 0.003943714984627657, 'loss': 0.5995843379520053, 'time_step': 0.004196999377982561, 'init_value': -15.087260246276855, 'ave_value': -22.986189675519057, 'soft_opc': nan} step=14448




2022-04-22 06:02.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.05 [info     ] FQE_20220422060039: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016505912292835324, 'time_algorithm_update': 0.0038855110490044883, 'loss': 0.6087716217044481, 'time_step': 0.004127416499825411, 'init_value': -15.455157279968262, 'ave_value': -23.360350610570865, 'soft_opc': nan} step=14792




2022-04-22 06:02.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.06 [info     ] FQE_20220422060039: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001664750797804012, 'time_algorithm_update': 0.003852562155834464, 'loss': 0.6192428633397402, 'time_step': 0.004093894431757373, 'init_value': -15.752471923828125, 'ave_value': -23.45706844611748, 'soft_opc': nan} step=15136




2022-04-22 06:02.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.08 [info     ] FQE_20220422060039: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00016274077947749647, 'time_algorithm_update': 0.0040160278941309725, 'loss': 0.6287645949808838, 'time_step': 0.004254393106283143, 'init_value': -15.913049697875977, 'ave_value': -23.691798657232578, 'soft_opc': nan} step=15480




2022-04-22 06:02.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.09 [info     ] FQE_20220422060039: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016537932462470475, 'time_algorithm_update': 0.004031748965729115, 'loss': 0.6476876201530442, 'time_step': 0.004273163717846537, 'init_value': -16.17756462097168, 'ave_value': -23.524220130733543, 'soft_opc': nan} step=15824




2022-04-22 06:02.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.11 [info     ] FQE_20220422060039: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016710508701413176, 'time_algorithm_update': 0.0038680108480675275, 'loss': 0.6609365292554071, 'time_step': 0.004111057797143626, 'init_value': -16.584171295166016, 'ave_value': -24.046866447941678, 'soft_opc': nan} step=16168




2022-04-22 06:02.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.12 [info     ] FQE_20220422060039: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00017006383385769156, 'time_algorithm_update': 0.0038954830446908642, 'loss': 0.6761702815460604, 'time_step': 0.004138940295507741, 'init_value': -17.305362701416016, 'ave_value': -24.65249166630947, 'soft_opc': nan} step=16512




2022-04-22 06:02.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.14 [info     ] FQE_20220422060039: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001653204130571942, 'time_algorithm_update': 0.003887777411660483, 'loss': 0.7052570282438293, 'time_step': 0.004127649373786394, 'init_value': -17.280946731567383, 'ave_value': -24.482575638202935, 'soft_opc': nan} step=16856




2022-04-22 06:02.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:02.15 [info     ] FQE_20220422060039: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016473060430482377, 'time_algorithm_update': 0.0038109816784082456, 'loss': 0.7192805240141921, 'time_step': 0.004049825113873149, 'init_value': -17.409252166748047, 'ave_value': -24.53963704710608, 'soft_opc': nan} step=17200




2022-04-22 06:02.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422060039/model_17200.pt
search iteration:  24
using hyper params:  [0.0011196038123331806, 0.0010615573292358771, 9.983434030932379e-05, 3]
2022-04-22 06:02.15 [debug    ] RoundIterator is selected.
2022-04-22 06:02.15 [info     ] Directory is created at d3rlpy_logs/CQL_20220422060215
2022-04-22 06:02.15 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 06:02.15 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 06:02.15 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422060215/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0011196038123331806, 'actor_optim_factory': {'op

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:02.31 [info     ] CQL_20220422060215: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00038852788120335925, 'time_algorithm_update': 0.04217032269935388, 'temp_loss': 4.577562750419441, 'temp': 0.981702555810785, 'alpha_loss': -17.555507637861837, 'alpha': 1.017800910969001, 'critic_loss': 86.2119822529699, 'actor_loss': -0.6755584080655106, 'time_step': 0.042658026507824146, 'td_error': 1.2392303713633521, 'init_value': -2.171520471572876, 'ave_value': -1.8356055969764087} step=346
2022-04-22 06:02.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:02.46 [info     ] CQL_20220422060215: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003676531631822531, 'time_algorithm_update': 0.042454050455479266, 'temp_loss': 4.7561056503670756, 'temp': 0.9469704292068591, 'alpha_loss': -18.34107942526051, 'alpha': 1.054459043320893, 'critic_loss': 67.92172032835856, 'actor_loss': 2.2274658783322816, 'time_step': 0.04292627359401284, 'td_error': 1.2461228183301492, 'init_value': -4.654655933380127, 'ave_value': -4.071456520253198} step=692
2022-04-22 06:02.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:03.02 [info     ] CQL_20220422060215: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003683484358594597, 'time_algorithm_update': 0.043546096437928304, 'temp_loss': 4.61724024976609, 'temp': 0.9147838107767822, 'alpha_loss': -19.016538432567796, 'alpha': 1.0928657978256313, 'critic_loss': 101.49364239907678, 'actor_loss': 5.037083170317501, 'time_step': 0.044014542778103335, 'td_error': 1.3023591047675305, 'init_value': -7.612480640411377, 'ave_value': -6.989320426859975} step=1038
2022-04-22 06:03.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:03.18 [info     ] CQL_20220422060215: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003742572200091588, 'time_algorithm_update': 0.04391570794100017, 'temp_loss': 4.466508564921472, 'temp': 0.8844544255320047, 'alpha_loss': -19.719514631811595, 'alpha': 1.1331213450845266, 'critic_loss': 150.8599800815472, 'actor_loss': 7.8529693804724365, 'time_step': 0.04439026151778381, 'td_error': 1.3490902204250343, 'init_value': -9.735682487487793, 'ave_value': -8.986617103262839} step=1384
2022-04-22 06:03.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:03.33 [info     ] CQL_20220422060215: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00035167705116933483, 'time_algorithm_update': 0.04263815438816313, 'temp_loss': 4.321326038051892, 'temp': 0.855619063267129, 'alpha_loss': -20.450717804748887, 'alpha': 1.1752881097655765, 'critic_loss': 215.38027063270525, 'actor_loss': 10.218263016959835, 'time_step': 0.043087886937091806, 'td_error': 1.4214473134622836, 'init_value': -12.213171005249023, 'ave_value': -11.465989474743788} step=1730
2022-04-22 06:03.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:03.49 [info     ] CQL_20220422060215: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00037422483367037914, 'time_algorithm_update': 0.043485312792607124, 'temp_loss': 4.18161341082843, 'temp': 0.8280670456803603, 'alpha_loss': -21.220157529577353, 'alpha': 1.2194102689020896, 'critic_loss': 292.6270005330874, 'actor_loss': 11.984684436996549, 'time_step': 0.043964319146437454, 'td_error': 1.44382954688816, 'init_value': -13.239518165588379, 'ave_value': -12.450625875132973} step=2076
2022-04-22 06:03.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:04.05 [info     ] CQL_20220422060215: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00036906575880987796, 'time_algorithm_update': 0.04335422047300835, 'temp_loss': 4.048470045789818, 'temp': 0.8016530947189111, 'alpha_loss': -22.01979292610477, 'alpha': 1.265533293258248, 'critic_loss': 382.2465695066948, 'actor_loss': 13.083166734331606, 'time_step': 0.04382520810717103, 'td_error': 1.47026930755197, 'init_value': -14.0877046585083, 'ave_value': -13.312714090975838} step=2422
2022-04-22 06:04.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:04.21 [info     ] CQL_20220422060215: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003688252730176628, 'time_algorithm_update': 0.046170588862689245, 'temp_loss': 3.9203728568347205, 'temp': 0.776271737207567, 'alpha_loss': -22.854125993100205, 'alpha': 1.3136915247564371, 'critic_loss': 481.86058776502665, 'actor_loss': 13.502591596173414, 'time_step': 0.04664175841160592, 'td_error': 1.4670925108681823, 'init_value': -13.992230415344238, 'ave_value': -13.324854357336415} step=2768
2022-04-22 06:04.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:04.39 [info     ] CQL_20220422060215: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003827286593486808, 'time_algorithm_update': 0.047284011206874955, 'temp_loss': 3.7974202867188205, 'temp': 0.7518388451523863, 'alpha_loss': -23.725931829110735, 'alpha': 1.3639286646264137, 'critic_loss': 591.196745944161, 'actor_loss': 13.021901155482826, 'time_step': 0.04776804640113963, 'td_error': 1.4328089210851607, 'init_value': -13.102737426757812, 'ave_value': -12.544230161588189} step=3114
2022-04-22 06:04.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:04.56 [info     ] CQL_20220422060215: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003855421363962868, 'time_algorithm_update': 0.04754471089798591, 'temp_loss': 3.677693922395651, 'temp': 0.7282838316666598, 'alpha_loss': -24.631731292415907, 'alpha': 1.4162959142916465, 'critic_loss': 707.6792972983652, 'actor_loss': 11.586530125899122, 'time_step': 0.04803571949115378, 'td_error': 1.3878527852119065, 'init_value': -11.398300170898438, 'ave_value': -10.988735941067981} step=3460
2022-04-22 06:04.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:05.13 [info     ] CQL_20220422060215: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00038126920689048104, 'time_algorithm_update': 0.04744050613028466, 'temp_loss': 3.5636309454206785, 'temp': 0.7055543478513728, 'alpha_loss': -25.581699068146634, 'alpha': 1.470842474802381, 'critic_loss': 834.0633553741984, 'actor_loss': 9.234926868725374, 'time_step': 0.04792684764531306, 'td_error': 1.3320765602083935, 'init_value': -8.75719928741455, 'ave_value': -8.490106049732553} step=3806
2022-04-22 06:05.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:05.30 [info     ] CQL_20220422060215: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003891370200008326, 'time_algorithm_update': 0.047206208885060566, 'temp_loss': 3.4512657361223518, 'temp': 0.683596876143031, 'alpha_loss': -26.567209205186437, 'alpha': 1.527637863434808, 'critic_loss': 970.5660213404308, 'actor_loss': 6.339522692509469, 'time_step': 0.04770037617986602, 'td_error': 1.2937733575654098, 'init_value': -6.095292091369629, 'ave_value': -5.956639585816052} step=4152
2022-04-22 06:05.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:05.47 [info     ] CQL_20220422060215: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00039096512546429055, 'time_algorithm_update': 0.047913548574282254, 'temp_loss': 3.3450164629544825, 'temp': 0.6623738101452072, 'alpha_loss': -27.59623226540626, 'alpha': 1.5867404420940863, 'critic_loss': 1117.4716341757362, 'actor_loss': 4.022872718772447, 'time_step': 0.04840923733793931, 'td_error': 1.2786311435105375, 'init_value': -4.532949447631836, 'ave_value': -4.458199530950142} step=4498
2022-04-22 06:05.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:06.05 [info     ] CQL_20220422060215: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00038779746590322155, 'time_algorithm_update': 0.04741155205434457, 'temp_loss': 3.2414993219981993, 'temp': 0.6418442753698096, 'alpha_loss': -28.661736593081084, 'alpha': 1.6482243648154198, 'critic_loss': 1267.2380240556133, 'actor_loss': 3.009516257771178, 'time_step': 0.04790291483002591, 'td_error': 1.2756415320135674, 'init_value': -3.987175464630127, 'ave_value': -3.940082339100953} step=4844
2022-04-22 06:06.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:06.21 [info     ] CQL_20220422060215: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003890750036074247, 'time_algorithm_update': 0.046643049730730884, 'temp_loss': 3.14079553061138, 'temp': 0.6219787032617999, 'alpha_loss': -29.77311484386466, 'alpha': 1.7121586103659834, 'critic_loss': 1417.7304814509573, 'actor_loss': 2.6535783675364675, 'time_step': 0.04712944361515817, 'td_error': 1.27493609842011, 'init_value': -3.7721645832061768, 'ave_value': -3.7370467443271744} step=5190
2022-04-22 06:06.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:06.39 [info     ] CQL_20220422060215: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00036795635443891405, 'time_algorithm_update': 0.04724296117793618, 'temp_loss': 3.0439649558480766, 'temp': 0.6027511296244715, 'alpha_loss': -30.928828465456217, 'alpha': 1.7786297415722312, 'critic_loss': 1574.7259884872879, 'actor_loss': 2.5476738173148537, 'time_step': 0.0477082226317742, 'td_error': 1.2749117440678688, 'init_value': -3.661862850189209, 'ave_value': -3.632932433476093} step=5536
2022-04-22 06:06.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:06.55 [info     ] CQL_20220422060215: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003673809801222961, 'time_algorithm_update': 0.046107824138134204, 'temp_loss': 2.9500781642219236, 'temp': 0.5841298878537438, 'alpha_loss': -32.130176009470325, 'alpha': 1.8477289190871178, 'critic_loss': 1740.739844878974, 'actor_loss': 2.5270090103149414, 'time_step': 0.04657250057066107, 'td_error': 1.2756470562808753, 'init_value': -3.6491575241088867, 'ave_value': -3.625889064848508} step=5882
2022-04-22 06:06.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:07.12 [info     ] CQL_20220422060215: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00036801699269024623, 'time_algorithm_update': 0.04565214077172252, 'temp_loss': 2.8589842664024046, 'temp': 0.566095659153999, 'alpha_loss': -33.37721757392663, 'alpha': 1.9195487785201542, 'critic_loss': 1916.431960971369, 'actor_loss': 2.5513231306406805, 'time_step': 0.04611457221080802, 'td_error': 1.2765635493040615, 'init_value': -3.663205146789551, 'ave_value': -3.643048862034023} step=6228
2022-04-22 06:07.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:07.28 [info     ] CQL_20220422060215: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00037431234569218805, 'time_algorithm_update': 0.04571353217769909, 'temp_loss': 2.7705014578868887, 'temp': 0.5486276940458772, 'alpha_loss': -34.67835730624337, 'alpha': 1.9941906353641796, 'critic_loss': 2105.9826049804688, 'actor_loss': 2.589412817376197, 'time_step': 0.04618275923536003, 'td_error': 1.277671780559233, 'init_value': -3.7143218517303467, 'ave_value': -3.6969043097263152} step=6574
2022-04-22 06:07.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:07.46 [info     ] CQL_20220422060215: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003795485965089302, 'time_algorithm_update': 0.046994310582993346, 'temp_loss': 2.685282848473918, 'temp': 0.5317077516131319, 'alpha_loss': -36.02818383233396, 'alpha': 2.0717601500494633, 'critic_loss': 2306.3820645547326, 'actor_loss': 2.6719847249157858, 'time_step': 0.04747358360731533, 'td_error': 1.2784997139349592, 'init_value': -3.7239785194396973, 'ave_value': -3.7115595435463122} step=6920
2022-04-22 06:07.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:08.03 [info     ] CQL_20220422060215: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00038508597136921967, 'time_algorithm_update': 0.04713030702116861, 'temp_loss': 2.6021439464106035, 'temp': 0.5153127770892457, 'alpha_loss': -37.42842936653622, 'alpha': 2.152360240159007, 'critic_loss': 2516.5307292607476, 'actor_loss': 2.7578160452704896, 'time_step': 0.04761408863729135, 'td_error': 1.279631246776106, 'init_value': -3.7775766849517822, 'ave_value': -3.7668312042004803} step=7266
2022-04-22 06:08.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:08.20 [info     ] CQL_20220422060215: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003745225123587371, 'time_algorithm_update': 0.04673950176018511, 'temp_loss': 2.5222411899897406, 'temp': 0.4994265323429438, 'alpha_loss': -38.879665121177716, 'alpha': 2.236107789712145, 'critic_loss': 2746.604583916637, 'actor_loss': 2.8835567752749935, 'time_step': 0.04721419659653151, 'td_error': 1.28155236728827, 'init_value': -3.9126999378204346, 'ave_value': -3.902079952469704} step=7612
2022-04-22 06:08.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:08.37 [info     ] CQL_20220422060215: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.000385833613445304, 'time_algorithm_update': 0.04726779805442501, 'temp_loss': 2.444693691468652, 'temp': 0.484032102489058, 'alpha_loss': -40.39583586543971, 'alpha': 2.3231160833656443, 'critic_loss': 2981.3991487536127, 'actor_loss': 3.003194997076354, 'time_step': 0.047752381060164786, 'td_error': 1.28216858194323, 'init_value': -3.9234955310821533, 'ave_value': -3.9151087046913084} step=7958
2022-04-22 06:08.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:08.54 [info     ] CQL_20220422060215: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00037767845771216244, 'time_algorithm_update': 0.04730948685221589, 'temp_loss': 2.3693319097419696, 'temp': 0.4691141484617498, 'alpha_loss': -41.96984323049556, 'alpha': 2.4135243169145086, 'critic_loss': 3229.649227781792, 'actor_loss': 3.1147097369839, 'time_step': 0.047785202202769377, 'td_error': 1.2846902679416, 'init_value': -4.106130123138428, 'ave_value': -4.0986821040320365} step=8304
2022-04-22 06:08.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:09.11 [info     ] CQL_20220422060215: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003903139533335074, 'time_algorithm_update': 0.04750077710675366, 'temp_loss': 2.2963571934341696, 'temp': 0.45465700448937496, 'alpha_loss': -43.60537056013339, 'alpha': 2.507458815684897, 'critic_loss': 3484.4682793589686, 'actor_loss': 3.270056778984952, 'time_step': 0.04799076449664342, 'td_error': 1.2858824475786166, 'init_value': -4.160933017730713, 'ave_value': -4.155406538871523} step=8650
2022-04-22 06:09.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:09.28 [info     ] CQL_20220422060215: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00037303136263279555, 'time_algorithm_update': 0.0473387744385383, 'temp_loss': 2.225662081916897, 'temp': 0.44064587681968775, 'alpha_loss': -45.30020040170306, 'alpha': 2.605057533076733, 'critic_loss': 3755.6232783146675, 'actor_loss': 3.4101136778131385, 'time_step': 0.04780785265685506, 'td_error': 1.288406989598335, 'init_value': -4.340737819671631, 'ave_value': -4.334882409180324} step=8996
2022-04-22 06:09.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:09.45 [info     ] CQL_20220422060215: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00037853841836741893, 'time_algorithm_update': 0.04731384315931728, 'temp_loss': 2.1570052494203424, 'temp': 0.42706902523261275, 'alpha_loss': -47.060641735275354, 'alpha': 2.706447533789398, 'critic_loss': 4030.829417675217, 'actor_loss': 3.5690201100586467, 'time_step': 0.047792801967245994, 'td_error': 1.2909551326345505, 'init_value': -4.513098239898682, 'ave_value': -4.507343903962556} step=9342
2022-04-22 06:09.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:10.02 [info     ] CQL_20220422060215: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00038667290196942456, 'time_algorithm_update': 0.047192081550642244, 'temp_loss': 2.0902603402992206, 'temp': 0.4139112998984453, 'alpha_loss': -48.89585008786593, 'alpha': 2.8118011586238882, 'critic_loss': 4329.63940711931, 'actor_loss': 3.749630932173977, 'time_step': 0.04768431875747063, 'td_error': 1.291776509975619, 'init_value': -4.545405387878418, 'ave_value': -4.541672967700045} step=9688
2022-04-22 06:10.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:10.19 [info     ] CQL_20220422060215: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003705479506123273, 'time_algorithm_update': 0.046779059950326905, 'temp_loss': 2.0263753023450772, 'temp': 0.401158623612685, 'alpha_loss': -50.79962871529463, 'alpha': 2.921261183788322, 'critic_loss': 4646.936708306991, 'actor_loss': 3.8990829638663054, 'time_step': 0.0472473381571687, 'td_error': 1.2954247608875948, 'init_value': -4.797491073608398, 'ave_value': -4.7931311965949615} step=10034
2022-04-22 06:10.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:10.37 [info     ] CQL_20220422060215: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003796092347602624, 'time_algorithm_update': 0.04800154156767564, 'temp_loss': 1.9637292354782192, 'temp': 0.38879750953244335, 'alpha_loss': -52.77315188281109, 'alpha': 3.034976589197368, 'critic_loss': 4972.756209356937, 'actor_loss': 4.085262205559395, 'time_step': 0.048477867435168666, 'td_error': 1.297429549485242, 'init_value': -4.917965888977051, 'ave_value': -4.914510213418728} step=10380
2022-04-22 06:10.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:10.54 [info     ] CQL_20220422060215: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003793019090773742, 'time_algorithm_update': 0.0474396151614327, 'temp_loss': 1.903180383877947, 'temp': 0.3768199596963177, 'alpha_loss': -54.82418049277598, 'alpha': 3.153115557108311, 'critic_loss': 5294.754237886109, 'actor_loss': 4.254067251447998, 'time_step': 0.047917904881383644, 'td_error': 1.298465121377295, 'init_value': -4.968360424041748, 'ave_value': -4.966069658766883} step=10726
2022-04-22 06:10.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:11.11 [info     ] CQL_20220422060215: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003810996954151661, 'time_algorithm_update': 0.04707025034579238, 'temp_loss': 1.8446607014347363, 'temp': 0.365210836677882, 'alpha_loss': -56.965577186187566, 'alpha': 3.2758569875893566, 'critic_loss': 5587.374940728866, 'actor_loss': 4.422091007232666, 'time_step': 0.047554218700166385, 'td_error': 1.3017976098159818, 'init_value': -5.183289527893066, 'ave_value': -5.180560936527605} step=11072
2022-04-22 06:11.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:11.28 [info     ] CQL_20220422060215: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003836223844847927, 'time_algorithm_update': 0.04693403478302707, 'temp_loss': 1.7875899458896218, 'temp': 0.3539593909689457, 'alpha_loss': -59.17140434794343, 'alpha': 3.403381379353518, 'critic_loss': 5956.819750835441, 'actor_loss': 4.612497304905356, 'time_step': 0.04741789495324813, 'td_error': 1.3039419373914967, 'init_value': -5.310039520263672, 'ave_value': -5.307801199524591} step=11418
2022-04-22 06:11.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:11.45 [info     ] CQL_20220422060215: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003715202298467559, 'time_algorithm_update': 0.04664499015477351, 'temp_loss': 1.7327204112372647, 'temp': 0.3430547724569464, 'alpha_loss': -61.48631967974536, 'alpha': 3.535857441797422, 'critic_loss': 6333.474907141889, 'actor_loss': 4.781561402227148, 'time_step': 0.04712154617199319, 'td_error': 1.3071172208926014, 'init_value': -5.505621910095215, 'ave_value': -5.5034372923893065} step=11764
2022-04-22 06:11.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:12.02 [info     ] CQL_20220422060215: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003796712511536703, 'time_algorithm_update': 0.04752187094936481, 'temp_loss': 1.6795069861274234, 'temp': 0.332484521703913, 'alpha_loss': -63.880446704136844, 'alpha': 3.673515182010011, 'critic_loss': 6709.726153246929, 'actor_loss': 4.949789554397495, 'time_step': 0.048006698575323026, 'td_error': 1.309263678423844, 'init_value': -5.62845516204834, 'ave_value': -5.626528070412749} step=12110
2022-04-22 06:12.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:12.19 [info     ] CQL_20220422060215: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003749524926863654, 'time_algorithm_update': 0.04670154772741946, 'temp_loss': 1.6276684892659932, 'temp': 0.32224135057774583, 'alpha_loss': -66.36415020716673, 'alpha': 3.816522729190099, 'critic_loss': 7055.402555432623, 'actor_loss': 5.1053323952448855, 'time_step': 0.04717736023698928, 'td_error': 1.311774082887154, 'init_value': -5.77326774597168, 'ave_value': -5.771793755630341} step=12456
2022-04-22 06:12.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:12.36 [info     ] CQL_20220422060215: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00036601730853835974, 'time_algorithm_update': 0.04656219137886356, 'temp_loss': 1.5772889931766974, 'temp': 0.312313942378656, 'alpha_loss': -68.95152280509816, 'alpha': 3.9650937594430293, 'critic_loss': 7457.601640116962, 'actor_loss': 5.308591663492897, 'time_step': 0.04703370890865436, 'td_error': 1.3152925597048988, 'init_value': -5.978115081787109, 'ave_value': -5.976866124398678} step=12802
2022-04-22 06:12.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:12.53 [info     ] CQL_20220422060215: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.000374887720008806, 'time_algorithm_update': 0.047348893446729365, 'temp_loss': 1.5293426844425972, 'temp': 0.3026918262415539, 'alpha_loss': -71.63232166091831, 'alpha': 4.119447169276331, 'critic_loss': 7799.75250349982, 'actor_loss': 5.463162271962689, 'time_step': 0.04782346287214687, 'td_error': 1.318360597514657, 'init_value': -6.151528835296631, 'ave_value': -6.150490999470508} step=13148
2022-04-22 06:12.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:13.10 [info     ] CQL_20220422060215: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00037061203421884876, 'time_algorithm_update': 0.04690585384479148, 'temp_loss': 1.482114221319298, 'temp': 0.29336503303119904, 'alpha_loss': -74.4196908322373, 'alpha': 4.279806240445617, 'critic_loss': 8200.330539593118, 'actor_loss': 5.646846195176847, 'time_step': 0.04737736172758775, 'td_error': 1.3202070278125382, 'init_value': -6.24868631362915, 'ave_value': -6.247699936151844} step=13494
2022-04-22 06:13.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:13.26 [info     ] CQL_20220422060215: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00037384722274162865, 'time_algorithm_update': 0.04367956191818149, 'temp_loss': 1.4362848693924832, 'temp': 0.2843265891764205, 'alpha_loss': -77.31314929234499, 'alpha': 4.446400179339282, 'critic_loss': 8559.391782198338, 'actor_loss': 5.805833868897719, 'time_step': 0.04415373926217846, 'td_error': 1.3227999641284387, 'init_value': -6.389890193939209, 'ave_value': -6.389003638624297} step=13840
2022-04-22 06:13.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:13.42 [info     ] CQL_20220422060215: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00038436727027672563, 'time_algorithm_update': 0.04420926598455176, 'temp_loss': 1.3920924611863372, 'temp': 0.2755671268425925, 'alpha_loss': -80.33331810394463, 'alpha': 4.619485033729862, 'critic_loss': 8912.884155979047, 'actor_loss': 5.957251929134303, 'time_step': 0.0446952078383782, 'td_error': 1.3249545444663735, 'init_value': -6.503598213195801, 'ave_value': -6.502964219825422} step=14186
2022-04-22 06:13.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:13.58 [info     ] CQL_20220422060215: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003772333178217011, 'time_algorithm_update': 0.043626733597992476, 'temp_loss': 1.3489060043599563, 'temp': 0.2670785667579298, 'alpha_loss': -83.44207470403241, 'alpha': 4.799303468252193, 'critic_loss': 9238.352620913114, 'actor_loss': 6.113470876837052, 'time_step': 0.04410307807040352, 'td_error': 1.3284237049437768, 'init_value': -6.689789772033691, 'ave_value': -6.689200863718365} step=14532
2022-04-22 06:13.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:14.14 [info     ] CQL_20220422060215: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003857964036092593, 'time_algorithm_update': 0.04383089018694927, 'temp_loss': 1.307467219457461, 'temp': 0.2588518885518774, 'alpha_loss': -86.69823916661257, 'alpha': 4.986106180731272, 'critic_loss': 9590.972326025108, 'actor_loss': 6.306815231466569, 'time_step': 0.04431900123640292, 'td_error': 1.332957446250306, 'init_value': -6.9303483963012695, 'ave_value': -6.9294395736180645} step=14878
2022-04-22 06:14.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:14.29 [info     ] CQL_20220422060215: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00038361549377441406, 'time_algorithm_update': 0.04359040990730242, 'temp_loss': 1.2671958259764435, 'temp': 0.2508773652115309, 'alpha_loss': -90.07480197972644, 'alpha': 5.1801941932281315, 'critic_loss': 9935.200804958453, 'actor_loss': 6.453149554357363, 'time_step': 0.04407720758735789, 'td_error': 1.3351899541549646, 'init_value': -7.040250301361084, 'ave_value': -7.039658685632654} step=15224
2022-04-22 06:14.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:14.45 [info     ] CQL_20220422060215: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00037140997848069735, 'time_algorithm_update': 0.04316614473486222, 'temp_loss': 1.2278589110153948, 'temp': 0.24314916491336216, 'alpha_loss': -93.57098130683679, 'alpha': 5.381807529857393, 'critic_loss': 9994.896475907695, 'actor_loss': 6.540439055834202, 'time_step': 0.04363868691328633, 'td_error': 1.3314652814865007, 'init_value': -6.829610824584961, 'ave_value': -6.830105008757923} step=15570
2022-04-22 06:14.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:15.01 [info     ] CQL_20220422060215: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003747891828503912, 'time_algorithm_update': 0.0433504326495132, 'temp_loss': 1.1901480386711958, 'temp': 0.23566074889002508, 'alpha_loss': -97.21923839150136, 'alpha': 5.591289064098645, 'critic_loss': 9056.47527688087, 'actor_loss': 6.43149831804926, 'time_step': 0.043827908576568424, 'td_error': 1.3305759414657978, 'init_value': -6.77431058883667, 'ave_value': -6.775592412175359} step=15916
2022-04-22 06:15.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:15.17 [info     ] CQL_20220422060215: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003705996309401672, 'time_algorithm_update': 0.04411387650263792, 'temp_loss': 1.153618357085079, 'temp': 0.2284004396269087, 'alpha_loss': -101.01189759976602, 'alpha': 5.80895140956592, 'critic_loss': 7894.404979904263, 'actor_loss': 6.343516963065704, 'time_step': 0.0445855738799696, 'td_error': 1.3295295853146827, 'init_value': -6.716421127319336, 'ave_value': -6.7181382857097045} step=16262
2022-04-22 06:15.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:15.33 [info     ] CQL_20220422060215: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003768557068929507, 'time_algorithm_update': 0.04567855837717222, 'temp_loss': 1.1182168181231946, 'temp': 0.22136453241971188, 'alpha_loss': -104.93154563242301, 'alpha': 6.035075150473269, 'critic_loss': 7164.9626323722005, 'actor_loss': 6.354511012920755, 'time_step': 0.04615728979165844, 'td_error': 1.3314977133956203, 'init_value': -6.829521656036377, 'ave_value': -6.830479353375207} step=16608
2022-04-22 06:15.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:15.50 [info     ] CQL_20220422060215: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00038610579650526103, 'time_algorithm_update': 0.04691463536609804, 'temp_loss': 1.083861314147883, 'temp': 0.21454371300908182, 'alpha_loss': -109.03153264040203, 'alpha': 6.270014224024866, 'critic_loss': 6555.397659919166, 'actor_loss': 6.335887881372705, 'time_step': 0.047400935536864176, 'td_error': 1.3283481186383514, 'init_value': -6.653804302215576, 'ave_value': -6.6548877410382214} step=16954
2022-04-22 06:15.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:16.07 [info     ] CQL_20220422060215: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00038172468284651035, 'time_algorithm_update': 0.0472458222008854, 'temp_loss': 1.0505200903539713, 'temp': 0.20793282994300644, 'alpha_loss': -113.27077684788345, 'alpha': 6.514098518845663, 'critic_loss': 6036.362111350704, 'actor_loss': 6.337704654373875, 'time_step': 0.047729418456898945, 'td_error': 1.3292933004287837, 'init_value': -6.710770606994629, 'ave_value': -6.711915051535782} step=17300
2022-04-22 06:16.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422060215/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 06:16.09 [info     ] FQE_20220422061608: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00016890232821544968, 'time_algorithm_update': 0.004634442099605699, 'loss': 0.006515465584213296, 'time_step': 0.0048815572118184655, 'init_value': -0.06944002211093903, 'ave_value': -0.01545924239284317, 'soft_opc': nan} step=166




2022-04-22 06:16.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.09 [info     ] FQE_20220422061608: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00020688413137412933, 'time_algorithm_update': 0.005192837083196065, 'loss': 0.004411100715690528, 'time_step': 0.005471345889999206, 'init_value': -0.15356552600860596, 'ave_value': -0.07914406902635017, 'soft_opc': nan} step=332




2022-04-22 06:16.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.10 [info     ] FQE_20220422061608: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00017432132399225808, 'time_algorithm_update': 0.005254255719931729, 'loss': 0.0037551694752996317, 'time_step': 0.0055042921778667405, 'init_value': -0.1754021793603897, 'ave_value': -0.09914405381323786, 'soft_opc': nan} step=498




2022-04-22 06:16.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.12 [info     ] FQE_20220422061608: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017363479338496565, 'time_algorithm_update': 0.005380218287548387, 'loss': 0.003386180831052094, 'time_step': 0.0056295624698501035, 'init_value': -0.23227404057979584, 'ave_value': -0.1408019781899261, 'soft_opc': nan} step=664




2022-04-22 06:16.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.13 [info     ] FQE_20220422061608: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00017133678298398672, 'time_algorithm_update': 0.005165884293705584, 'loss': 0.0030778301942880346, 'time_step': 0.005408969270177634, 'init_value': -0.29225969314575195, 'ave_value': -0.18831731967841955, 'soft_opc': nan} step=830




2022-04-22 06:16.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.13 [info     ] FQE_20220422061608: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00016638457056987718, 'time_algorithm_update': 0.0051346988563078, 'loss': 0.002699818124108196, 'time_step': 0.005372393562133054, 'init_value': -0.30712875723838806, 'ave_value': -0.20499679310486427, 'soft_opc': nan} step=996




2022-04-22 06:16.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.14 [info     ] FQE_20220422061608: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00017152062381606503, 'time_algorithm_update': 0.005233480269650379, 'loss': 0.00247087498182846, 'time_step': 0.005477566316903356, 'init_value': -0.3373761773109436, 'ave_value': -0.2387483559632758, 'soft_opc': nan} step=1162




2022-04-22 06:16.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.15 [info     ] FQE_20220422061608: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001703127320990505, 'time_algorithm_update': 0.005246054695313235, 'loss': 0.0023452074180009314, 'time_step': 0.005489663905408009, 'init_value': -0.36880600452423096, 'ave_value': -0.2679769876052265, 'soft_opc': nan} step=1328




2022-04-22 06:16.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.16 [info     ] FQE_20220422061608: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00017152493258556687, 'time_algorithm_update': 0.005225899707840149, 'loss': 0.0020664505154197386, 'time_step': 0.005469915378524597, 'init_value': -0.38763824105262756, 'ave_value': -0.2848277788412094, 'soft_opc': nan} step=1494




2022-04-22 06:16.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.17 [info     ] FQE_20220422061608: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00017208076385130365, 'time_algorithm_update': 0.005318294088524508, 'loss': 0.0020015226699501634, 'time_step': 0.00556818979332246, 'init_value': -0.41394323110580444, 'ave_value': -0.3099030027082106, 'soft_opc': nan} step=1660




2022-04-22 06:16.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.18 [info     ] FQE_20220422061608: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001616391790918557, 'time_algorithm_update': 0.004867140069065324, 'loss': 0.001787387850450307, 'time_step': 0.005094269672072077, 'init_value': -0.4606494903564453, 'ave_value': -0.35372720803931224, 'soft_opc': nan} step=1826




2022-04-22 06:16.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.19 [info     ] FQE_20220422061608: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001687845551823995, 'time_algorithm_update': 0.005225227539797863, 'loss': 0.001754593320824879, 'time_step': 0.005467555609094091, 'init_value': -0.4940614104270935, 'ave_value': -0.37724024424169983, 'soft_opc': nan} step=1992




2022-04-22 06:16.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.20 [info     ] FQE_20220422061608: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016982009611934065, 'time_algorithm_update': 0.005129015589334878, 'loss': 0.001787014547188441, 'time_step': 0.0053708524589078975, 'init_value': -0.5348643064498901, 'ave_value': -0.4124015681139353, 'soft_opc': nan} step=2158




2022-04-22 06:16.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.21 [info     ] FQE_20220422061608: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001735672893294369, 'time_algorithm_update': 0.00539872301630227, 'loss': 0.00180421432680787, 'time_step': 0.0056497433099402, 'init_value': -0.589421808719635, 'ave_value': -0.4674243419947153, 'soft_opc': nan} step=2324




2022-04-22 06:16.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.22 [info     ] FQE_20220422061608: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00017320104392178087, 'time_algorithm_update': 0.005272927054439683, 'loss': 0.0019683656717527165, 'time_step': 0.0055253591882177145, 'init_value': -0.632800817489624, 'ave_value': -0.5005300911730742, 'soft_opc': nan} step=2490




2022-04-22 06:16.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.23 [info     ] FQE_20220422061608: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00017870477883212538, 'time_algorithm_update': 0.00510477014334805, 'loss': 0.0021211217674398683, 'time_step': 0.005355520420763866, 'init_value': -0.6890666484832764, 'ave_value': -0.5507987226143496, 'soft_opc': nan} step=2656




2022-04-22 06:16.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.24 [info     ] FQE_20220422061608: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00016881328031241176, 'time_algorithm_update': 0.005022712500698595, 'loss': 0.0021861738760966852, 'time_step': 0.005262130714324583, 'init_value': -0.7464755773544312, 'ave_value': -0.6101875335716449, 'soft_opc': nan} step=2822




2022-04-22 06:16.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.25 [info     ] FQE_20220422061608: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00017030985958604928, 'time_algorithm_update': 0.005194788955780397, 'loss': 0.002342921563884221, 'time_step': 0.00543543947748391, 'init_value': -0.779090940952301, 'ave_value': -0.6291075490786366, 'soft_opc': nan} step=2988




2022-04-22 06:16.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.26 [info     ] FQE_20220422061608: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00016789407615202018, 'time_algorithm_update': 0.005319358354591462, 'loss': 0.002576401462274918, 'time_step': 0.005558831145964473, 'init_value': -0.816985011100769, 'ave_value': -0.6528797133624352, 'soft_opc': nan} step=3154




2022-04-22 06:16.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.27 [info     ] FQE_20220422061608: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00017114288835640414, 'time_algorithm_update': 0.004680053297295628, 'loss': 0.0026864925618928642, 'time_step': 0.0049240546054150685, 'init_value': -0.8889914751052856, 'ave_value': -0.7203253590407455, 'soft_opc': nan} step=3320




2022-04-22 06:16.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.28 [info     ] FQE_20220422061608: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00017352563789091915, 'time_algorithm_update': 0.005254484084715326, 'loss': 0.002857773030016863, 'time_step': 0.005501928099666734, 'init_value': -0.8892632722854614, 'ave_value': -0.7163958120847809, 'soft_opc': nan} step=3486




2022-04-22 06:16.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.29 [info     ] FQE_20220422061608: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016861651317182793, 'time_algorithm_update': 0.005207160869276667, 'loss': 0.0029163982313131936, 'time_step': 0.005449608147862446, 'init_value': -0.9869592189788818, 'ave_value': -0.809056684338731, 'soft_opc': nan} step=3652




2022-04-22 06:16.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.30 [info     ] FQE_20220422061608: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001716829208006342, 'time_algorithm_update': 0.0053475262170814605, 'loss': 0.003198084206508293, 'time_step': 0.005596389253455472, 'init_value': -1.0233319997787476, 'ave_value': -0.8302496243161275, 'soft_opc': nan} step=3818




2022-04-22 06:16.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.31 [info     ] FQE_20220422061608: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00017776690333722587, 'time_algorithm_update': 0.005159752914704472, 'loss': 0.0032966384751489386, 'time_step': 0.005412972117044839, 'init_value': -1.1021209955215454, 'ave_value': -0.8947289077911174, 'soft_opc': nan} step=3984




2022-04-22 06:16.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.32 [info     ] FQE_20220422061608: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00017595434763345374, 'time_algorithm_update': 0.005447193800684917, 'loss': 0.003598774234898927, 'time_step': 0.005700150168085673, 'init_value': -1.1736241579055786, 'ave_value': -0.958597845750282, 'soft_opc': nan} step=4150




2022-04-22 06:16.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.33 [info     ] FQE_20220422061608: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.000170421887593097, 'time_algorithm_update': 0.0053240032081144405, 'loss': 0.0036725146679841654, 'time_step': 0.005570411682128906, 'init_value': -1.278673529624939, 'ave_value': -1.0587257423639498, 'soft_opc': nan} step=4316




2022-04-22 06:16.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.34 [info     ] FQE_20220422061608: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001698646200708596, 'time_algorithm_update': 0.005234594804694854, 'loss': 0.003836330937548451, 'time_step': 0.005479192159262048, 'init_value': -1.2647974491119385, 'ave_value': -1.0320359950004196, 'soft_opc': nan} step=4482




2022-04-22 06:16.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.35 [info     ] FQE_20220422061608: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00017610228205301674, 'time_algorithm_update': 0.005173381552638778, 'loss': 0.004141949604760508, 'time_step': 0.005419886255838785, 'init_value': -1.349390983581543, 'ave_value': -1.108532767641979, 'soft_opc': nan} step=4648




2022-04-22 06:16.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.36 [info     ] FQE_20220422061608: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00017549187304025674, 'time_algorithm_update': 0.0047509167567793146, 'loss': 0.004336144693977065, 'time_step': 0.004999162202858063, 'init_value': -1.3967268466949463, 'ave_value': -1.12829531135671, 'soft_opc': nan} step=4814




2022-04-22 06:16.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.37 [info     ] FQE_20220422061608: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00017785164247076195, 'time_algorithm_update': 0.005158557949295963, 'loss': 0.004536567331419371, 'time_step': 0.00541095417666148, 'init_value': -1.415693759918213, 'ave_value': -1.1374565477812293, 'soft_opc': nan} step=4980




2022-04-22 06:16.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.38 [info     ] FQE_20220422061608: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00017073786402323158, 'time_algorithm_update': 0.00533334605665092, 'loss': 0.004676862004253711, 'time_step': 0.005581052906541939, 'init_value': -1.5124865770339966, 'ave_value': -1.2116415112237404, 'soft_opc': nan} step=5146




2022-04-22 06:16.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.39 [info     ] FQE_20220422061608: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00016753357577036662, 'time_algorithm_update': 0.005153587065547346, 'loss': 0.004784095351947528, 'time_step': 0.005396759653665933, 'init_value': -1.546762466430664, 'ave_value': -1.2204757885856403, 'soft_opc': nan} step=5312




2022-04-22 06:16.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.40 [info     ] FQE_20220422061608: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00017715793058096645, 'time_algorithm_update': 0.005228769348328372, 'loss': 0.004890014882254167, 'time_step': 0.005482789981796081, 'init_value': -1.5754120349884033, 'ave_value': -1.2399364775428465, 'soft_opc': nan} step=5478




2022-04-22 06:16.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.41 [info     ] FQE_20220422061608: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016963051026125988, 'time_algorithm_update': 0.005213922764881548, 'loss': 0.0051353898820543035, 'time_step': 0.005457702889499894, 'init_value': -1.655093789100647, 'ave_value': -1.300264803165788, 'soft_opc': nan} step=5644




2022-04-22 06:16.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.42 [info     ] FQE_20220422061608: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00017330876315932675, 'time_algorithm_update': 0.005322034100452101, 'loss': 0.005264078347392504, 'time_step': 0.005568524441087103, 'init_value': -1.7382910251617432, 'ave_value': -1.374713861576055, 'soft_opc': nan} step=5810




2022-04-22 06:16.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.43 [info     ] FQE_20220422061608: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00017027395317353397, 'time_algorithm_update': 0.005147042044674058, 'loss': 0.00565467984345461, 'time_step': 0.005388896149325083, 'init_value': -1.754828929901123, 'ave_value': -1.3674167453873534, 'soft_opc': nan} step=5976




2022-04-22 06:16.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.44 [info     ] FQE_20220422061608: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.000169522791023714, 'time_algorithm_update': 0.005208579890699272, 'loss': 0.00574657833641984, 'time_step': 0.005451814237847386, 'init_value': -1.762520432472229, 'ave_value': -1.3559732747197084, 'soft_opc': nan} step=6142




2022-04-22 06:16.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.45 [info     ] FQE_20220422061608: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00018664727728050877, 'time_algorithm_update': 0.005150180265127894, 'loss': 0.005920804130244462, 'time_step': 0.005413512149489069, 'init_value': -1.8804829120635986, 'ave_value': -1.4763877435780324, 'soft_opc': nan} step=6308




2022-04-22 06:16.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.46 [info     ] FQE_20220422061608: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00016485208488372434, 'time_algorithm_update': 0.0044375370783978196, 'loss': 0.006002800170779621, 'time_step': 0.004677068756287356, 'init_value': -1.92937433719635, 'ave_value': -1.5018404356766124, 'soft_opc': nan} step=6474




2022-04-22 06:16.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.47 [info     ] FQE_20220422061608: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001524902251829584, 'time_algorithm_update': 0.004901489579533956, 'loss': 0.006420517924307643, 'time_step': 0.005121996603816389, 'init_value': -2.001232624053955, 'ave_value': -1.539420842818983, 'soft_opc': nan} step=6640




2022-04-22 06:16.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.48 [info     ] FQE_20220422061608: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00016141081430825842, 'time_algorithm_update': 0.005207883306296475, 'loss': 0.00632389749018812, 'time_step': 0.005437878241021949, 'init_value': -1.9853577613830566, 'ave_value': -1.5061019327155911, 'soft_opc': nan} step=6806




2022-04-22 06:16.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.49 [info     ] FQE_20220422061608: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001530632915267025, 'time_algorithm_update': 0.005091896976333067, 'loss': 0.006835354655295195, 'time_step': 0.005308628082275391, 'init_value': -2.040757656097412, 'ave_value': -1.5513095410087625, 'soft_opc': nan} step=6972




2022-04-22 06:16.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.50 [info     ] FQE_20220422061608: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.000159973121551146, 'time_algorithm_update': 0.005048681454486157, 'loss': 0.006813458748391818, 'time_step': 0.0052778835756232935, 'init_value': -2.1047143936157227, 'ave_value': -1.5945305215614336, 'soft_opc': nan} step=7138




2022-04-22 06:16.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.51 [info     ] FQE_20220422061608: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00014689887862607656, 'time_algorithm_update': 0.0049019348190491455, 'loss': 0.006977143847342883, 'time_step': 0.005113537053027785, 'init_value': -2.208158016204834, 'ave_value': -1.6745980906325417, 'soft_opc': nan} step=7304




2022-04-22 06:16.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.52 [info     ] FQE_20220422061608: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00015319255461175758, 'time_algorithm_update': 0.004926032330616411, 'loss': 0.007349464938736027, 'time_step': 0.005144985325365181, 'init_value': -2.2074673175811768, 'ave_value': -1.6514381747525018, 'soft_opc': nan} step=7470




2022-04-22 06:16.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.53 [info     ] FQE_20220422061608: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001615056072372988, 'time_algorithm_update': 0.005079309624361704, 'loss': 0.007549919669813734, 'time_step': 0.005308513181755342, 'init_value': -2.2882931232452393, 'ave_value': -1.7227918414919225, 'soft_opc': nan} step=7636




2022-04-22 06:16.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.54 [info     ] FQE_20220422061608: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00016031782311129282, 'time_algorithm_update': 0.004963405161018831, 'loss': 0.0076075117712130725, 'time_step': 0.005195061844515513, 'init_value': -2.340167284011841, 'ave_value': -1.757927485427878, 'soft_opc': nan} step=7802




2022-04-22 06:16.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.55 [info     ] FQE_20220422061608: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001687328499483775, 'time_algorithm_update': 0.0045667771833488744, 'loss': 0.00801919953752427, 'time_step': 0.004811522472335632, 'init_value': -2.4064481258392334, 'ave_value': -1.7968776353173428, 'soft_opc': nan} step=7968




2022-04-22 06:16.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.56 [info     ] FQE_20220422061608: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00017340642860136837, 'time_algorithm_update': 0.005281316228659756, 'loss': 0.00793136362633696, 'time_step': 0.005529227026973863, 'init_value': -2.410275936126709, 'ave_value': -1.7905148141287468, 'soft_opc': nan} step=8134




2022-04-22 06:16.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:16.57 [info     ] FQE_20220422061608: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00017125491636345186, 'time_algorithm_update': 0.00532524556998747, 'loss': 0.008262864772667996, 'time_step': 0.005573325846568647, 'init_value': -2.471513509750366, 'ave_value': -1.8275409005917944, 'soft_opc': nan} step=8300




2022-04-22 06:16.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061608/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 06:16.57 [info     ] Directory is created at d3rlpy_logs/FQE_20220422061657
2022-04-22 06:16.57 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 06:16.57 [debug    ] Building models...
2022-04-22 06:16.57 [debug    ] Models have been built.
2022-04-22 06:16.57 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422061657/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 06:16.59 [info     ] FQE_20220422061657: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00017152899919554244, 'time_algorithm_update': 0.005281817774439967, 'loss': 0.023537551407528997, 'time_step': 0.0055284534775933555, 'init_value': -1.0716724395751953, 'ave_value': -1.0479967827531131, 'soft_opc': nan} step=344




2022-04-22 06:16.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.01 [info     ] FQE_20220422061657: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.0001665305259615876, 'time_algorithm_update': 0.005153378081876178, 'loss': 0.022701113449604533, 'time_step': 0.005393218855525172, 'init_value': -1.9452353715896606, 'ave_value': -1.9325904548302426, 'soft_opc': nan} step=688




2022-04-22 06:17.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.03 [info     ] FQE_20220422061657: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00017084077347156613, 'time_algorithm_update': 0.005321923383446627, 'loss': 0.024188204438338967, 'time_step': 0.005571638428887656, 'init_value': -3.004936456680298, 'ave_value': -3.002772776191836, 'soft_opc': nan} step=1032




2022-04-22 06:17.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.05 [info     ] FQE_20220422061657: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.0001719767271086227, 'time_algorithm_update': 0.004981357690899871, 'loss': 0.02751246526252565, 'time_step': 0.0052292340023573055, 'init_value': -3.830453872680664, 'ave_value': -3.821608168020979, 'soft_opc': nan} step=1376




2022-04-22 06:17.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.07 [info     ] FQE_20220422061657: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00017016918160194574, 'time_algorithm_update': 0.005260629016299581, 'loss': 0.03327945077272002, 'time_step': 0.005508002846740013, 'init_value': -4.896059036254883, 'ave_value': -4.894435713661683, 'soft_opc': nan} step=1720




2022-04-22 06:17.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.09 [info     ] FQE_20220422061657: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017480032388554064, 'time_algorithm_update': 0.005259879799776299, 'loss': 0.03739578386248891, 'time_step': 0.005511513283086377, 'init_value': -5.6115803718566895, 'ave_value': -5.606638339108175, 'soft_opc': nan} step=2064




2022-04-22 06:17.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.11 [info     ] FQE_20220422061657: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001675133095231167, 'time_algorithm_update': 0.005156298016392907, 'loss': 0.04734013638145095, 'time_step': 0.005395946807639543, 'init_value': -6.544074058532715, 'ave_value': -6.599476802993465, 'soft_opc': nan} step=2408




2022-04-22 06:17.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.13 [info     ] FQE_20220422061657: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00017263584358747616, 'time_algorithm_update': 0.0053173733312030175, 'loss': 0.05837093082224127, 'time_step': 0.005568914635236873, 'init_value': -7.125874996185303, 'ave_value': -7.278381134797861, 'soft_opc': nan} step=2752




2022-04-22 06:17.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.15 [info     ] FQE_20220422061657: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017064047414203022, 'time_algorithm_update': 0.005233069491940875, 'loss': 0.06879459654500839, 'time_step': 0.0054836162300997, 'init_value': -7.673361778259277, 'ave_value': -7.977317487388044, 'soft_opc': nan} step=3096




2022-04-22 06:17.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.17 [info     ] FQE_20220422061657: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00017515032790427985, 'time_algorithm_update': 0.005224936230238094, 'loss': 0.08718795439250074, 'time_step': 0.00547800715579543, 'init_value': -8.222692489624023, 'ave_value': -8.793251736166779, 'soft_opc': nan} step=3440




2022-04-22 06:17.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.19 [info     ] FQE_20220422061657: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00017513508020445358, 'time_algorithm_update': 0.005286268023557441, 'loss': 0.10085918046100888, 'time_step': 0.005539733310078465, 'init_value': -8.509475708007812, 'ave_value': -9.407733121930479, 'soft_opc': nan} step=3784




2022-04-22 06:17.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.21 [info     ] FQE_20220422061657: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017343981321467909, 'time_algorithm_update': 0.005242444748102233, 'loss': 0.12312997311765198, 'time_step': 0.0054887422295503835, 'init_value': -8.861695289611816, 'ave_value': -10.175016182577153, 'soft_opc': nan} step=4128




2022-04-22 06:17.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.23 [info     ] FQE_20220422061657: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00017227267110070518, 'time_algorithm_update': 0.005026895639508269, 'loss': 0.14314996494998244, 'time_step': 0.005273979763652003, 'init_value': -8.927806854248047, 'ave_value': -10.75960953692614, 'soft_opc': nan} step=4472




2022-04-22 06:17.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.25 [info     ] FQE_20220422061657: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00016892302867978117, 'time_algorithm_update': 0.00535242710002633, 'loss': 0.16933401267390785, 'time_step': 0.0055952654328457144, 'init_value': -9.263218879699707, 'ave_value': -11.655094427931719, 'soft_opc': nan} step=4816




2022-04-22 06:17.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.28 [info     ] FQE_20220422061657: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00017984592637350393, 'time_algorithm_update': 0.005331915478373683, 'loss': 0.18617607745190345, 'time_step': 0.005591716184172519, 'init_value': -9.360435485839844, 'ave_value': -12.215331976611884, 'soft_opc': nan} step=5160




2022-04-22 06:17.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.30 [info     ] FQE_20220422061657: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001693464988885924, 'time_algorithm_update': 0.0051874518394470215, 'loss': 0.20937662906294993, 'time_step': 0.0054358708304028175, 'init_value': -9.567301750183105, 'ave_value': -12.986135335924494, 'soft_opc': nan} step=5504




2022-04-22 06:17.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.32 [info     ] FQE_20220422061657: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00016936798428380213, 'time_algorithm_update': 0.00518696807151617, 'loss': 0.2302886085628077, 'time_step': 0.0054298951182254526, 'init_value': -9.526602745056152, 'ave_value': -13.490432510373543, 'soft_opc': nan} step=5848




2022-04-22 06:17.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.34 [info     ] FQE_20220422061657: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001666871614234392, 'time_algorithm_update': 0.004868668872256612, 'loss': 0.24536417263321753, 'time_step': 0.005110653333885725, 'init_value': -9.759560585021973, 'ave_value': -14.175490394882702, 'soft_opc': nan} step=6192




2022-04-22 06:17.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.36 [info     ] FQE_20220422061657: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017214098642038744, 'time_algorithm_update': 0.0052326134471006175, 'loss': 0.26930308384214374, 'time_step': 0.005483631477799527, 'init_value': -10.101573944091797, 'ave_value': -15.04715563633882, 'soft_opc': nan} step=6536




2022-04-22 06:17.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.38 [info     ] FQE_20220422061657: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017102513202401094, 'time_algorithm_update': 0.005225589802098828, 'loss': 0.2852900389224551, 'time_step': 0.005475258411363114, 'init_value': -10.473864555358887, 'ave_value': -15.792475328866816, 'soft_opc': nan} step=6880




2022-04-22 06:17.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.40 [info     ] FQE_20220422061657: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001640617847442627, 'time_algorithm_update': 0.005167273587958757, 'loss': 0.3127779359062917, 'time_step': 0.005405738603237064, 'init_value': -10.95047664642334, 'ave_value': -16.82552861978451, 'soft_opc': nan} step=7224




2022-04-22 06:17.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.42 [info     ] FQE_20220422061657: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001703978970993397, 'time_algorithm_update': 0.005289799945299016, 'loss': 0.32839876550877856, 'time_step': 0.0055359463359034335, 'init_value': -11.228337287902832, 'ave_value': -17.581452178509554, 'soft_opc': nan} step=7568




2022-04-22 06:17.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.44 [info     ] FQE_20220422061657: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.000172093857166379, 'time_algorithm_update': 0.005074406779089639, 'loss': 0.35217873396373595, 'time_step': 0.005325393621311631, 'init_value': -11.53911018371582, 'ave_value': -18.36721399945358, 'soft_opc': nan} step=7912




2022-04-22 06:17.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.46 [info     ] FQE_20220422061657: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001714589983917946, 'time_algorithm_update': 0.005279324775518373, 'loss': 0.3950892361855611, 'time_step': 0.005528127731278885, 'init_value': -11.781251907348633, 'ave_value': -18.749395163793135, 'soft_opc': nan} step=8256




2022-04-22 06:17.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.48 [info     ] FQE_20220422061657: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00017679638640825138, 'time_algorithm_update': 0.005224141270615334, 'loss': 0.41729835469578935, 'time_step': 0.005478010621181754, 'init_value': -12.264918327331543, 'ave_value': -19.710244065211867, 'soft_opc': nan} step=8600




2022-04-22 06:17.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.50 [info     ] FQE_20220422061657: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00016883570094441259, 'time_algorithm_update': 0.00521255093951558, 'loss': 0.4459397183720369, 'time_step': 0.00545974803525348, 'init_value': -12.695919036865234, 'ave_value': -20.56202498544165, 'soft_opc': nan} step=8944




2022-04-22 06:17.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.52 [info     ] FQE_20220422061657: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001728832721710205, 'time_algorithm_update': 0.00488601521004078, 'loss': 0.463317648404776, 'time_step': 0.005133933106134104, 'init_value': -12.753345489501953, 'ave_value': -20.88899492922004, 'soft_opc': nan} step=9288




2022-04-22 06:17.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.54 [info     ] FQE_20220422061657: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017051225484803666, 'time_algorithm_update': 0.005340219930160877, 'loss': 0.4890953377484842, 'time_step': 0.00558848783027294, 'init_value': -13.076312065124512, 'ave_value': -21.67538176056759, 'soft_opc': nan} step=9632




2022-04-22 06:17.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.56 [info     ] FQE_20220422061657: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00016855639080668605, 'time_algorithm_update': 0.005194215580474499, 'loss': 0.5167790246899998, 'time_step': 0.005437485700429872, 'init_value': -13.16450309753418, 'ave_value': -21.94710738174224, 'soft_opc': nan} step=9976




2022-04-22 06:17.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:17.58 [info     ] FQE_20220422061657: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00017131968986156375, 'time_algorithm_update': 0.005245518545771754, 'loss': 0.5302319524723084, 'time_step': 0.005495187155036039, 'init_value': -13.973623275756836, 'ave_value': -22.882185433250704, 'soft_opc': nan} step=10320




2022-04-22 06:17.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.00 [info     ] FQE_20220422061657: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.000169829573742179, 'time_algorithm_update': 0.005235626253970834, 'loss': 0.5495191438415976, 'time_step': 0.005482522554175798, 'init_value': -14.456097602844238, 'ave_value': -23.321610794295996, 'soft_opc': nan} step=10664




2022-04-22 06:18.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.02 [info     ] FQE_20220422061657: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00016751123029132222, 'time_algorithm_update': 0.0048621054305586706, 'loss': 0.5722768877546281, 'time_step': 0.005101700854855914, 'init_value': -15.174630165100098, 'ave_value': -24.151427250278292, 'soft_opc': nan} step=11008




2022-04-22 06:18.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.04 [info     ] FQE_20220422061657: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00016735598098400028, 'time_algorithm_update': 0.0052361162595970685, 'loss': 0.5854797337778173, 'time_step': 0.0054830458275107454, 'init_value': -15.604236602783203, 'ave_value': -24.449001763318037, 'soft_opc': nan} step=11352




2022-04-22 06:18.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.06 [info     ] FQE_20220422061657: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00017023779625116394, 'time_algorithm_update': 0.0053634338600690975, 'loss': 0.5987506819791485, 'time_step': 0.0056109255136445515, 'init_value': -16.341632843017578, 'ave_value': -25.116567437954853, 'soft_opc': nan} step=11696




2022-04-22 06:18.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.08 [info     ] FQE_20220422061657: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001730530761009039, 'time_algorithm_update': 0.005328026621840721, 'loss': 0.6164960514829863, 'time_step': 0.005579074454862018, 'init_value': -17.192970275878906, 'ave_value': -25.416439247037378, 'soft_opc': nan} step=12040




2022-04-22 06:18.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.10 [info     ] FQE_20220422061657: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017067235569621241, 'time_algorithm_update': 0.005191442578337913, 'loss': 0.6279114770139892, 'time_step': 0.005438500365545583, 'init_value': -18.222288131713867, 'ave_value': -26.157524670861864, 'soft_opc': nan} step=12384




2022-04-22 06:18.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.12 [info     ] FQE_20220422061657: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00017201484635818835, 'time_algorithm_update': 0.005041971456172855, 'loss': 0.6180165051915791, 'time_step': 0.005290145097776901, 'init_value': -19.1590633392334, 'ave_value': -26.299479182158503, 'soft_opc': nan} step=12728




2022-04-22 06:18.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.14 [info     ] FQE_20220422061657: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00017032651014106217, 'time_algorithm_update': 0.005166220110516215, 'loss': 0.6394590078636484, 'time_step': 0.005412672148194424, 'init_value': -19.45896339416504, 'ave_value': -26.433431342351543, 'soft_opc': nan} step=13072




2022-04-22 06:18.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.16 [info     ] FQE_20220422061657: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00016573279402976813, 'time_algorithm_update': 0.005114234464113103, 'loss': 0.6607259480813301, 'time_step': 0.005354174347810967, 'init_value': -20.90011978149414, 'ave_value': -27.247367234702583, 'soft_opc': nan} step=13416




2022-04-22 06:18.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.18 [info     ] FQE_20220422061657: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017193583554999772, 'time_algorithm_update': 0.0051682314207387526, 'loss': 0.6812076718806354, 'time_step': 0.005416523578555085, 'init_value': -21.42295265197754, 'ave_value': -27.397763335167824, 'soft_opc': nan} step=13760




2022-04-22 06:18.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.20 [info     ] FQE_20220422061657: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00016441664030385572, 'time_algorithm_update': 0.004785948021467342, 'loss': 0.7097848945264813, 'time_step': 0.005026106224503628, 'init_value': -22.35847282409668, 'ave_value': -27.67759113827267, 'soft_opc': nan} step=14104




2022-04-22 06:18.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.22 [info     ] FQE_20220422061657: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001750595347825871, 'time_algorithm_update': 0.005249044229817945, 'loss': 0.747851520706358, 'time_step': 0.005500726921613826, 'init_value': -23.207197189331055, 'ave_value': -28.00062322670275, 'soft_opc': nan} step=14448




2022-04-22 06:18.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.24 [info     ] FQE_20220422061657: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001717875170153241, 'time_algorithm_update': 0.0052845637465632235, 'loss': 0.7807209811465772, 'time_step': 0.0055359837620757345, 'init_value': -24.07918930053711, 'ave_value': -28.50076619311496, 'soft_opc': nan} step=14792




2022-04-22 06:18.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.26 [info     ] FQE_20220422061657: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016902837642403536, 'time_algorithm_update': 0.005206852458244146, 'loss': 0.8234477957093351, 'time_step': 0.005452299533888351, 'init_value': -24.810447692871094, 'ave_value': -28.559895047089, 'soft_opc': nan} step=15136




2022-04-22 06:18.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.28 [info     ] FQE_20220422061657: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00017173137775687285, 'time_algorithm_update': 0.005170199067093605, 'loss': 0.8644343978389665, 'time_step': 0.005420381246611129, 'init_value': -25.271812438964844, 'ave_value': -28.64401026247321, 'soft_opc': nan} step=15480




2022-04-22 06:18.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.30 [info     ] FQE_20220422061657: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001715761284495509, 'time_algorithm_update': 0.005009623460991438, 'loss': 0.8849506168734542, 'time_step': 0.005256513523501019, 'init_value': -25.568504333496094, 'ave_value': -28.347903288350448, 'soft_opc': nan} step=15824




2022-04-22 06:18.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.32 [info     ] FQE_20220422061657: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00017288465832555018, 'time_algorithm_update': 0.005231967499089795, 'loss': 0.9391242137861027, 'time_step': 0.005481881457705831, 'init_value': -26.755859375, 'ave_value': -29.08723285520399, 'soft_opc': nan} step=16168




2022-04-22 06:18.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.34 [info     ] FQE_20220422061657: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00016981293988782307, 'time_algorithm_update': 0.005209939424381699, 'loss': 0.9676248403049486, 'time_step': 0.005452983601148738, 'init_value': -26.778993606567383, 'ave_value': -28.81476765038731, 'soft_opc': nan} step=16512




2022-04-22 06:18.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.36 [info     ] FQE_20220422061657: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016896599947020065, 'time_algorithm_update': 0.005277016828226489, 'loss': 1.0188468059005087, 'time_step': 0.005520977946214898, 'init_value': -27.824798583984375, 'ave_value': -29.4587973658029, 'soft_opc': nan} step=16856




2022-04-22 06:18.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:18.38 [info     ] FQE_20220422061657: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00016716469165890715, 'time_algorithm_update': 0.00521005863367125, 'loss': 1.0424286961923679, 'time_step': 0.005449064249216124, 'init_value': -28.09905242919922, 'ave_value': -29.735447533388395, 'soft_opc': nan} step=17200




2022-04-22 06:18.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422061657/model_17200.pt
search iteration:  25
using hyper params:  [0.001749258448528415, 0.009387597622627055, 7.269571096331571e-05, 5]
2022-04-22 06:18.38 [debug    ] RoundIterator is selected.
2022-04-22 06:18.38 [info     ] Directory is created at d3rlpy_logs/CQL_20220422061838
2022-04-22 06:18.38 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 06:18.38 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 06:18.38 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422061838/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.001749258448528415, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:18.55 [info     ] CQL_20220422061838: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00040385764458275943, 'time_algorithm_update': 0.04633617745658566, 'temp_loss': 4.769157416558679, 'temp': 0.9867534766651992, 'alpha_loss': -17.659676171451636, 'alpha': 1.0177884349933248, 'critic_loss': 97.99846891722927, 'actor_loss': 4.5890212007125335, 'time_step': 0.04684044102023792, 'td_error': 1.3225003489749516, 'init_value': -7.198488712310791, 'ave_value': -6.609956434152883} step=346
2022-04-22 06:18.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:19.12 [info     ] CQL_20220422061838: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0004056650779150814, 'time_algorithm_update': 0.046234526386150734, 'temp_loss': 4.853264069970632, 'temp': 0.9615593419943241, 'alpha_loss': -18.388481663830706, 'alpha': 1.0543775517127418, 'critic_loss': 200.4954645454539, 'actor_loss': 7.819138897636722, 'time_step': 0.04674399588149407, 'td_error': 1.3436411520014346, 'init_value': -9.220577239990234, 'ave_value': -8.522545394969812} step=692
2022-04-22 06:19.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:19.29 [info     ] CQL_20220422061838: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00041175439867670137, 'time_algorithm_update': 0.04679229218146704, 'temp_loss': 4.736449524157309, 'temp': 0.9378036045270159, 'alpha_loss': -19.030254132485805, 'alpha': 1.0926935948388425, 'critic_loss': 446.5403273918725, 'actor_loss': 7.67666306936672, 'time_step': 0.047304992730906936, 'td_error': 1.291054796627974, 'init_value': -7.4933366775512695, 'ave_value': -7.139211252528155} step=1038
2022-04-22 06:19.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:19.46 [info     ] CQL_20220422061838: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00040462526971894193, 'time_algorithm_update': 0.046932390659530726, 'temp_loss': 4.619452366250099, 'temp': 0.9150054518542537, 'alpha_loss': -19.7072671989485, 'alpha': 1.1328505474018913, 'critic_loss': 830.1724394869942, 'actor_loss': 4.979348811111009, 'time_step': 0.047436056798593154, 'td_error': 1.2891104884163946, 'init_value': -5.9847822189331055, 'ave_value': -5.855327512145099} step=1384
2022-04-22 06:19.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:20.03 [info     ] CQL_20220422061838: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00040463422764243416, 'time_algorithm_update': 0.047321411915597196, 'temp_loss': 4.508614227261846, 'temp': 0.893008582509322, 'alpha_loss': -20.43258554535794, 'alpha': 1.1749259939772545, 'critic_loss': 1256.1940798015264, 'actor_loss': 4.680981950263757, 'time_step': 0.04782766896176201, 'td_error': 1.3073656294976566, 'init_value': -6.448838233947754, 'ave_value': -6.372013116617687} step=1730
2022-04-22 06:20.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:20.20 [info     ] CQL_20220422061838: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00040935022982558765, 'time_algorithm_update': 0.047250339750609646, 'temp_loss': 4.402615592658864, 'temp': 0.8717081577791644, 'alpha_loss': -21.197529842398758, 'alpha': 1.2189785373004185, 'critic_loss': 1672.1530415970467, 'actor_loss': 5.212501411493114, 'time_step': 0.047765138521359836, 'td_error': 1.3231689432093714, 'init_value': -7.084102630615234, 'ave_value': -7.026170098448094} step=2076
2022-04-22 06:20.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:20.37 [info     ] CQL_20220422061838: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00039782689485935805, 'time_algorithm_update': 0.04714905526596687, 'temp_loss': 4.29669404581103, 'temp': 0.8510493975498773, 'alpha_loss': -21.99800695849292, 'alpha': 1.2650431590962272, 'critic_loss': 2088.107062014541, 'actor_loss': 5.907309493577549, 'time_step': 0.047650301387544315, 'td_error': 1.3403245720471793, 'init_value': -7.681733131408691, 'ave_value': -7.64682964754082} step=2422
2022-04-22 06:20.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:20.54 [info     ] CQL_20220422061838: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0004093709019567236, 'time_algorithm_update': 0.04750830382970027, 'temp_loss': 4.196208901488023, 'temp': 0.8309848541124708, 'alpha_loss': -22.8348141543438, 'alpha': 1.313161553330504, 'critic_loss': 2510.909738529624, 'actor_loss': 6.707973895045374, 'time_step': 0.04802302749170733, 'td_error': 1.3603255896853754, 'init_value': -8.641850471496582, 'ave_value': -8.599109506041815} step=2768
2022-04-22 06:20.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:21.12 [info     ] CQL_20220422061838: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0004120155566000525, 'time_algorithm_update': 0.04724640584405447, 'temp_loss': 4.097635853497279, 'temp': 0.811470831922024, 'alpha_loss': -23.707899755136125, 'alpha': 1.3633701325841032, 'critic_loss': 2955.793425984465, 'actor_loss': 7.505470842295299, 'time_step': 0.04776081528966827, 'td_error': 1.3822480214609016, 'init_value': -9.584493637084961, 'ave_value': -9.539365150330472} step=3114
2022-04-22 06:21.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:21.28 [info     ] CQL_20220422061838: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00041055817135496634, 'time_algorithm_update': 0.046773691397870895, 'temp_loss': 4.002677953312163, 'temp': 0.7924760495651664, 'alpha_loss': -24.61755627979433, 'alpha': 1.4157115634466182, 'critic_loss': 3404.713456523212, 'actor_loss': 8.360223140330673, 'time_step': 0.04728510476261205, 'td_error': 1.4012323117615886, 'init_value': -10.151789665222168, 'ave_value': -10.13094450519683} step=3460
2022-04-22 06:21.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:21.45 [info     ] CQL_20220422061838: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0004123786970370078, 'time_algorithm_update': 0.04662332507227197, 'temp_loss': 3.9086964598969915, 'temp': 0.7739732739208751, 'alpha_loss': -25.567907493238504, 'alpha': 1.470245280362278, 'critic_loss': 3854.9592073473627, 'actor_loss': 9.2543744269134, 'time_step': 0.047135725186739356, 'td_error': 1.4271687658546275, 'init_value': -11.164129257202148, 'ave_value': -11.135726474935606} step=3806
2022-04-22 06:21.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:22.02 [info     ] CQL_20220422061838: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0004173027986735967, 'time_algorithm_update': 0.04648545091551853, 'temp_loss': 3.8164874404841074, 'temp': 0.7559430561313739, 'alpha_loss': -26.549972186887885, 'alpha': 1.5270167620195818, 'critic_loss': 4279.104267098311, 'actor_loss': 10.218092706162116, 'time_step': 0.04700341596768771, 'td_error': 1.4528171543239683, 'init_value': -11.951868057250977, 'ave_value': -11.935589608110597} step=4152
2022-04-22 06:22.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:22.19 [info     ] CQL_20220422061838: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0004151832161611215, 'time_algorithm_update': 0.04673288185472433, 'temp_loss': 3.7279612363418404, 'temp': 0.7383627168015937, 'alpha_loss': -27.579819381581565, 'alpha': 1.586096158261933, 'critic_loss': 4699.673169086434, 'actor_loss': 11.218456246260274, 'time_step': 0.0472461005855847, 'td_error': 1.4848448711033013, 'init_value': -12.983377456665039, 'ave_value': -12.970697282354319} step=4498
2022-04-22 06:22.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:22.36 [info     ] CQL_20220422061838: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0004091049205361074, 'time_algorithm_update': 0.04672042620664387, 'temp_loss': 3.641940710861559, 'temp': 0.7212101758560004, 'alpha_loss': -28.650978749887102, 'alpha': 1.6475580524157927, 'critic_loss': 5086.787531329028, 'actor_loss': 12.29019703065729, 'time_step': 0.04722755079324535, 'td_error': 1.5262905796962085, 'init_value': -14.394224166870117, 'ave_value': -14.355744038469252} step=4844
2022-04-22 06:22.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:22.53 [info     ] CQL_20220422061838: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00041752192326363803, 'time_algorithm_update': 0.0465873376482484, 'temp_loss': 3.557471570941065, 'temp': 0.7044730096883167, 'alpha_loss': -29.76021862581286, 'alpha': 1.7114737240565305, 'critic_loss': 5364.6506206534505, 'actor_loss': 13.37745717495163, 'time_step': 0.047098323099875036, 'td_error': 1.560716450953034, 'init_value': -15.243236541748047, 'ave_value': -15.227824033273233} step=5190
2022-04-22 06:22.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:23.10 [info     ] CQL_20220422061838: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00040643477026437746, 'time_algorithm_update': 0.046789612384200784, 'temp_loss': 3.474871226128815, 'temp': 0.6881333961652193, 'alpha_loss': -30.91729086396322, 'alpha': 1.7779263716212588, 'critic_loss': 5720.220375722543, 'actor_loss': 14.570640065077413, 'time_step': 0.047301281394296985, 'td_error': 1.603968166924193, 'init_value': -16.369237899780273, 'ave_value': -16.36079231902512} step=5536
2022-04-22 06:23.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:23.27 [info     ] CQL_20220422061838: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00040404438283402105, 'time_algorithm_update': 0.04705094670973761, 'temp_loss': 3.3944425817169894, 'temp': 0.6721848836868485, 'alpha_loss': -32.11442143655237, 'alpha': 1.8470020115030983, 'critic_loss': 6042.135232737987, 'actor_loss': 15.819616742216782, 'time_step': 0.04755240437612368, 'td_error': 1.653341689596845, 'init_value': -17.60316276550293, 'ave_value': -17.596673567343686} step=5882
2022-04-22 06:23.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:23.44 [info     ] CQL_20220422061838: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.000423708403041597, 'time_algorithm_update': 0.04725894624787259, 'temp_loss': 3.3157341418238735, 'temp': 0.6566136615469277, 'alpha_loss': -33.359461216568256, 'alpha': 1.9187905457667533, 'critic_loss': 6292.396044075144, 'actor_loss': 17.101026132616692, 'time_step': 0.04777857471752718, 'td_error': 1.7103488292547502, 'init_value': -18.980594635009766, 'ave_value': -18.967874291837358} step=6228
2022-04-22 06:23.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:24.01 [info     ] CQL_20220422061838: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003991450877547953, 'time_algorithm_update': 0.046799905038293385, 'temp_loss': 3.2399491002793948, 'temp': 0.641405918694645, 'alpha_loss': -34.65559166704299, 'alpha': 1.9933955159490508, 'critic_loss': 6580.354358121839, 'actor_loss': 18.440686677921715, 'time_step': 0.047293711948946035, 'td_error': 1.7698367392375618, 'init_value': -20.343156814575195, 'ave_value': -20.32538760580006} step=6574
2022-04-22 06:24.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:24.17 [info     ] CQL_20220422061838: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00040788939922531213, 'time_algorithm_update': 0.0428548231290255, 'temp_loss': 3.1642261068255917, 'temp': 0.6265535950660706, 'alpha_loss': -36.01568906706882, 'alpha': 2.070931910090364, 'critic_loss': 6771.475974022309, 'actor_loss': 19.673871211233855, 'time_step': 0.043358289437486944, 'td_error': 1.8296300880254763, 'init_value': -21.595016479492188, 'ave_value': -21.57833419544768} step=6920
2022-04-22 06:24.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:24.33 [info     ] CQL_20220422061838: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0004056092631610143, 'time_algorithm_update': 0.04349397097019791, 'temp_loss': 3.090803264882523, 'temp': 0.6120508940233661, 'alpha_loss': -37.419969955620736, 'alpha': 2.1515125732201374, 'critic_loss': 7038.795493418082, 'actor_loss': 20.9861045407422, 'time_step': 0.04399982422073453, 'td_error': 1.8889921999164094, 'init_value': -22.759613037109375, 'ave_value': -22.748812906546522} step=7266
2022-04-22 06:24.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:24.48 [info     ] CQL_20220422061838: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00040070927901075066, 'time_algorithm_update': 0.04383028311536491, 'temp_loss': 3.019357892130152, 'temp': 0.5978861652702265, 'alpha_loss': -38.86643418549113, 'alpha': 2.235229922167828, 'critic_loss': 7357.868737016799, 'actor_loss': 22.265819770063278, 'time_step': 0.04433035850524902, 'td_error': 1.9641512858928991, 'init_value': -24.278213500976562, 'ave_value': -24.24765250041848} step=7612
2022-04-22 06:24.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:25.04 [info     ] CQL_20220422061838: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00040069894294518266, 'time_algorithm_update': 0.04419421874029788, 'temp_loss': 2.950258586448052, 'temp': 0.5840479721223688, 'alpha_loss': -40.379671681134, 'alpha': 2.3222132604246193, 'critic_loss': 7320.828208261832, 'actor_loss': 23.375394777066447, 'time_step': 0.04469662594657413, 'td_error': 2.0143479295511475, 'init_value': -25.080398559570312, 'ave_value': -25.076677637023348} step=7958
2022-04-22 06:25.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:25.20 [info     ] CQL_20220422061838: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0004141544331015879, 'time_algorithm_update': 0.04342195064346225, 'temp_loss': 2.881845487335514, 'temp': 0.5705319555164072, 'alpha_loss': -41.954686434971805, 'alpha': 2.4125851868204986, 'critic_loss': 6718.155760307533, 'actor_loss': 24.595987485323338, 'time_step': 0.043936829346452835, 'td_error': 2.0820438601930427, 'init_value': -26.251733779907227, 'ave_value': -26.25008469161517} step=8304
2022-04-22 06:25.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:25.36 [info     ] CQL_20220422061838: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0004121905806436704, 'time_algorithm_update': 0.04372948787115902, 'temp_loss': 2.8156791139889314, 'temp': 0.5573285806041233, 'alpha_loss': -43.58773266786785, 'alpha': 2.5064809983865373, 'critic_loss': 6269.386010318823, 'actor_loss': 25.79851680270509, 'time_step': 0.044244172256116925, 'td_error': 2.1482528763261572, 'init_value': -27.343421936035156, 'ave_value': -27.34915913698966} step=8650
2022-04-22 06:25.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:25.52 [info     ] CQL_20220422061838: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.000430639768611489, 'time_algorithm_update': 0.04437712231123379, 'temp_loss': 2.7502464228282775, 'temp': 0.5444325432612028, 'alpha_loss': -45.28492655230395, 'alpha': 2.60403708295326, 'critic_loss': 6064.246407040282, 'actor_loss': 27.067315873383098, 'time_step': 0.04490116910438317, 'td_error': 2.226171636596402, 'init_value': -28.60566520690918, 'ave_value': -28.609833016809326} step=8996
2022-04-22 06:25.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:26.08 [info     ] CQL_20220422061838: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.000417306244028786, 'time_algorithm_update': 0.04366616844441849, 'temp_loss': 2.6867661910250007, 'temp': 0.5318348075268585, 'alpha_loss': -47.04155863502811, 'alpha': 2.7053869487233246, 'critic_loss': 5784.879100998013, 'actor_loss': 28.164048652428423, 'time_step': 0.04418361876052239, 'td_error': 2.2906125447495835, 'init_value': -29.597448348999023, 'ave_value': -29.609689506957633} step=9342
2022-04-22 06:26.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:26.24 [info     ] CQL_20220422061838: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0004038280145281312, 'time_algorithm_update': 0.043916751194551504, 'temp_loss': 2.6240986819901218, 'temp': 0.5195315075403004, 'alpha_loss': -48.872134346493404, 'alpha': 2.810684119345825, 'critic_loss': 5732.128245800217, 'actor_loss': 29.46040322739265, 'time_step': 0.044421140858203687, 'td_error': 2.3853879489685017, 'init_value': -31.124849319458008, 'ave_value': -31.108709055329456} step=9688
2022-04-22 06:26.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:26.41 [info     ] CQL_20220422061838: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00039921330578754404, 'time_algorithm_update': 0.04727840630305296, 'temp_loss': 2.5637222556020483, 'temp': 0.5075127961318617, 'alpha_loss': -50.772394941032275, 'alpha': 2.9200734492671283, 'critic_loss': 5896.265481055816, 'actor_loss': 30.551758837837703, 'time_step': 0.04777454571916878, 'td_error': 2.445800967031817, 'init_value': -31.964275360107422, 'ave_value': -31.959246022987276} step=10034
2022-04-22 06:26.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:26.58 [info     ] CQL_20220422061838: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00040167604567687635, 'time_algorithm_update': 0.04748324920676347, 'temp_loss': 2.504533449349376, 'temp': 0.49577064708822727, 'alpha_loss': -52.756484389994185, 'alpha': 3.0337391333772956, 'critic_loss': 5999.654673670068, 'actor_loss': 31.61759535839103, 'time_step': 0.04798343829336883, 'td_error': 2.512696064440292, 'init_value': -32.88323974609375, 'ave_value': -32.88670861545814} step=10380
2022-04-22 06:26.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:27.15 [info     ] CQL_20220422061838: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00042236747080191025, 'time_algorithm_update': 0.047295683381185366, 'temp_loss': 2.445861805381113, 'temp': 0.48430153899799194, 'alpha_loss': -54.80721855163574, 'alpha': 3.1518406812855275, 'critic_loss': 5516.367393537753, 'actor_loss': 32.463147631959416, 'time_step': 0.04782089194810459, 'td_error': 2.5839620186241934, 'init_value': -33.89934158325195, 'ave_value': -33.89442580373664} step=10726
2022-04-22 06:27.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:27.33 [info     ] CQL_20220422061838: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00042126909156755215, 'time_algorithm_update': 0.04746647790677286, 'temp_loss': 2.3900452514604336, 'temp': 0.47309925919667833, 'alpha_loss': -56.94053860482453, 'alpha': 3.2745210897026724, 'critic_loss': 5295.405561325867, 'actor_loss': 33.506212840879584, 'time_step': 0.04798581421030739, 'td_error': 2.651583128809594, 'init_value': -34.77381134033203, 'ave_value': -34.77981822130069} step=11072
2022-04-22 06:27.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:27.50 [info     ] CQL_20220422061838: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00040929303692944477, 'time_algorithm_update': 0.047874629842063594, 'temp_loss': 2.3341409822419887, 'temp': 0.46215400676851326, 'alpha_loss': -59.167643607696355, 'alpha': 3.401988999002931, 'critic_loss': 5255.982485379787, 'actor_loss': 34.40918628053169, 'time_step': 0.04838802290789654, 'td_error': 2.7221935840031866, 'init_value': -35.74485397338867, 'ave_value': -35.73493450973315} step=11418
2022-04-22 06:27.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:28.07 [info     ] CQL_20220422061838: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0004159122533191835, 'time_algorithm_update': 0.04724078784788275, 'temp_loss': 2.2801897381082434, 'temp': 0.4514634970984707, 'alpha_loss': -61.45925823939329, 'alpha': 3.5344099867550622, 'critic_loss': 4872.5973542494585, 'actor_loss': 35.26644807054817, 'time_step': 0.04775703918038076, 'td_error': 2.784751429764681, 'init_value': -36.532955169677734, 'ave_value': -36.52838615211281} step=11764
2022-04-22 06:28.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:28.25 [info     ] CQL_20220422061838: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00041085240468813507, 'time_algorithm_update': 0.04810520334739905, 'temp_loss': 2.2273095108870136, 'temp': 0.4410208685032894, 'alpha_loss': -63.85195959234513, 'alpha': 3.671987114614145, 'critic_loss': 4578.525036409411, 'actor_loss': 36.11418025066398, 'time_step': 0.04861857711924294, 'td_error': 2.8520859785268797, 'init_value': -37.3986930847168, 'ave_value': -37.39028119520872} step=12110
2022-04-22 06:28.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:28.42 [info     ] CQL_20220422061838: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00042258315003676223, 'time_algorithm_update': 0.04771794060062122, 'temp_loss': 2.1757684456819746, 'temp': 0.4308196084003228, 'alpha_loss': -66.3427167241973, 'alpha': 3.814926013092085, 'critic_loss': 4294.647453175804, 'actor_loss': 36.9183406829834, 'time_step': 0.04823821955333556, 'td_error': 2.908513127101529, 'init_value': -38.04841232299805, 'ave_value': -38.05361867391876} step=12456
2022-04-22 06:28.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:28.59 [info     ] CQL_20220422061838: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0004327607292660399, 'time_algorithm_update': 0.04731264279756932, 'temp_loss': 2.1259629423218653, 'temp': 0.4208526705214054, 'alpha_loss': -68.92054894066959, 'alpha': 3.9634127747805823, 'critic_loss': 4103.302809875136, 'actor_loss': 37.73767535259269, 'time_step': 0.047848441697269505, 'td_error': 2.9748673217353567, 'init_value': -38.88715744018555, 'ave_value': -38.88088569894341} step=12802
2022-04-22 06:28.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:29.16 [info     ] CQL_20220422061838: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00041863615113186696, 'time_algorithm_update': 0.04727567758174301, 'temp_loss': 2.076304796114133, 'temp': 0.41111631088518685, 'alpha_loss': -71.60724919931047, 'alpha': 4.117697655121026, 'critic_loss': 3983.550087636606, 'actor_loss': 38.53897141032136, 'time_step': 0.047788300955226654, 'td_error': 3.0220461949440374, 'init_value': -39.35798263549805, 'ave_value': -39.380818091076435} step=13148
2022-04-22 06:29.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:29.34 [info     ] CQL_20220422061838: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0004108358669832263, 'time_algorithm_update': 0.0476441211094057, 'temp_loss': 2.028421825067156, 'temp': 0.4016063430405766, 'alpha_loss': -74.38106241391573, 'alpha': 4.2779732880564785, 'critic_loss': 3777.812003957054, 'actor_loss': 39.1387447621781, 'time_step': 0.0481551168970979, 'td_error': 3.089099452230529, 'init_value': -40.22990417480469, 'ave_value': -40.22483604561384} step=13494
2022-04-22 06:29.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:29.51 [info     ] CQL_20220422061838: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0004278683248971928, 'time_algorithm_update': 0.04778559290604784, 'temp_loss': 1.9811712134780222, 'temp': 0.3923166258142174, 'alpha_loss': -77.2867524692778, 'alpha': 4.444492659816852, 'critic_loss': 3705.9418902975976, 'actor_loss': 39.83154668422104, 'time_step': 0.048318519068591165, 'td_error': 3.1412974605422868, 'init_value': -40.84562301635742, 'ave_value': -40.83651988073343} step=13840
2022-04-22 06:29.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:30.08 [info     ] CQL_20220422061838: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0004105809106992159, 'time_algorithm_update': 0.04695588178028261, 'temp_loss': 1.9361583893698764, 'temp': 0.3832395820087091, 'alpha_loss': -80.28782539147173, 'alpha': 4.617495498216221, 'critic_loss': 3466.31968380261, 'actor_loss': 40.44288091714672, 'time_step': 0.047463695437921956, 'td_error': 3.19303470326173, 'init_value': -41.391780853271484, 'ave_value': -41.39660458643892} step=14186
2022-04-22 06:30.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:30.25 [info     ] CQL_20220422061838: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00040393206425484897, 'time_algorithm_update': 0.0467378783088199, 'temp_loss': 1.8912335265578561, 'temp': 0.3743722667067037, 'alpha_loss': -83.4225651956018, 'alpha': 4.797250528556074, 'critic_loss': 3385.0819599835168, 'actor_loss': 41.18949744604915, 'time_step': 0.04724397962493015, 'td_error': 3.2789771598100668, 'init_value': -42.45493698120117, 'ave_value': -42.42629941829068} step=14532
2022-04-22 06:30.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:30.41 [info     ] CQL_20220422061838: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003840351380364743, 'time_algorithm_update': 0.043752817060217004, 'temp_loss': 1.8474390399938374, 'temp': 0.36571075513183726, 'alpha_loss': -86.66993559026994, 'alpha': 4.983990299908412, 'critic_loss': 3497.4261093580653, 'actor_loss': 41.9971154604344, 'time_step': 0.04422879356869383, 'td_error': 3.339490885190911, 'init_value': -43.072532653808594, 'ave_value': -43.05979315702132} step=14878
2022-04-22 06:30.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:30.58 [info     ] CQL_20220422061838: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00042344862326032165, 'time_algorithm_update': 0.04716303513918309, 'temp_loss': 1.8045268096675762, 'temp': 0.35725140580207626, 'alpha_loss': -90.03458589763311, 'alpha': 5.178014384528805, 'critic_loss': 3374.914451996026, 'actor_loss': 42.5775501273271, 'time_step': 0.04768676564872609, 'td_error': 3.3829681318127247, 'init_value': -43.515167236328125, 'ave_value': -43.51079723851949} step=15224
2022-04-22 06:30.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:31.15 [info     ] CQL_20220422061838: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.000408049263706097, 'time_algorithm_update': 0.047511112483250614, 'temp_loss': 1.7624950739689644, 'temp': 0.3489878883079297, 'alpha_loss': -93.54896417518572, 'alpha': 5.379567125629138, 'critic_loss': 3193.647033338602, 'actor_loss': 43.215775087389645, 'time_step': 0.0480152947365204, 'td_error': 3.44621822376302, 'init_value': -44.202392578125, 'ave_value': -44.19540298393181} step=15570
2022-04-22 06:31.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:31.32 [info     ] CQL_20220422061838: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0004207040533165022, 'time_algorithm_update': 0.046857947559025934, 'temp_loss': 1.7215372430795879, 'temp': 0.3409177085567761, 'alpha_loss': -97.18120096460243, 'alpha': 5.588987776309769, 'critic_loss': 3276.986432555094, 'actor_loss': 44.05781380978623, 'time_step': 0.04738050664780457, 'td_error': 3.504780898325071, 'init_value': -44.76642990112305, 'ave_value': -44.77828025691257} step=15916
2022-04-22 06:31.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:31.49 [info     ] CQL_20220422061838: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0004283355150608658, 'time_algorithm_update': 0.04712496051898581, 'temp_loss': 1.6821010539986494, 'temp': 0.33303152007519166, 'alpha_loss': -100.9637224715569, 'alpha': 5.806544885469999, 'critic_loss': 3097.245859487897, 'actor_loss': 44.5527339229694, 'time_step': 0.0476452622110444, 'td_error': 3.5664235613961304, 'init_value': -45.45671081542969, 'ave_value': -45.45474371659128} step=16262
2022-04-22 06:31.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:32.06 [info     ] CQL_20220422061838: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0004018724309226681, 'time_algorithm_update': 0.04694412553930558, 'temp_loss': 1.643525696903295, 'temp': 0.32532648733585556, 'alpha_loss': -104.89560829559503, 'alpha': 6.032570490258277, 'critic_loss': 3175.3007897173047, 'actor_loss': 45.392806367378014, 'time_step': 0.04744251063793381, 'td_error': 3.6578580226573854, 'init_value': -46.425018310546875, 'ave_value': -46.41059881891329} step=16608
2022-04-22 06:32.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:32.23 [info     ] CQL_20220422061838: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0004003261555136973, 'time_algorithm_update': 0.04651879092861462, 'temp_loss': 1.605405202490746, 'temp': 0.31779947515167944, 'alpha_loss': -108.98584392305054, 'alpha': 6.2674145767454466, 'critic_loss': 3233.3057445018967, 'actor_loss': 46.01874469470427, 'time_step': 0.047015440257298466, 'td_error': 3.7098829387032373, 'init_value': -46.92195510864258, 'ave_value': -46.919740674528484} step=16954
2022-04-22 06:32.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:32.41 [info     ] CQL_20220422061838: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00042322743145716674, 'time_algorithm_update': 0.04831123214236574, 'temp_loss': 1.5681444106763498, 'temp': 0.3104473595329792, 'alpha_loss': -113.21874360542077, 'alpha': 6.511393862652641, 'critic_loss': 3241.7407515862083, 'actor_loss': 46.61527171713768, 'time_step': 0.04883799318633328, 'td_error': 3.7601055450950276, 'init_value': -47.4011116027832, 'ave_value': -47.4018829488596} step=17300
2022-04-22 06:32.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422061838/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 06:32.42 [info     ] FQE_20220422063241: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001654883465134954, 'time_algorithm_update': 0.0053010403391826584, 'loss': 0.007394014403268874, 'time_step': 0.005543332502066371, 'init_value': -0.23955613374710083, 'ave_value': -0.22021585777401925, 'soft_opc': nan} step=166




2022-04-22 06:32.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.43 [info     ] FQE_20220422063241: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016221080917909922, 'time_algorithm_update': 0.0052745686956198815, 'loss': 0.004507194153953299, 'time_step': 0.005506172237626041, 'init_value': -0.2853004038333893, 'ave_value': -0.22964938599784096, 'soft_opc': nan} step=332




2022-04-22 06:32.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.44 [info     ] FQE_20220422063241: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00016660862658397262, 'time_algorithm_update': 0.0045705947531275, 'loss': 0.003660359951465514, 'time_step': 0.004811888717743288, 'init_value': -0.31398680806159973, 'ave_value': -0.25115850541304363, 'soft_opc': nan} step=498




2022-04-22 06:32.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.45 [info     ] FQE_20220422063241: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00017098777265433805, 'time_algorithm_update': 0.00538980386343347, 'loss': 0.0033027231602099196, 'time_step': 0.00563626835145146, 'init_value': -0.3708973824977875, 'ave_value': -0.2804922565415099, 'soft_opc': nan} step=664




2022-04-22 06:32.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.46 [info     ] FQE_20220422063241: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00016759964356939476, 'time_algorithm_update': 0.005361651799764978, 'loss': 0.003056023053892226, 'time_step': 0.005605220794677734, 'init_value': -0.40401792526245117, 'ave_value': -0.2880391551433383, 'soft_opc': nan} step=830




2022-04-22 06:32.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.47 [info     ] FQE_20220422063241: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001655386154910168, 'time_algorithm_update': 0.005175165383212538, 'loss': 0.002796789557611323, 'time_step': 0.00541703528668507, 'init_value': -0.4539913535118103, 'ave_value': -0.31690757416591453, 'soft_opc': nan} step=996




2022-04-22 06:32.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.48 [info     ] FQE_20220422063241: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00016437093895601938, 'time_algorithm_update': 0.005263726395296763, 'loss': 0.002595806208225977, 'time_step': 0.005497567624930876, 'init_value': -0.4984634816646576, 'ave_value': -0.33782227258491626, 'soft_opc': nan} step=1162




2022-04-22 06:32.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.49 [info     ] FQE_20220422063241: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00016658564647996282, 'time_algorithm_update': 0.005319240581558411, 'loss': 0.0024531592842169016, 'time_step': 0.005563208855778338, 'init_value': -0.5699739456176758, 'ave_value': -0.38068453510803685, 'soft_opc': nan} step=1328




2022-04-22 06:32.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.50 [info     ] FQE_20220422063241: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001627422240843256, 'time_algorithm_update': 0.005209362650492105, 'loss': 0.002197533027487751, 'time_step': 0.005445985908967903, 'init_value': -0.6087121963500977, 'ave_value': -0.4057778387020032, 'soft_opc': nan} step=1494




2022-04-22 06:32.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.51 [info     ] FQE_20220422063241: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001639587333403438, 'time_algorithm_update': 0.005370575261403279, 'loss': 0.0020834954318040646, 'time_step': 0.00560746566358819, 'init_value': -0.6436282396316528, 'ave_value': -0.42922819997142025, 'soft_opc': nan} step=1660




2022-04-22 06:32.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.52 [info     ] FQE_20220422063241: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00016565495226756637, 'time_algorithm_update': 0.005448423236249441, 'loss': 0.002020978805025568, 'time_step': 0.005688525107969721, 'init_value': -0.6860427260398865, 'ave_value': -0.45635900239015487, 'soft_opc': nan} step=1826




2022-04-22 06:32.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.53 [info     ] FQE_20220422063241: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00016968939677778497, 'time_algorithm_update': 0.005374140050037798, 'loss': 0.0020582275791233965, 'time_step': 0.005623741322253124, 'init_value': -0.6991065740585327, 'ave_value': -0.4518996713112469, 'soft_opc': nan} step=1992




2022-04-22 06:32.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.54 [info     ] FQE_20220422063241: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00016409948647740376, 'time_algorithm_update': 0.004649450980037092, 'loss': 0.002163614230216425, 'time_step': 0.004887735987284097, 'init_value': -0.8183069229125977, 'ave_value': -0.5561184630318133, 'soft_opc': nan} step=2158




2022-04-22 06:32.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.55 [info     ] FQE_20220422063241: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001635967967021896, 'time_algorithm_update': 0.005232954599771155, 'loss': 0.0021160548010768078, 'time_step': 0.005469945539911109, 'init_value': -0.8354558944702148, 'ave_value': -0.5598747073462954, 'soft_opc': nan} step=2324




2022-04-22 06:32.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.56 [info     ] FQE_20220422063241: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00016303234789744918, 'time_algorithm_update': 0.005177104329488364, 'loss': 0.0022270610922083527, 'time_step': 0.00541529741631933, 'init_value': -0.8798667192459106, 'ave_value': -0.5967756239445628, 'soft_opc': nan} step=2490




2022-04-22 06:32.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.57 [info     ] FQE_20220422063241: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00016884775046842643, 'time_algorithm_update': 0.005339270614715944, 'loss': 0.0021506071026529557, 'time_step': 0.005584725414414004, 'init_value': -0.9238816499710083, 'ave_value': -0.6360111661490288, 'soft_opc': nan} step=2656




2022-04-22 06:32.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.58 [info     ] FQE_20220422063241: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001727040991725692, 'time_algorithm_update': 0.0053772753979786335, 'loss': 0.00225435485866998, 'time_step': 0.0056264371757047725, 'init_value': -0.9759098291397095, 'ave_value': -0.6690300005949564, 'soft_opc': nan} step=2822




2022-04-22 06:32.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:32.59 [info     ] FQE_20220422063241: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00016519391393086998, 'time_algorithm_update': 0.00536609270486487, 'loss': 0.0023412426627880656, 'time_step': 0.005603008959666792, 'init_value': -0.9921055436134338, 'ave_value': -0.6939254066227256, 'soft_opc': nan} step=2988




2022-04-22 06:32.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.00 [info     ] FQE_20220422063241: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00017637086201863117, 'time_algorithm_update': 0.005338867026639272, 'loss': 0.002650218374090247, 'time_step': 0.00558751175202519, 'init_value': -1.0246284008026123, 'ave_value': -0.7103175364887795, 'soft_opc': nan} step=3154




2022-04-22 06:33.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.01 [info     ] FQE_20220422063241: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00016051746276487787, 'time_algorithm_update': 0.00516890186861337, 'loss': 0.002788942516395676, 'time_step': 0.00540447522358722, 'init_value': -1.0892844200134277, 'ave_value': -0.75163168657806, 'soft_opc': nan} step=3320




2022-04-22 06:33.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.02 [info     ] FQE_20220422063241: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00016629695892333984, 'time_algorithm_update': 0.005206738609865487, 'loss': 0.0028537744535231425, 'time_step': 0.0054477424506681514, 'init_value': -1.0845595598220825, 'ave_value': -0.755515981348953, 'soft_opc': nan} step=3486




2022-04-22 06:33.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.03 [info     ] FQE_20220422063241: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00016001764550266495, 'time_algorithm_update': 0.004947073488350374, 'loss': 0.0030459062966338574, 'time_step': 0.005185318280415362, 'init_value': -1.1455191373825073, 'ave_value': -0.8054340917306642, 'soft_opc': nan} step=3652




2022-04-22 06:33.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.04 [info     ] FQE_20220422063241: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00015898210456572384, 'time_algorithm_update': 0.00504648110952722, 'loss': 0.0031231470451289116, 'time_step': 0.005281501505748335, 'init_value': -1.2441413402557373, 'ave_value': -0.8891563211542529, 'soft_opc': nan} step=3818




2022-04-22 06:33.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.05 [info     ] FQE_20220422063241: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00015983811344008847, 'time_algorithm_update': 0.005193387169435799, 'loss': 0.0034061920252651066, 'time_step': 0.005428966269435653, 'init_value': -1.2678275108337402, 'ave_value': -0.905878885060146, 'soft_opc': nan} step=3984




2022-04-22 06:33.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.06 [info     ] FQE_20220422063241: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00016238315995917264, 'time_algorithm_update': 0.005237623869654644, 'loss': 0.0038146997243727007, 'time_step': 0.005476200436971274, 'init_value': -1.305140733718872, 'ave_value': -0.9210788214914837, 'soft_opc': nan} step=4150




2022-04-22 06:33.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.07 [info     ] FQE_20220422063241: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00016161907150084713, 'time_algorithm_update': 0.005233517612319395, 'loss': 0.004090764994960256, 'time_step': 0.005472996148718409, 'init_value': -1.3879083395004272, 'ave_value': -0.9804589052424506, 'soft_opc': nan} step=4316




2022-04-22 06:33.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.08 [info     ] FQE_20220422063241: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00016458781368761178, 'time_algorithm_update': 0.005251656095665622, 'loss': 0.004224212743245144, 'time_step': 0.005490353308528303, 'init_value': -1.4171630144119263, 'ave_value': -1.0135470339659232, 'soft_opc': nan} step=4482




2022-04-22 06:33.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.09 [info     ] FQE_20220422063241: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001675824084913874, 'time_algorithm_update': 0.005403913647295481, 'loss': 0.004641146981332687, 'time_step': 0.0056508406099066676, 'init_value': -1.4678006172180176, 'ave_value': -1.0352800351810885, 'soft_opc': nan} step=4648




2022-04-22 06:33.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.10 [info     ] FQE_20220422063241: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00016483484980571702, 'time_algorithm_update': 0.005428226597337838, 'loss': 0.004882579272948149, 'time_step': 0.0056710257587662665, 'init_value': -1.5065526962280273, 'ave_value': -1.0723128701333662, 'soft_opc': nan} step=4814




2022-04-22 06:33.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.11 [info     ] FQE_20220422063241: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00016261870602527297, 'time_algorithm_update': 0.004978136843945606, 'loss': 0.0051198867773805485, 'time_step': 0.005214441253478269, 'init_value': -1.5385409593582153, 'ave_value': -1.1264041927651511, 'soft_opc': nan} step=4980




2022-04-22 06:33.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.12 [info     ] FQE_20220422063241: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00016578421535262143, 'time_algorithm_update': 0.005328020417546651, 'loss': 0.005394277449505101, 'time_step': 0.005574368568788092, 'init_value': -1.5477815866470337, 'ave_value': -1.139664940819614, 'soft_opc': nan} step=5146




2022-04-22 06:33.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.13 [info     ] FQE_20220422063241: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00015949197562344103, 'time_algorithm_update': 0.004581505993762648, 'loss': 0.005778845883162513, 'time_step': 0.004815927471023008, 'init_value': -1.625589370727539, 'ave_value': -1.2040208313748375, 'soft_opc': nan} step=5312




2022-04-22 06:33.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.14 [info     ] FQE_20220422063241: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00016500576432928982, 'time_algorithm_update': 0.005328945366733046, 'loss': 0.006024827006874685, 'time_step': 0.005575656890869141, 'init_value': -1.636886715888977, 'ave_value': -1.2165781749982958, 'soft_opc': nan} step=5478




2022-04-22 06:33.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.15 [info     ] FQE_20220422063241: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00016182301992393402, 'time_algorithm_update': 0.005265745771936624, 'loss': 0.00629912463580651, 'time_step': 0.005503572613359934, 'init_value': -1.6947016716003418, 'ave_value': -1.270011883044431, 'soft_opc': nan} step=5644




2022-04-22 06:33.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.16 [info     ] FQE_20220422063241: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00015819647225988917, 'time_algorithm_update': 0.005293950977095638, 'loss': 0.006768983443224163, 'time_step': 0.005526681980454778, 'init_value': -1.6937938928604126, 'ave_value': -1.2737156132873786, 'soft_opc': nan} step=5810




2022-04-22 06:33.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.17 [info     ] FQE_20220422063241: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00016320182616452137, 'time_algorithm_update': 0.005276668502623777, 'loss': 0.006629259021551606, 'time_step': 0.005516602332333484, 'init_value': -1.6643719673156738, 'ave_value': -1.2343962450348143, 'soft_opc': nan} step=5976




2022-04-22 06:33.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.18 [info     ] FQE_20220422063241: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.0001650517245373094, 'time_algorithm_update': 0.005330937454499394, 'loss': 0.007191379556673613, 'time_step': 0.005571233220847256, 'init_value': -1.7091341018676758, 'ave_value': -1.2756365426111262, 'soft_opc': nan} step=6142




2022-04-22 06:33.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.19 [info     ] FQE_20220422063241: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00015881406255515226, 'time_algorithm_update': 0.005124877734356616, 'loss': 0.007422054035158503, 'time_step': 0.005362680159419416, 'init_value': -1.7537729740142822, 'ave_value': -1.3076022736006627, 'soft_opc': nan} step=6308




2022-04-22 06:33.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.20 [info     ] FQE_20220422063241: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001653662647109434, 'time_algorithm_update': 0.005046709474310817, 'loss': 0.007649702106249036, 'time_step': 0.005286526967243976, 'init_value': -1.8019535541534424, 'ave_value': -1.3581969563706635, 'soft_opc': nan} step=6474




2022-04-22 06:33.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.21 [info     ] FQE_20220422063241: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00016282409070486045, 'time_algorithm_update': 0.005300168531486787, 'loss': 0.007842035395905365, 'time_step': 0.005542248128408409, 'init_value': -1.7965221405029297, 'ave_value': -1.3482990997050561, 'soft_opc': nan} step=6640




2022-04-22 06:33.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.22 [info     ] FQE_20220422063241: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001579795975282968, 'time_algorithm_update': 0.005192722182676017, 'loss': 0.008254524430467647, 'time_step': 0.005423785692237946, 'init_value': -1.8276617527008057, 'ave_value': -1.3874465374729117, 'soft_opc': nan} step=6806




2022-04-22 06:33.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.22 [info     ] FQE_20220422063241: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00015735482595053064, 'time_algorithm_update': 0.004569197275552405, 'loss': 0.008294293185673565, 'time_step': 0.004802056105740099, 'init_value': -1.8457754850387573, 'ave_value': -1.4045061871845712, 'soft_opc': nan} step=6972




2022-04-22 06:33.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.23 [info     ] FQE_20220422063241: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00016669192946100808, 'time_algorithm_update': 0.005361779626593532, 'loss': 0.008846240173197767, 'time_step': 0.005604363349546869, 'init_value': -1.8242614269256592, 'ave_value': -1.3965873250251148, 'soft_opc': nan} step=7138




2022-04-22 06:33.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.24 [info     ] FQE_20220422063241: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00016448583947606833, 'time_algorithm_update': 0.005294127636645214, 'loss': 0.008832379421345463, 'time_step': 0.005537811532078019, 'init_value': -1.8842295408248901, 'ave_value': -1.453062554888914, 'soft_opc': nan} step=7304




2022-04-22 06:33.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.25 [info     ] FQE_20220422063241: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00016816696488713645, 'time_algorithm_update': 0.005246577492679458, 'loss': 0.009565149434962792, 'time_step': 0.005488756191299622, 'init_value': -1.9492385387420654, 'ave_value': -1.5150090754040526, 'soft_opc': nan} step=7470




2022-04-22 06:33.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.26 [info     ] FQE_20220422063241: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00015940005520740188, 'time_algorithm_update': 0.005132390792111316, 'loss': 0.009744338658273625, 'time_step': 0.005363918212522943, 'init_value': -2.043928623199463, 'ave_value': -1.6183043667157222, 'soft_opc': nan} step=7636




2022-04-22 06:33.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.27 [info     ] FQE_20220422063241: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001672520694962467, 'time_algorithm_update': 0.005166283573012754, 'loss': 0.010376087699859706, 'time_step': 0.005408870168479092, 'init_value': -2.050549030303955, 'ave_value': -1.6445335801120278, 'soft_opc': nan} step=7802




2022-04-22 06:33.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.28 [info     ] FQE_20220422063241: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001612600073756942, 'time_algorithm_update': 0.005177006664046322, 'loss': 0.01031942186292951, 'time_step': 0.0054150001112237035, 'init_value': -1.9891407489776611, 'ave_value': -1.5869689662512896, 'soft_opc': nan} step=7968




2022-04-22 06:33.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.29 [info     ] FQE_20220422063241: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001626474311552852, 'time_algorithm_update': 0.005317222641175051, 'loss': 0.010660266087822482, 'time_step': 0.005554232252649514, 'init_value': -2.0384559631347656, 'ave_value': -1.6451257430805681, 'soft_opc': nan} step=8134




2022-04-22 06:33.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:33.30 [info     ] FQE_20220422063241: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001771277691944536, 'time_algorithm_update': 0.005251518215041563, 'loss': 0.010979041194365113, 'time_step': 0.005501586270619588, 'init_value': -1.9843676090240479, 'ave_value': -1.5872364508617367, 'soft_opc': nan} step=8300




2022-04-22 06:33.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063241/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 06:33.31 [info     ] Directory is created at d3rlpy_logs/FQE_20220422063331
2022-04-22 06:33.31 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 06:33.31 [debug    ] Building models...
2022-04-22 06:33.31 [debug    ] Models have been built.
2022-04-22 06:33.31 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422063331/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 06:33.33 [info     ] FQE_20220422063331: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00016443673954453578, 'time_algorithm_update': 0.004917562700981318, 'loss': 0.023277481812117404, 'time_step': 0.005157334166903829, 'init_value': -0.9611395597457886, 'ave_value': -0.9665850090483824, 'soft_opc': nan} step=344




2022-04-22 06:33.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.35 [info     ] FQE_20220422063331: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00016651319902996685, 'time_algorithm_update': 0.005262727654257486, 'loss': 0.021175057001866747, 'time_step': 0.005505855693373569, 'init_value': -1.5997754335403442, 'ave_value': -1.5819848306409947, 'soft_opc': nan} step=688




2022-04-22 06:33.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.37 [info     ] FQE_20220422063331: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00016755073569541754, 'time_algorithm_update': 0.0053021665229353795, 'loss': 0.024697223445400596, 'time_step': 0.005544541187064592, 'init_value': -2.3631591796875, 'ave_value': -2.309909350794178, 'soft_opc': nan} step=1032




2022-04-22 06:33.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.39 [info     ] FQE_20220422063331: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00016858896543813306, 'time_algorithm_update': 0.00527456194855446, 'loss': 0.02693498719126246, 'time_step': 0.005516555420188017, 'init_value': -2.7919301986694336, 'ave_value': -2.712879681640917, 'soft_opc': nan} step=1376




2022-04-22 06:33.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.41 [info     ] FQE_20220422063331: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00016519080760867097, 'time_algorithm_update': 0.0050150481767432635, 'loss': 0.0332275572866418, 'time_step': 0.005253913097603377, 'init_value': -3.3726320266723633, 'ave_value': -3.333642193299156, 'soft_opc': nan} step=1720




2022-04-22 06:33.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.43 [info     ] FQE_20220422063331: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00017276267672694005, 'time_algorithm_update': 0.005147580490555874, 'loss': 0.03895950268650817, 'time_step': 0.005395138679548751, 'init_value': -3.6286048889160156, 'ave_value': -3.747659544821258, 'soft_opc': nan} step=2064




2022-04-22 06:33.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.45 [info     ] FQE_20220422063331: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00017215692719747854, 'time_algorithm_update': 0.005163241264431975, 'loss': 0.04781245713685314, 'time_step': 0.005412561948909316, 'init_value': -4.01941442489624, 'ave_value': -4.41882441239851, 'soft_opc': nan} step=2408




2022-04-22 06:33.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.47 [info     ] FQE_20220422063331: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001737364502840264, 'time_algorithm_update': 0.005288774884024332, 'loss': 0.05833651735122467, 'time_step': 0.0055418548195861105, 'init_value': -4.159250259399414, 'ave_value': -4.9687281629120985, 'soft_opc': nan} step=2752




2022-04-22 06:33.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.49 [info     ] FQE_20220422063331: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00017138137373813364, 'time_algorithm_update': 0.0051545376001402386, 'loss': 0.06783555329744813, 'time_step': 0.005399449620135995, 'init_value': -4.241888999938965, 'ave_value': -5.384755570522032, 'soft_opc': nan} step=3096




2022-04-22 06:33.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.51 [info     ] FQE_20220422063331: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00016771984654803608, 'time_algorithm_update': 0.004890087732048922, 'loss': 0.08756166254170239, 'time_step': 0.0051352180713830995, 'init_value': -4.665382385253906, 'ave_value': -6.181966185140181, 'soft_opc': nan} step=3440




2022-04-22 06:33.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.53 [info     ] FQE_20220422063331: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00016942204431045886, 'time_algorithm_update': 0.005207838707192, 'loss': 0.09604634544378969, 'time_step': 0.0054564517597819485, 'init_value': -5.187653541564941, 'ave_value': -6.8623562856703195, 'soft_opc': nan} step=3784




2022-04-22 06:33.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.55 [info     ] FQE_20220422063331: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00017870442811832873, 'time_algorithm_update': 0.005263903806375903, 'loss': 0.1161620555348088, 'time_step': 0.005521425674127978, 'init_value': -5.935173511505127, 'ave_value': -7.765624493494764, 'soft_opc': nan} step=4128




2022-04-22 06:33.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.57 [info     ] FQE_20220422063331: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00016974432523860487, 'time_algorithm_update': 0.005278495855109636, 'loss': 0.1283488352024971, 'time_step': 0.005524737197299337, 'init_value': -6.651910781860352, 'ave_value': -8.458717552820842, 'soft_opc': nan} step=4472




2022-04-22 06:33.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:33.59 [info     ] FQE_20220422063331: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00017008601233016614, 'time_algorithm_update': 0.005155726227649423, 'loss': 0.14594084434319549, 'time_step': 0.005401901727498964, 'init_value': -7.4995012283325195, 'ave_value': -9.402947307988867, 'soft_opc': nan} step=4816




2022-04-22 06:33.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.01 [info     ] FQE_20220422063331: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001667211222094159, 'time_algorithm_update': 0.004974011071892672, 'loss': 0.16574894210735205, 'time_step': 0.005217493966568348, 'init_value': -7.9946746826171875, 'ave_value': -9.762097793313263, 'soft_opc': nan} step=5160




2022-04-22 06:34.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.03 [info     ] FQE_20220422063331: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00017784224000088003, 'time_algorithm_update': 0.005284811175146768, 'loss': 0.18539715773794194, 'time_step': 0.005539908658626468, 'init_value': -8.940781593322754, 'ave_value': -10.608764930441346, 'soft_opc': nan} step=5504




2022-04-22 06:34.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.05 [info     ] FQE_20220422063331: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001719372217045274, 'time_algorithm_update': 0.0052165985107421875, 'loss': 0.2141062639989392, 'time_step': 0.005465292653372121, 'init_value': -9.295207977294922, 'ave_value': -10.950815040267466, 'soft_opc': nan} step=5848




2022-04-22 06:34.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.07 [info     ] FQE_20220422063331: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00017128780830738156, 'time_algorithm_update': 0.005245016757832017, 'loss': 0.24200953191774355, 'time_step': 0.005495496267496154, 'init_value': -9.865352630615234, 'ave_value': -11.540586109599099, 'soft_opc': nan} step=6192




2022-04-22 06:34.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.09 [info     ] FQE_20220422063331: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00017860878345578215, 'time_algorithm_update': 0.005196976107220317, 'loss': 0.26379750044150063, 'time_step': 0.005450324263683585, 'init_value': -10.45315170288086, 'ave_value': -12.181268427709142, 'soft_opc': nan} step=6536




2022-04-22 06:34.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.11 [info     ] FQE_20220422063331: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00017137929450633914, 'time_algorithm_update': 0.004981758289558943, 'loss': 0.27800830973983676, 'time_step': 0.0052302167859188346, 'init_value': -10.750938415527344, 'ave_value': -12.456622603941742, 'soft_opc': nan} step=6880




2022-04-22 06:34.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.13 [info     ] FQE_20220422063331: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001685480738795081, 'time_algorithm_update': 0.005171081354451734, 'loss': 0.29708425129417243, 'time_step': 0.0054149343523868295, 'init_value': -11.192581176757812, 'ave_value': -12.899593339378837, 'soft_opc': nan} step=7224




2022-04-22 06:34.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.15 [info     ] FQE_20220422063331: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.000168136385984199, 'time_algorithm_update': 0.0052068656267121784, 'loss': 0.3155219069855331, 'time_step': 0.0054530009280803595, 'init_value': -11.611982345581055, 'ave_value': -13.505704694655766, 'soft_opc': nan} step=7568




2022-04-22 06:34.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.17 [info     ] FQE_20220422063331: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00017451338989790097, 'time_algorithm_update': 0.005248651948086051, 'loss': 0.32997196492045944, 'time_step': 0.00550083504166714, 'init_value': -11.798624038696289, 'ave_value': -13.629501786981793, 'soft_opc': nan} step=7912




2022-04-22 06:34.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.19 [info     ] FQE_20220422063331: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00016832213069117345, 'time_algorithm_update': 0.005052630984505942, 'loss': 0.3436171419398729, 'time_step': 0.005296560914017433, 'init_value': -11.902098655700684, 'ave_value': -13.814691854081838, 'soft_opc': nan} step=8256




2022-04-22 06:34.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.21 [info     ] FQE_20220422063331: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00016967917597571084, 'time_algorithm_update': 0.00508374946061955, 'loss': 0.35308640187627877, 'time_step': 0.005328725936800935, 'init_value': -11.521711349487305, 'ave_value': -13.748107707451794, 'soft_opc': nan} step=8600




2022-04-22 06:34.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.23 [info     ] FQE_20220422063331: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00018163614494856015, 'time_algorithm_update': 0.005349847466446633, 'loss': 0.36498067426213693, 'time_step': 0.0056069216062856276, 'init_value': -11.646284103393555, 'ave_value': -13.989712036496577, 'soft_opc': nan} step=8944




2022-04-22 06:34.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.25 [info     ] FQE_20220422063331: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00016832767530929212, 'time_algorithm_update': 0.00526018267454103, 'loss': 0.3693251627995524, 'time_step': 0.005505628364030705, 'init_value': -11.790249824523926, 'ave_value': -14.368316096220132, 'soft_opc': nan} step=9288




2022-04-22 06:34.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.27 [info     ] FQE_20220422063331: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00017253188199775163, 'time_algorithm_update': 0.005208030689594357, 'loss': 0.3712310960765408, 'time_step': 0.005459634370582048, 'init_value': -11.9557523727417, 'ave_value': -14.701084551627135, 'soft_opc': nan} step=9632




2022-04-22 06:34.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.29 [info     ] FQE_20220422063331: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001651180344958638, 'time_algorithm_update': 0.0048045543737189715, 'loss': 0.3726654224460511, 'time_step': 0.005047379538070324, 'init_value': -11.66316032409668, 'ave_value': -14.653543276239715, 'soft_opc': nan} step=9976




2022-04-22 06:34.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.31 [info     ] FQE_20220422063331: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00016796311666799146, 'time_algorithm_update': 0.0052744337292604665, 'loss': 0.37006199507172716, 'time_step': 0.005518218112546344, 'init_value': -11.738598823547363, 'ave_value': -14.7615331892566, 'soft_opc': nan} step=10320




2022-04-22 06:34.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.33 [info     ] FQE_20220422063331: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00016572586325711982, 'time_algorithm_update': 0.005188594030779462, 'loss': 0.3749076373615237, 'time_step': 0.005432218313217163, 'init_value': -11.740172386169434, 'ave_value': -14.686030655464489, 'soft_opc': nan} step=10664




2022-04-22 06:34.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.35 [info     ] FQE_20220422063331: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00017064324645108954, 'time_algorithm_update': 0.005208268415096195, 'loss': 0.3650843463946394, 'time_step': 0.005458766637846481, 'init_value': -11.517082214355469, 'ave_value': -14.588028998810458, 'soft_opc': nan} step=11008




2022-04-22 06:34.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.37 [info     ] FQE_20220422063331: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00017018789468809615, 'time_algorithm_update': 0.0052394492681636365, 'loss': 0.37367969664181905, 'time_step': 0.00548412425573482, 'init_value': -11.448806762695312, 'ave_value': -14.805080118794036, 'soft_opc': nan} step=11352




2022-04-22 06:34.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.39 [info     ] FQE_20220422063331: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001683623291725336, 'time_algorithm_update': 0.0048275042411892915, 'loss': 0.3527611654717475, 'time_step': 0.0050719241763270174, 'init_value': -11.421494483947754, 'ave_value': -14.757078652807957, 'soft_opc': nan} step=11696




2022-04-22 06:34.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.41 [info     ] FQE_20220422063331: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001685349054114763, 'time_algorithm_update': 0.005224710980127024, 'loss': 0.3640195614452539, 'time_step': 0.0054713321286578515, 'init_value': -11.536298751831055, 'ave_value': -14.990044070789297, 'soft_opc': nan} step=12040




2022-04-22 06:34.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.43 [info     ] FQE_20220422063331: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00017023848932842876, 'time_algorithm_update': 0.005324646484020145, 'loss': 0.3698046055465374, 'time_step': 0.005573688551437023, 'init_value': -11.639732360839844, 'ave_value': -15.1911443522931, 'soft_opc': nan} step=12384




2022-04-22 06:34.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.45 [info     ] FQE_20220422063331: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00016449565111204635, 'time_algorithm_update': 0.005089938640594482, 'loss': 0.3457713227686581, 'time_step': 0.005330461402272069, 'init_value': -11.74222183227539, 'ave_value': -15.550537810553504, 'soft_opc': nan} step=12728




2022-04-22 06:34.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.47 [info     ] FQE_20220422063331: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001699543276498484, 'time_algorithm_update': 0.005109299753987512, 'loss': 0.3691604140615307, 'time_step': 0.005358277365218761, 'init_value': -11.136627197265625, 'ave_value': -14.948216451324212, 'soft_opc': nan} step=13072




2022-04-22 06:34.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.49 [info     ] FQE_20220422063331: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001715740492177564, 'time_algorithm_update': 0.005087637624075246, 'loss': 0.3702348049941234, 'time_step': 0.005338643179383389, 'init_value': -11.454558372497559, 'ave_value': -15.175009055975993, 'soft_opc': nan} step=13416




2022-04-22 06:34.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.51 [info     ] FQE_20220422063331: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00017216385797012683, 'time_algorithm_update': 0.0052634955838669175, 'loss': 0.3744375867888245, 'time_step': 0.005512996468433114, 'init_value': -11.44385051727295, 'ave_value': -15.366877617348228, 'soft_opc': nan} step=13760




2022-04-22 06:34.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.53 [info     ] FQE_20220422063331: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001674190510150998, 'time_algorithm_update': 0.0052229713561923, 'loss': 0.3729906718078784, 'time_step': 0.005467209012009377, 'init_value': -11.610311508178711, 'ave_value': -15.396109150081815, 'soft_opc': nan} step=14104




2022-04-22 06:34.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.55 [info     ] FQE_20220422063331: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.000171698110048161, 'time_algorithm_update': 0.005195173413254494, 'loss': 0.37020498503616817, 'time_step': 0.0054452204427053764, 'init_value': -11.148542404174805, 'ave_value': -14.950526276486297, 'soft_opc': nan} step=14448




2022-04-22 06:34.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.57 [info     ] FQE_20220422063331: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00016665874525558118, 'time_algorithm_update': 0.004549800656562628, 'loss': 0.38047017259956445, 'time_step': 0.004793606525243715, 'init_value': -10.916234970092773, 'ave_value': -14.763376643607259, 'soft_opc': nan} step=14792




2022-04-22 06:34.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:34.59 [info     ] FQE_20220422063331: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00016768519268479457, 'time_algorithm_update': 0.003874809936035511, 'loss': 0.3860278405427803, 'time_step': 0.0041179234205290325, 'init_value': -11.118685722351074, 'ave_value': -14.650847255891222, 'soft_opc': nan} step=15136




2022-04-22 06:34.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:35.00 [info     ] FQE_20220422063331: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00015939876090648563, 'time_algorithm_update': 0.0038076729275459465, 'loss': 0.39088782188915755, 'time_step': 0.004040090150611345, 'init_value': -10.954627990722656, 'ave_value': -14.634824220609206, 'soft_opc': nan} step=15480




2022-04-22 06:35.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:35.02 [info     ] FQE_20220422063331: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00016350940216419308, 'time_algorithm_update': 0.0038186242414075273, 'loss': 0.40140308973346944, 'time_step': 0.004054353680721549, 'init_value': -11.417021751403809, 'ave_value': -14.97253815640427, 'soft_opc': nan} step=15824




2022-04-22 06:35.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:35.03 [info     ] FQE_20220422063331: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00016675508299539255, 'time_algorithm_update': 0.0038729102112526116, 'loss': 0.41328803606082276, 'time_step': 0.0041135029737339466, 'init_value': -11.518567085266113, 'ave_value': -15.035832178910045, 'soft_opc': nan} step=16168




2022-04-22 06:35.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:35.05 [info     ] FQE_20220422063331: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001654008100199145, 'time_algorithm_update': 0.003985903290815131, 'loss': 0.41337903471511983, 'time_step': 0.004228980042213617, 'init_value': -11.759912490844727, 'ave_value': -14.990982766945633, 'soft_opc': nan} step=16512




2022-04-22 06:35.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:35.06 [info     ] FQE_20220422063331: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00016388297080993652, 'time_algorithm_update': 0.0038257878880168118, 'loss': 0.4343793832393753, 'time_step': 0.0040648295435794565, 'init_value': -12.015647888183594, 'ave_value': -15.034988678843916, 'soft_opc': nan} step=16856




2022-04-22 06:35.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:35.08 [info     ] FQE_20220422063331: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00017305723456449286, 'time_algorithm_update': 0.00401155477346376, 'loss': 0.444055629956406, 'time_step': 0.004262825777364331, 'init_value': -11.637921333312988, 'ave_value': -14.809577294876316, 'soft_opc': nan} step=17200




2022-04-22 06:35.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422063331/model_17200.pt
search iteration:  26
using hyper params:  [0.004452690457406191, 0.008162127740581532, 6.430683596969691e-05, 7]
2022-04-22 06:35.08 [debug    ] RoundIterator is selected.
2022-04-22 06:35.08 [info     ] Directory is created at d3rlpy_logs/CQL_20220422063508
2022-04-22 06:35.08 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 06:35.08 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 06:35.08 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422063508/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.004452690457406191, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:35.23 [info     ] CQL_20220422063508: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0004391876948362141, 'time_algorithm_update': 0.04240203937354115, 'temp_loss': 4.929104365365354, 'temp': 0.9886671184115328, 'alpha_loss': -17.742951183649847, 'alpha': 1.0177063266665949, 'critic_loss': 150.36107523041653, 'actor_loss': 7.0008617013132985, 'time_step': 0.042942446780342584, 'td_error': 1.489395187150195, 'init_value': -11.962594985961914, 'ave_value': -11.003144373821387} step=346
2022-04-22 06:35.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:35.39 [info     ] CQL_20220422063508: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0004251251330954491, 'time_algorithm_update': 0.04221865758730497, 'temp_loss': 4.87995388604313, 'temp': 0.9668539552330282, 'alpha_loss': -18.400641540571446, 'alpha': 1.0541569266705155, 'critic_loss': 270.3257944184232, 'actor_loss': 13.76754283078144, 'time_step': 0.042742699556957094, 'td_error': 1.579003839006169, 'init_value': -15.479243278503418, 'ave_value': -14.454894649886361} step=692
2022-04-22 06:35.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:35.55 [info     ] CQL_20220422063508: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0004334215483913532, 'time_algorithm_update': 0.043915468144279, 'temp_loss': 4.771546737307069, 'temp': 0.9458857552509088, 'alpha_loss': -19.05173211290657, 'alpha': 1.0924128597182345, 'critic_loss': 553.9964097745157, 'actor_loss': 15.658091806951974, 'time_step': 0.04445455460190084, 'td_error': 1.481244253557081, 'init_value': -14.228529930114746, 'ave_value': -13.472475691167254} step=1038
2022-04-22 06:35.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:36.11 [info     ] CQL_20220422063508: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00044064094565507303, 'time_algorithm_update': 0.04387800955358957, 'temp_loss': 4.67194479876171, 'temp': 0.9255719755081772, 'alpha_loss': -19.71208878886493, 'alpha': 1.132528973108082, 'critic_loss': 997.0317428677068, 'actor_loss': 11.043213054623907, 'time_step': 0.04442153087241112, 'td_error': 1.3314425826787055, 'init_value': -8.805892944335938, 'ave_value': -8.575495339864348} step=1384
2022-04-22 06:36.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:36.26 [info     ] CQL_20220422063508: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00043050333254599155, 'time_algorithm_update': 0.043664252826933224, 'temp_loss': 4.572716382197562, 'temp': 0.9058374554091106, 'alpha_loss': -20.42675408600383, 'alpha': 1.1745448577610744, 'critic_loss': 1541.7006391403993, 'actor_loss': 6.690618548090058, 'time_step': 0.04419821673045958, 'td_error': 1.3308979936833907, 'init_value': -7.818098545074463, 'ave_value': -7.713062520725798} step=1730
2022-04-22 06:36.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:36.42 [info     ] CQL_20220422063508: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.000431846331998792, 'time_algorithm_update': 0.04382989999186786, 'temp_loss': 4.477434348508802, 'temp': 0.8866404362152077, 'alpha_loss': -21.18928499717933, 'alpha': 1.218543473695744, 'critic_loss': 2038.7006899442288, 'actor_loss': 6.5115887057574495, 'time_step': 0.04436396312162366, 'td_error': 1.3461013720692916, 'init_value': -8.265576362609863, 'ave_value': -8.195317980258483} step=2076
2022-04-22 06:36.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:36.58 [info     ] CQL_20220422063508: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0004353164937454841, 'time_algorithm_update': 0.04330532743751658, 'temp_loss': 4.382817499899451, 'temp': 0.8679468066706134, 'alpha_loss': -21.992212852301627, 'alpha': 1.2645677018027774, 'critic_loss': 2489.8211980389724, 'actor_loss': 7.05165405356126, 'time_step': 0.04384299440880042, 'td_error': 1.3657540524967964, 'init_value': -9.01491641998291, 'ave_value': -8.962944871161774} step=2422
2022-04-22 06:36.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:37.14 [info     ] CQL_20220422063508: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00042722473254782614, 'time_algorithm_update': 0.0433108634342348, 'temp_loss': 4.29126206298784, 'temp': 0.8497221032319041, 'alpha_loss': -22.827572684756593, 'alpha': 1.312653861982974, 'critic_loss': 2919.147496923546, 'actor_loss': 7.789305108131011, 'time_step': 0.04383844585087947, 'td_error': 1.384275038113526, 'init_value': -9.618117332458496, 'ave_value': -9.583831995389314} step=2768
2022-04-22 06:37.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:37.31 [info     ] CQL_20220422063508: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00043262291505846674, 'time_algorithm_update': 0.04723819073914103, 'temp_loss': 4.200695214243982, 'temp': 0.8319432029145302, 'alpha_loss': -23.699692952150553, 'alpha': 1.3628305637767548, 'critic_loss': 3329.6217894802203, 'actor_loss': 8.632568665322541, 'time_step': 0.047775880449769124, 'td_error': 1.4081367541395153, 'init_value': -10.484967231750488, 'ave_value': -10.457918581774926} step=3114
2022-04-22 06:37.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:37.48 [info     ] CQL_20220422063508: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00046066672815752856, 'time_algorithm_update': 0.04634038423527183, 'temp_loss': 4.113886126204033, 'temp': 0.8145817623317586, 'alpha_loss': -24.611621261332076, 'alpha': 1.415148668206496, 'critic_loss': 3704.1423643255507, 'actor_loss': 9.612208275436666, 'time_step': 0.04690549552785179, 'td_error': 1.439533843197943, 'init_value': -11.636503219604492, 'ave_value': -11.60438487042517} step=3460
2022-04-22 06:37.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:38.05 [info     ] CQL_20220422063508: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00043440691997550124, 'time_algorithm_update': 0.04713312945613971, 'temp_loss': 4.027888384857619, 'temp': 0.7976189777685728, 'alpha_loss': -25.55576047180705, 'alpha': 1.469650514553048, 'critic_loss': 4080.5026862524837, 'actor_loss': 10.653936333738999, 'time_step': 0.04767115336622117, 'td_error': 1.4692746273308217, 'init_value': -12.545748710632324, 'ave_value': -12.52477007794007} step=3806
2022-04-22 06:38.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:38.22 [info     ] CQL_20220422063508: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0004351297554942225, 'time_algorithm_update': 0.04672915673669363, 'temp_loss': 3.9441614130328846, 'temp': 0.7810415689311275, 'alpha_loss': -26.544138302003716, 'alpha': 1.5263934914087285, 'critic_loss': 4412.3187178242415, 'actor_loss': 11.872706093540081, 'time_step': 0.04726415011235055, 'td_error': 1.51448487278091, 'init_value': -14.002145767211914, 'ave_value': -13.976155870364368} step=4152
2022-04-22 06:38.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:38.39 [info     ] CQL_20220422063508: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00041752674676090305, 'time_algorithm_update': 0.04719903703369846, 'temp_loss': 3.863220085987466, 'temp': 0.7648277797795444, 'alpha_loss': -27.568078564770648, 'alpha': 1.5854477965073779, 'critic_loss': 4735.500215916275, 'actor_loss': 13.131768292774355, 'time_step': 0.047714593782590305, 'td_error': 1.5576894423672016, 'init_value': -15.190635681152344, 'ave_value': -15.171897792228311} step=4498
2022-04-22 06:38.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:38.56 [info     ] CQL_20220422063508: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00042834860741058526, 'time_algorithm_update': 0.04696583610049562, 'temp_loss': 3.7831474453038565, 'temp': 0.7489663653635565, 'alpha_loss': -28.63572019786504, 'alpha': 1.6468758689874858, 'critic_loss': 5004.336905595195, 'actor_loss': 14.476004741095394, 'time_step': 0.04749124794337102, 'td_error': 1.6009606734557005, 'init_value': -16.30457305908203, 'ave_value': -16.296448112856783} step=4844
2022-04-22 06:38.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:39.13 [info     ] CQL_20220422063508: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00041756395659694784, 'time_algorithm_update': 0.04617810111514406, 'temp_loss': 3.7033096434753063, 'temp': 0.7334496935323483, 'alpha_loss': -29.750800882460755, 'alpha': 1.710762832550644, 'critic_loss': 5247.837138446081, 'actor_loss': 15.926053474404219, 'time_step': 0.046692436141085765, 'td_error': 1.6680811200879053, 'init_value': -18.019458770751953, 'ave_value': -18.004451088161478} step=5190
2022-04-22 06:39.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:39.30 [info     ] CQL_20220422063508: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0004397451533058475, 'time_algorithm_update': 0.0470273239764175, 'temp_loss': 3.627365488537474, 'temp': 0.7182660240658446, 'alpha_loss': -30.901004206927524, 'alpha': 1.7771875086547322, 'critic_loss': 5603.045760138187, 'actor_loss': 17.471535186547076, 'time_step': 0.04756586468977735, 'td_error': 1.7294050415714635, 'init_value': -19.405620574951172, 'ave_value': -19.39806444105688} step=5536
2022-04-22 06:39.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:39.47 [info     ] CQL_20220422063508: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00044309128226572376, 'time_algorithm_update': 0.04664085159412009, 'temp_loss': 3.5512259006500244, 'temp': 0.7034043524995705, 'alpha_loss': -32.10172650993215, 'alpha': 1.846227882569925, 'critic_loss': 5903.766097757858, 'actor_loss': 19.06990721597837, 'time_step': 0.04718334550802418, 'td_error': 1.826815082985648, 'init_value': -21.599349975585938, 'ave_value': -21.5750988671315} step=5882
2022-04-22 06:39.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:40.04 [info     ] CQL_20220422063508: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00042010662872667256, 'time_algorithm_update': 0.04702716342286568, 'temp_loss': 3.478031233556009, 'temp': 0.6888577886399506, 'alpha_loss': -33.355263638358586, 'alpha': 1.9179965708986184, 'critic_loss': 6141.009312624187, 'actor_loss': 20.719426083426946, 'time_step': 0.04754466266301326, 'td_error': 1.8899358417902201, 'init_value': -22.78656578063965, 'ave_value': -22.77612700238506} step=6228
2022-04-22 06:40.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:40.21 [info     ] CQL_20220422063508: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0004276829647880069, 'time_algorithm_update': 0.04675934838421772, 'temp_loss': 3.4076413605254507, 'temp': 0.6746104487104913, 'alpha_loss': -34.6523272453705, 'alpha': 1.9925860831503235, 'critic_loss': 6387.576640399206, 'actor_loss': 22.416364862739695, 'time_step': 0.047283030658788076, 'td_error': 1.984334800205222, 'init_value': -24.57709503173828, 'ave_value': -24.563967281791935} step=6574
2022-04-22 06:40.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:40.38 [info     ] CQL_20220422063508: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00043384325986652706, 'time_algorithm_update': 0.047223071142428186, 'temp_loss': 3.3364864601565234, 'temp': 0.6606618886049083, 'alpha_loss': -35.997333802239744, 'alpha': 2.0700929481859154, 'critic_loss': 6713.1307577673415, 'actor_loss': 24.183424277112664, 'time_step': 0.04775485344704865, 'td_error': 2.0844282165774044, 'init_value': -26.342859268188477, 'ave_value': -26.332063867832147} step=6920
2022-04-22 06:40.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:40.55 [info     ] CQL_20220422063508: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00046146191613522566, 'time_algorithm_update': 0.04649903732917212, 'temp_loss': 3.2681784967466587, 'temp': 0.6470024127491637, 'alpha_loss': -37.39839338842844, 'alpha': 2.1506256078709067, 'critic_loss': 7063.878456071622, 'actor_loss': 25.953314218906996, 'time_step': 0.04706111946546963, 'td_error': 2.187536495117867, 'init_value': -28.05335807800293, 'ave_value': -28.043579519388516} step=7266
2022-04-22 06:40.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:41.12 [info     ] CQL_20220422063508: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00043530546860887824, 'time_algorithm_update': 0.046321077842932906, 'temp_loss': 3.2006454205926445, 'temp': 0.6336278420996804, 'alpha_loss': -38.856398058764505, 'alpha': 2.234308529451403, 'critic_loss': 7422.859850580293, 'actor_loss': 27.790348245918405, 'time_step': 0.04685683402022874, 'td_error': 2.315519644206293, 'init_value': -30.084335327148438, 'ave_value': -30.07217821959581} step=7612
2022-04-22 06:41.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:41.29 [info     ] CQL_20220422063508: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0004364334778978645, 'time_algorithm_update': 0.0465015317663292, 'temp_loss': 3.134561533183721, 'temp': 0.6205309827548231, 'alpha_loss': -40.36263158280036, 'alpha': 2.321250504841005, 'critic_loss': 7868.306042268786, 'actor_loss': 29.617426287921177, 'time_step': 0.04703825953378843, 'td_error': 2.4323668141872288, 'init_value': -31.802160263061523, 'ave_value': -31.793890779512424} step=7958
2022-04-22 06:41.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:41.45 [info     ] CQL_20220422063508: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00042149166151278283, 'time_algorithm_update': 0.04601374113490816, 'temp_loss': 3.0694836915572945, 'temp': 0.6077072262074906, 'alpha_loss': -41.9434116010721, 'alpha': 2.411592486965863, 'critic_loss': 8244.463538373826, 'actor_loss': 31.439577119198837, 'time_step': 0.04652918487614979, 'td_error': 2.5422431508504335, 'init_value': -33.32436752319336, 'ave_value': -33.321628602391} step=8304
2022-04-22 06:41.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:42.02 [info     ] CQL_20220422063508: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0004241376942981874, 'time_algorithm_update': 0.04644410872045969, 'temp_loss': 3.006319467042912, 'temp': 0.5951485289314579, 'alpha_loss': -43.56661024810262, 'alpha': 2.505450124685475, 'critic_loss': 8579.499525830925, 'actor_loss': 33.19924247609398, 'time_step': 0.046969832712515244, 'td_error': 2.6847336215174793, 'init_value': -35.263427734375, 'ave_value': -35.25552674370843} step=8650
2022-04-22 06:42.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:42.19 [info     ] CQL_20220422063508: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00044771012543253814, 'time_algorithm_update': 0.04665914643017543, 'temp_loss': 2.943963469797476, 'temp': 0.5828494476100613, 'alpha_loss': -45.255612621417626, 'alpha': 2.6029497812249067, 'critic_loss': 8890.016313674132, 'actor_loss': 35.00801187857038, 'time_step': 0.047206665739158674, 'td_error': 2.8255493976496133, 'init_value': -37.072994232177734, 'ave_value': -37.063784568826414} step=8996
2022-04-22 06:42.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:42.36 [info     ] CQL_20220422063508: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0004365671576792105, 'time_algorithm_update': 0.046584932790326244, 'temp_loss': 2.8830815994670624, 'temp': 0.5708067193196688, 'alpha_loss': -47.0263688743459, 'alpha': 2.7042598751928075, 'critic_loss': 9085.615276711525, 'actor_loss': 36.724324198816554, 'time_step': 0.04711487114084938, 'td_error': 2.9495615027669166, 'init_value': -38.581356048583984, 'ave_value': -38.57875554684838} step=9342
2022-04-22 06:42.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:42.53 [info     ] CQL_20220422063508: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00043489891669653744, 'time_algorithm_update': 0.046961340601044586, 'temp_loss': 2.8232150188071192, 'temp': 0.5590145158974421, 'alpha_loss': -48.847907325435926, 'alpha': 2.8095133042748954, 'critic_loss': 8318.452396811777, 'actor_loss': 38.15762853898065, 'time_step': 0.04749854176030683, 'td_error': 3.061521924188143, 'init_value': -39.88313293457031, 'ave_value': -39.88546727715425} step=9688
2022-04-22 06:42.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:43.10 [info     ] CQL_20220422063508: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0004355783407398731, 'time_algorithm_update': 0.046382996388253446, 'temp_loss': 2.7647533085993947, 'temp': 0.5474643510890145, 'alpha_loss': -50.75851722673185, 'alpha': 2.9188665647727214, 'critic_loss': 7418.102484025018, 'actor_loss': 39.85460877831961, 'time_step': 0.0469202740344009, 'td_error': 3.2337375883182684, 'init_value': -41.89982604980469, 'ave_value': -41.891854962402746} step=10034
2022-04-22 06:43.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:43.27 [info     ] CQL_20220422063508: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00044869894237187555, 'time_algorithm_update': 0.04775718112901456, 'temp_loss': 2.7079234178355662, 'temp': 0.5361543456253978, 'alpha_loss': -52.73122015716024, 'alpha': 3.032487185704226, 'critic_loss': 6833.647932284141, 'actor_loss': 41.58894021800488, 'time_step': 0.04830743742816021, 'td_error': 3.3909152933918474, 'init_value': -43.62038040161133, 'ave_value': -43.61354108008235} step=10380
2022-04-22 06:43.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:43.44 [info     ] CQL_20220422063508: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0004365010068595754, 'time_algorithm_update': 0.047539133557005425, 'temp_loss': 2.6514716520474826, 'temp': 0.525077439284738, 'alpha_loss': -54.78281177675104, 'alpha': 3.1505385554594802, 'critic_loss': 6530.609751795068, 'actor_loss': 43.32895877595582, 'time_step': 0.04807798435233232, 'td_error': 3.530264385221392, 'init_value': -45.080413818359375, 'ave_value': -45.07727752529993} step=10726
2022-04-22 06:43.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:44.01 [info     ] CQL_20220422063508: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0004362315800837699, 'time_algorithm_update': 0.045952391073193856, 'temp_loss': 2.5967242476568058, 'temp': 0.514230567079059, 'alpha_loss': -56.91369193413354, 'alpha': 3.273167116104523, 'critic_loss': 5810.50561241194, 'actor_loss': 44.99514206020818, 'time_step': 0.04649221759311037, 'td_error': 3.708901699226806, 'init_value': -46.93886947631836, 'ave_value': -46.93159698533356} step=11072
2022-04-22 06:44.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:44.16 [info     ] CQL_20220422063508: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00036947506700637026, 'time_algorithm_update': 0.039919565867826426, 'temp_loss': 2.543531227663073, 'temp': 0.5036064674916295, 'alpha_loss': -59.12093975089189, 'alpha': 3.400579444245796, 'critic_loss': 5286.662106552565, 'actor_loss': 46.749187226929415, 'time_step': 0.040371812836972276, 'td_error': 3.891322004594607, 'init_value': -48.7518424987793, 'ave_value': -48.741087948693774} step=11418
2022-04-22 06:44.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:44.30 [info     ] CQL_20220422063508: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003860279314779822, 'time_algorithm_update': 0.04058214488057043, 'temp_loss': 2.4909127867979812, 'temp': 0.4932009389117963, 'alpha_loss': -61.432700658809246, 'alpha': 3.5329508250848405, 'critic_loss': 4890.117416117233, 'actor_loss': 48.480926987752746, 'time_step': 0.041059709008718505, 'td_error': 4.046084160726068, 'init_value': -50.201080322265625, 'ave_value': -50.197326911213615} step=11764
2022-04-22 06:44.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:44.45 [info     ] CQL_20220422063508: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00037221412438188673, 'time_algorithm_update': 0.04011073553493257, 'temp_loss': 2.4394809145458862, 'temp': 0.48301094107200643, 'alpha_loss': -63.827183111554625, 'alpha': 3.670484115622636, 'critic_loss': 4481.751426740878, 'actor_loss': 50.1822226640117, 'time_step': 0.0405680630248406, 'td_error': 4.216088509782516, 'init_value': -51.76368713378906, 'ave_value': -51.76553949452848} step=12110
2022-04-22 06:44.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:44.59 [info     ] CQL_20220422063508: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00037072021837179374, 'time_algorithm_update': 0.040037688492350496, 'temp_loss': 2.3894572816143147, 'temp': 0.47303129982397046, 'alpha_loss': -66.31280533013316, 'alpha': 3.8133730660973257, 'critic_loss': 4112.111091040462, 'actor_loss': 51.82511998876671, 'time_step': 0.040490202821059036, 'td_error': 4.397670640418847, 'init_value': -53.403533935546875, 'ave_value': -53.403177158795245} step=12456
2022-04-22 06:45.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:45.14 [info     ] CQL_20220422063508: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003668600424176696, 'time_algorithm_update': 0.03995213618857323, 'temp_loss': 2.339410411829204, 'temp': 0.4632571984749998, 'alpha_loss': -68.89130994763677, 'alpha': 3.961822451883658, 'critic_loss': 3776.7640020998915, 'actor_loss': 53.4389177994921, 'time_step': 0.04039978360854132, 'td_error': 4.593577369092657, 'init_value': -55.16541290283203, 'ave_value': -55.156130634703985} step=12802
2022-04-22 06:45.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:45.28 [info     ] CQL_20220422063508: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0003783716631762554, 'time_algorithm_update': 0.03945502173693883, 'temp_loss': 2.2913135262583033, 'temp': 0.4536865580977732, 'alpha_loss': -71.57220031209074, 'alpha': 4.11605647395801, 'critic_loss': 3497.1157960395594, 'actor_loss': 55.01197333142937, 'time_step': 0.03991527226618949, 'td_error': 4.749552216877342, 'init_value': -56.44264602661133, 'ave_value': -56.44316487518064} step=13148
2022-04-22 06:45.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:45.43 [info     ] CQL_20220422063508: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003805112287488287, 'time_algorithm_update': 0.039994898559041106, 'temp_loss': 2.243745390390385, 'temp': 0.44431385354844133, 'alpha_loss': -74.36259996270857, 'alpha': 4.27629094868037, 'critic_loss': 3225.351542742955, 'actor_loss': 56.45856747048439, 'time_step': 0.0404556410850128, 'td_error': 4.932963904761759, 'init_value': -58.03030014038086, 'ave_value': -58.02069107062899} step=13494
2022-04-22 06:45.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:45.57 [info     ] CQL_20220422063508: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00036863233312706036, 'time_algorithm_update': 0.03800415165851571, 'temp_loss': 2.1980301734339984, 'temp': 0.43513295218090103, 'alpha_loss': -77.24924195570753, 'alpha': 4.442755717073561, 'critic_loss': 3010.8882943743224, 'actor_loss': 57.78285554654337, 'time_step': 0.03845198581673506, 'td_error': 5.078292887414125, 'init_value': -59.17528533935547, 'ave_value': -59.17187394313532} step=13840
2022-04-22 06:45.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:46.09 [info     ] CQL_20220422063508: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00036167891728395673, 'time_algorithm_update': 0.035095530438285345, 'temp_loss': 2.151943356315525, 'temp': 0.4261430552756855, 'alpha_loss': -80.25982282340871, 'alpha': 4.615707856382249, 'critic_loss': 2843.506222057894, 'actor_loss': 59.00475606752958, 'time_step': 0.035539497529840196, 'td_error': 5.2306469426494155, 'init_value': -60.42654037475586, 'ave_value': -60.41967765659786} step=14186
2022-04-22 06:46.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:46.22 [info     ] CQL_20220422063508: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003648231484297383, 'time_algorithm_update': 0.035453976923330674, 'temp_loss': 2.1079016841215896, 'temp': 0.41733945036210074, 'alpha_loss': -83.38849207707223, 'alpha': 4.795393342916676, 'critic_loss': 2813.2199495348627, 'actor_loss': 60.1668155427613, 'time_step': 0.035904413702860044, 'td_error': 5.360774480269673, 'init_value': -61.453094482421875, 'ave_value': -61.44629614890159} step=14532
2022-04-22 06:46.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:46.35 [info     ] CQL_20220422063508: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003765290872210023, 'time_algorithm_update': 0.03451710146975655, 'temp_loss': 2.064277642724142, 'temp': 0.408716009531407, 'alpha_loss': -86.63345345734172, 'alpha': 4.982077391850466, 'critic_loss': 2747.62974804123, 'actor_loss': 61.084023403983586, 'time_step': 0.034976771801193324, 'td_error': 5.465441307422808, 'init_value': -62.21323776245117, 'ave_value': -62.21423635830976} step=14878
2022-04-22 06:46.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:46.48 [info     ] CQL_20220422063508: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00037712237738460477, 'time_algorithm_update': 0.035090560168889215, 'temp_loss': 2.0214531063344436, 'temp': 0.4002717163176895, 'alpha_loss': -90.0050281017502, 'alpha': 5.176031065814068, 'critic_loss': 2832.1844990460168, 'actor_loss': 61.92589413637371, 'time_step': 0.03555046685169198, 'td_error': 5.574111346373305, 'init_value': -63.0902214050293, 'ave_value': -63.08432465015627} step=15224
2022-04-22 06:46.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:47.00 [info     ] CQL_20220422063508: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00036902510361864387, 'time_algorithm_update': 0.03540891925723567, 'temp_loss': 1.9797934638282466, 'temp': 0.39200250916398327, 'alpha_loss': -93.50507006342012, 'alpha': 5.377527886043394, 'critic_loss': 2988.4397614760205, 'actor_loss': 62.628721986891904, 'time_step': 0.03585713171545481, 'td_error': 5.655740613100672, 'init_value': -63.6754035949707, 'ave_value': -63.67083237884507} step=15570
2022-04-22 06:47.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:47.13 [info     ] CQL_20220422063508: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003738423992443636, 'time_algorithm_update': 0.03526478833545839, 'temp_loss': 1.9388089865618359, 'temp': 0.38390416888832357, 'alpha_loss': -97.16014709913662, 'alpha': 5.586878580854118, 'critic_loss': 3192.150690508716, 'actor_loss': 63.23592525548329, 'time_step': 0.03571982810952071, 'td_error': 5.7408120634329975, 'init_value': -64.28473663330078, 'ave_value': -64.28486154193615} step=15916
2022-04-22 06:47.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:47.26 [info     ] CQL_20220422063508: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003628551615455936, 'time_algorithm_update': 0.03499938504544297, 'temp_loss': 1.8990558075767032, 'temp': 0.37597179249187423, 'alpha_loss': -100.9318044364797, 'alpha': 5.80437890504826, 'critic_loss': 3259.6565632902816, 'actor_loss': 63.78721423507426, 'time_step': 0.035436681929351274, 'td_error': 5.774063376236778, 'init_value': -64.59625244140625, 'ave_value': -64.58447660949909} step=16262
2022-04-22 06:47.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:47.39 [info     ] CQL_20220422063508: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003704611276615562, 'time_algorithm_update': 0.03526035829775595, 'temp_loss': 1.8592646900629033, 'temp': 0.36820407392661697, 'alpha_loss': -104.8617595385954, 'alpha': 6.030344657126189, 'critic_loss': 3082.875062799178, 'actor_loss': 63.72768147418954, 'time_step': 0.03570834749695883, 'td_error': 5.752286741360035, 'init_value': -64.38716125488281, 'ave_value': -64.38605974286922} step=16608
2022-04-22 06:47.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:47.52 [info     ] CQL_20220422063508: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00038615127519376014, 'time_algorithm_update': 0.03513755412460062, 'temp_loss': 1.8212614993139498, 'temp': 0.36059826466044936, 'alpha_loss': -108.938745641984, 'alpha': 6.265090067262594, 'critic_loss': 3024.931911578757, 'actor_loss': 63.77894741124501, 'time_step': 0.035602497916690186, 'td_error': 5.774234090573702, 'init_value': -64.61140441894531, 'ave_value': -64.59718714244222} step=16954
2022-04-22 06:47.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:48.05 [info     ] CQL_20220422063508: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003628882369554112, 'time_algorithm_update': 0.037254819291175446, 'temp_loss': 1.7832550272087142, 'temp': 0.35314894256564233, 'alpha_loss': -113.17611711976156, 'alpha': 6.5089685227829595, 'critic_loss': 2917.252545130735, 'actor_loss': 63.71862763200881, 'time_step': 0.037693863659235785, 'td_error': 5.741363248467855, 'init_value': -64.31452941894531, 'ave_value': -64.30918438958014} step=17300
2022-04-22 06:48.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422063508/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 06:48.07 [info     ] FQE_20220422064805: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001349104456154697, 'time_algorithm_update': 0.006857119410871023, 'loss': 0.007055914288958961, 'time_step': 0.007054481161646096, 'init_value': -0.12704460322856903, 'ave_value': -0.062167036318144686, 'soft_opc': nan} step=166




2022-04-22 06:48.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.07 [info     ] FQE_20220422064805: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00012460243271057866, 'time_algorithm_update': 0.004687070846557617, 'loss': 0.004487847401592207, 'time_step': 0.0048701375363821, 'init_value': -0.18197879195213318, 'ave_value': -0.07842568743969776, 'soft_opc': nan} step=332




2022-04-22 06:48.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.08 [info     ] FQE_20220422064805: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00013060598488313607, 'time_algorithm_update': 0.004610334534242928, 'loss': 0.0035739104994236075, 'time_step': 0.004802611937005836, 'init_value': -0.21272236108779907, 'ave_value': -0.10028029029542866, 'soft_opc': nan} step=498




2022-04-22 06:48.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.09 [info     ] FQE_20220422064805: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.000125230076801346, 'time_algorithm_update': 0.0050076906939586965, 'loss': 0.0032045026898877807, 'time_step': 0.005192130444997765, 'init_value': -0.2648654580116272, 'ave_value': -0.13759381620345537, 'soft_opc': nan} step=664




2022-04-22 06:48.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.10 [info     ] FQE_20220422064805: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00012463690286659333, 'time_algorithm_update': 0.004880609282528062, 'loss': 0.002821411337866167, 'time_step': 0.005062117634049381, 'init_value': -0.27731943130493164, 'ave_value': -0.13390419820401198, 'soft_opc': nan} step=830




2022-04-22 06:48.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.11 [info     ] FQE_20220422064805: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00012934207916259766, 'time_algorithm_update': 0.0049297479261834936, 'loss': 0.0024981959793065867, 'time_step': 0.005118670233760972, 'init_value': -0.3036642372608185, 'ave_value': -0.15678029689803585, 'soft_opc': nan} step=996




2022-04-22 06:48.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.12 [info     ] FQE_20220422064805: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001224724643201713, 'time_algorithm_update': 0.004620896764548428, 'loss': 0.0022291062352056785, 'time_step': 0.0048032884138176244, 'init_value': -0.33943939208984375, 'ave_value': -0.181298190236788, 'soft_opc': nan} step=1162




2022-04-22 06:48.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.13 [info     ] FQE_20220422064805: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001256652625210314, 'time_algorithm_update': 0.004832511924835573, 'loss': 0.0020431597592275455, 'time_step': 0.005015794053135148, 'init_value': -0.386980265378952, 'ave_value': -0.22276711943213248, 'soft_opc': nan} step=1328




2022-04-22 06:48.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.14 [info     ] FQE_20220422064805: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00012749505330281085, 'time_algorithm_update': 0.004843617060098304, 'loss': 0.0018553764830029245, 'time_step': 0.005035210804766919, 'init_value': -0.414393812417984, 'ave_value': -0.2430473747367442, 'soft_opc': nan} step=1494




2022-04-22 06:48.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.15 [info     ] FQE_20220422064805: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00012750797961131637, 'time_algorithm_update': 0.004787812750023532, 'loss': 0.0017654466213439647, 'time_step': 0.0049714625599872634, 'init_value': -0.44916826486587524, 'ave_value': -0.2682161662698228, 'soft_opc': nan} step=1660




2022-04-22 06:48.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.16 [info     ] FQE_20220422064805: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00011738380753850362, 'time_algorithm_update': 0.003985504069960261, 'loss': 0.0017096319909807834, 'time_step': 0.004155523805733186, 'init_value': -0.5001734495162964, 'ave_value': -0.3136444628873194, 'soft_opc': nan} step=1826




2022-04-22 06:48.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.16 [info     ] FQE_20220422064805: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00012390153953828007, 'time_algorithm_update': 0.004724114774221398, 'loss': 0.0016277497762278374, 'time_step': 0.0049083548856068806, 'init_value': -0.49846380949020386, 'ave_value': -0.3021604059381528, 'soft_opc': nan} step=1992




2022-04-22 06:48.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.17 [info     ] FQE_20220422064805: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.000127709055521402, 'time_algorithm_update': 0.004817711301596768, 'loss': 0.0016771519142174981, 'time_step': 0.005004072763833655, 'init_value': -0.5684483051300049, 'ave_value': -0.3588392683237724, 'soft_opc': nan} step=2158




2022-04-22 06:48.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.18 [info     ] FQE_20220422064805: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00012521140546683805, 'time_algorithm_update': 0.004799841398216155, 'loss': 0.0017678105980312138, 'time_step': 0.004984008260520108, 'init_value': -0.6062943935394287, 'ave_value': -0.39981778422748115, 'soft_opc': nan} step=2324




2022-04-22 06:48.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.19 [info     ] FQE_20220422064805: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00012644514980086362, 'time_algorithm_update': 0.004796479121748224, 'loss': 0.001707059670315427, 'time_step': 0.004983022988560688, 'init_value': -0.6515426635742188, 'ave_value': -0.4433678814531975, 'soft_opc': nan} step=2490




2022-04-22 06:48.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.20 [info     ] FQE_20220422064805: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00012180891381688865, 'time_algorithm_update': 0.004704153681375894, 'loss': 0.001821758488546608, 'time_step': 0.004883919853761971, 'init_value': -0.71722412109375, 'ave_value': -0.5024940540714122, 'soft_opc': nan} step=2656




2022-04-22 06:48.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.21 [info     ] FQE_20220422064805: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00012055506189185453, 'time_algorithm_update': 0.004710425813514066, 'loss': 0.001719528223080729, 'time_step': 0.004886091473590897, 'init_value': -0.7484936118125916, 'ave_value': -0.5097704651455085, 'soft_opc': nan} step=2822




2022-04-22 06:48.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.22 [info     ] FQE_20220422064805: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00012496724186173403, 'time_algorithm_update': 0.004701528204492776, 'loss': 0.001883548363399445, 'time_step': 0.004885590220072183, 'init_value': -0.7795660495758057, 'ave_value': -0.5342750396768285, 'soft_opc': nan} step=2988




2022-04-22 06:48.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.23 [info     ] FQE_20220422064805: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00012123441121664392, 'time_algorithm_update': 0.004638946199991617, 'loss': 0.0020202094014363744, 'time_step': 0.004816812205027385, 'init_value': -0.80743408203125, 'ave_value': -0.5706353886648677, 'soft_opc': nan} step=3154




2022-04-22 06:48.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.23 [info     ] FQE_20220422064805: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012108504054058029, 'time_algorithm_update': 0.003943127321909709, 'loss': 0.0020857384159785122, 'time_step': 0.004121777522994812, 'init_value': -0.860586404800415, 'ave_value': -0.6153718278382544, 'soft_opc': nan} step=3320




2022-04-22 06:48.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.24 [info     ] FQE_20220422064805: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001245952514280756, 'time_algorithm_update': 0.0047040301633168415, 'loss': 0.002152708375455492, 'time_step': 0.004891915493700878, 'init_value': -0.878467321395874, 'ave_value': -0.6207279935639628, 'soft_opc': nan} step=3486




2022-04-22 06:48.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.25 [info     ] FQE_20220422064805: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001227367355162839, 'time_algorithm_update': 0.004624669810375535, 'loss': 0.0024113798910592042, 'time_step': 0.004804539393229657, 'init_value': -0.956520676612854, 'ave_value': -0.6823264835505151, 'soft_opc': nan} step=3652




2022-04-22 06:48.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.26 [info     ] FQE_20220422064805: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00012713024415165545, 'time_algorithm_update': 0.004823601389505777, 'loss': 0.002602605625505963, 'time_step': 0.00501342853867864, 'init_value': -0.9830072522163391, 'ave_value': -0.6891048589868152, 'soft_opc': nan} step=3818




2022-04-22 06:48.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.27 [info     ] FQE_20220422064805: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001251912978758295, 'time_algorithm_update': 0.004780674555215491, 'loss': 0.0028017082450426103, 'time_step': 0.004966450024800128, 'init_value': -1.0189673900604248, 'ave_value': -0.7214756784343941, 'soft_opc': nan} step=3984




2022-04-22 06:48.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.28 [info     ] FQE_20220422064805: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001216810869883342, 'time_algorithm_update': 0.004759883306112634, 'loss': 0.0029611603645481595, 'time_step': 0.004939017525638442, 'init_value': -1.0777854919433594, 'ave_value': -0.7526380123853734, 'soft_opc': nan} step=4150




2022-04-22 06:48.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.29 [info     ] FQE_20220422064805: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00012110371187508824, 'time_algorithm_update': 0.00470130271222218, 'loss': 0.0031468637137889795, 'time_step': 0.004883286464645202, 'init_value': -1.1228922605514526, 'ave_value': -0.7725915883382497, 'soft_opc': nan} step=4316




2022-04-22 06:48.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.30 [info     ] FQE_20220422064805: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00012440997433949667, 'time_algorithm_update': 0.00474452110658209, 'loss': 0.003220728319769456, 'time_step': 0.00493358847606613, 'init_value': -1.1673648357391357, 'ave_value': -0.8059804657216159, 'soft_opc': nan} step=4482




2022-04-22 06:48.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.31 [info     ] FQE_20220422064805: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001240753265748541, 'time_algorithm_update': 0.004691806184240134, 'loss': 0.003527033904529491, 'time_step': 0.004874021173959755, 'init_value': -1.2281163930892944, 'ave_value': -0.858160562155483, 'soft_opc': nan} step=4648




2022-04-22 06:48.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.31 [info     ] FQE_20220422064805: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00012550296553646224, 'time_algorithm_update': 0.004098676773438971, 'loss': 0.0038335796377932966, 'time_step': 0.004286846482610128, 'init_value': -1.2974481582641602, 'ave_value': -0.892155421898608, 'soft_opc': nan} step=4814




2022-04-22 06:48.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.32 [info     ] FQE_20220422064805: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00012587926473962255, 'time_algorithm_update': 0.0047567048704767806, 'loss': 0.004077477474431844, 'time_step': 0.00494095934442727, 'init_value': -1.3610939979553223, 'ave_value': -0.937150952044606, 'soft_opc': nan} step=4980




2022-04-22 06:48.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.33 [info     ] FQE_20220422064805: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00012207318501300122, 'time_algorithm_update': 0.004604240497910833, 'loss': 0.0043332264551763835, 'time_step': 0.0047815693430153725, 'init_value': -1.3905367851257324, 'ave_value': -0.9487618904569195, 'soft_opc': nan} step=5146




2022-04-22 06:48.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.34 [info     ] FQE_20220422064805: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00012946416096514966, 'time_algorithm_update': 0.004996660244033997, 'loss': 0.0044921304885415835, 'time_step': 0.005188070147870535, 'init_value': -1.5245455503463745, 'ave_value': -1.0477150929636565, 'soft_opc': nan} step=5312




2022-04-22 06:48.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.35 [info     ] FQE_20220422064805: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00012603725295468984, 'time_algorithm_update': 0.004673628921968391, 'loss': 0.0046414731882852, 'time_step': 0.004857465445277202, 'init_value': -1.5442463159561157, 'ave_value': -1.0552583955563941, 'soft_opc': nan} step=5478




2022-04-22 06:48.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.36 [info     ] FQE_20220422064805: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00012311877974544662, 'time_algorithm_update': 0.004637808684843132, 'loss': 0.005064522879617569, 'time_step': 0.004821052034217191, 'init_value': -1.605456829071045, 'ave_value': -1.0824298428321206, 'soft_opc': nan} step=5644




2022-04-22 06:48.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.37 [info     ] FQE_20220422064805: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00012474174959113798, 'time_algorithm_update': 0.004893022847462849, 'loss': 0.005361133293811153, 'time_step': 0.005077976778329137, 'init_value': -1.66489839553833, 'ave_value': -1.1058122897405296, 'soft_opc': nan} step=5810




2022-04-22 06:48.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.38 [info     ] FQE_20220422064805: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001249141003712114, 'time_algorithm_update': 0.00472313381103148, 'loss': 0.005339284571580566, 'time_step': 0.004909699221691453, 'init_value': -1.7095763683319092, 'ave_value': -1.1298281695658072, 'soft_opc': nan} step=5976




2022-04-22 06:48.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.39 [info     ] FQE_20220422064805: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00012131053281117634, 'time_algorithm_update': 0.004626130483236657, 'loss': 0.005832915995915207, 'time_step': 0.00480525177645396, 'init_value': -1.7580207586288452, 'ave_value': -1.1542284991449534, 'soft_opc': nan} step=6142




2022-04-22 06:48.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.39 [info     ] FQE_20220422064805: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00012836686099868222, 'time_algorithm_update': 0.004060713641614799, 'loss': 0.006035802091846344, 'time_step': 0.00425187363682023, 'init_value': -1.836348295211792, 'ave_value': -1.213731681809316, 'soft_opc': nan} step=6308




2022-04-22 06:48.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.40 [info     ] FQE_20220422064805: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00013087169233574924, 'time_algorithm_update': 0.005001022155026355, 'loss': 0.006252915242193155, 'time_step': 0.005198666848332049, 'init_value': -1.8726238012313843, 'ave_value': -1.237937618571602, 'soft_opc': nan} step=6474




2022-04-22 06:48.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.41 [info     ] FQE_20220422064805: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001270972102521414, 'time_algorithm_update': 0.0046748554850199135, 'loss': 0.006793842308190988, 'time_step': 0.004862707781504436, 'init_value': -1.9972413778305054, 'ave_value': -1.3221828259984953, 'soft_opc': nan} step=6640




2022-04-22 06:48.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.42 [info     ] FQE_20220422064805: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00012429794633244895, 'time_algorithm_update': 0.004663330962859005, 'loss': 0.00724236253268192, 'time_step': 0.004847112908420792, 'init_value': -2.0663669109344482, 'ave_value': -1.363771620725055, 'soft_opc': nan} step=6806




2022-04-22 06:48.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.43 [info     ] FQE_20220422064805: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00012504479971276708, 'time_algorithm_update': 0.00460367030408009, 'loss': 0.00762483105536814, 'time_step': 0.004787420651998864, 'init_value': -2.1241188049316406, 'ave_value': -1.4132306766662712, 'soft_opc': nan} step=6972




2022-04-22 06:48.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.44 [info     ] FQE_20220422064805: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.000128241906683129, 'time_algorithm_update': 0.004764865679913257, 'loss': 0.007733144699729115, 'time_step': 0.00495175999331187, 'init_value': -2.1705002784729004, 'ave_value': -1.4299932742428438, 'soft_opc': nan} step=7138




2022-04-22 06:48.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.45 [info     ] FQE_20220422064805: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00012131196906767695, 'time_algorithm_update': 0.004723449787461614, 'loss': 0.008116888772999218, 'time_step': 0.004902370004768831, 'init_value': -2.2230336666107178, 'ave_value': -1.4520604485696231, 'soft_opc': nan} step=7304




2022-04-22 06:48.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.46 [info     ] FQE_20220422064805: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00012535646737339985, 'time_algorithm_update': 0.004674158900617117, 'loss': 0.008667935806267658, 'time_step': 0.004860689841121076, 'init_value': -2.232203483581543, 'ave_value': -1.4403139717988083, 'soft_opc': nan} step=7470




2022-04-22 06:48.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.46 [info     ] FQE_20220422064805: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00013154816914753742, 'time_algorithm_update': 0.004793791885835579, 'loss': 0.00884020709308532, 'time_step': 0.004986607884786215, 'init_value': -2.280242681503296, 'ave_value': -1.4623707845301561, 'soft_opc': nan} step=7636




2022-04-22 06:48.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.47 [info     ] FQE_20220422064805: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00012224840830607587, 'time_algorithm_update': 0.003935559686407985, 'loss': 0.009270062231976281, 'time_step': 0.00411628240562347, 'init_value': -2.3106987476348877, 'ave_value': -1.4766825876112708, 'soft_opc': nan} step=7802




2022-04-22 06:48.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.48 [info     ] FQE_20220422064805: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001325535486979657, 'time_algorithm_update': 0.0050410477511854054, 'loss': 0.00947339429834953, 'time_step': 0.005235962120883436, 'init_value': -2.3302059173583984, 'ave_value': -1.4793677597562573, 'soft_opc': nan} step=7968




2022-04-22 06:48.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.49 [info     ] FQE_20220422064805: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00012586777468761765, 'time_algorithm_update': 0.004898028201367481, 'loss': 0.009939494653933132, 'time_step': 0.0050835810511945245, 'init_value': -2.4397850036621094, 'ave_value': -1.5705751162790065, 'soft_opc': nan} step=8134




2022-04-22 06:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 06:48.50 [info     ] FQE_20220422064805: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00012245092047266214, 'time_algorithm_update': 0.004840460168309958, 'loss': 0.010120383203268074, 'time_step': 0.00501986009528838, 'init_value': -2.4707305431365967, 'ave_value': -1.5731644302623364, 'soft_opc': nan} step=8300




2022-04-22 06:48.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064805/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 06:48.50 [info     ] Directory is created at d3rlpy_logs/FQE_20220422064850
2022-04-22 06:48.50 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 06:48.50 [debug    ] Building models...
2022-04-22 06:48.50 [debug    ] Models have been built.
2022-04-22 06:48.50 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422064850/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 06:48.52 [info     ] FQE_20220422064850: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00013930437176726585, 'time_algorithm_update': 0.004777285941811495, 'loss': 0.02570532830323764, 'time_step': 0.0049783656763476, 'init_value': -1.2713656425476074, 'ave_value': -1.27285384104864, 'soft_opc': nan} step=344




2022-04-22 06:48.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:48.54 [info     ] FQE_20220422064850: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00013824881509292956, 'time_algorithm_update': 0.004852473042732061, 'loss': 0.023592965425598588, 'time_step': 0.005054184863733691, 'init_value': -2.0292184352874756, 'ave_value': -2.019520802460275, 'soft_opc': nan} step=688




2022-04-22 06:48.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:48.56 [info     ] FQE_20220422064850: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00013671434202859568, 'time_algorithm_update': 0.004472297984500264, 'loss': 0.02641623891175313, 'time_step': 0.004669940748880076, 'init_value': -2.844672679901123, 'ave_value': -2.8801979657363246, 'soft_opc': nan} step=1032




2022-04-22 06:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:48.58 [info     ] FQE_20220422064850: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00013486521188602892, 'time_algorithm_update': 0.004809839780940566, 'loss': 0.028543207786305872, 'time_step': 0.005004706077797468, 'init_value': -3.292821168899536, 'ave_value': -3.4306269340552724, 'soft_opc': nan} step=1376




2022-04-22 06:48.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.00 [info     ] FQE_20220422064850: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001353760098302087, 'time_algorithm_update': 0.004860929971517518, 'loss': 0.03443876688292813, 'time_step': 0.005056548257206761, 'init_value': -3.850107192993164, 'ave_value': -4.1382007135867, 'soft_opc': nan} step=1720




2022-04-22 06:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.01 [info     ] FQE_20220422064850: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00013894951620767283, 'time_algorithm_update': 0.004884799552518268, 'loss': 0.04109577674332053, 'time_step': 0.005087598811748416, 'init_value': -4.15118408203125, 'ave_value': -4.609748377416048, 'soft_opc': nan} step=2064




2022-04-22 06:49.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.03 [info     ] FQE_20220422064850: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00013572116230809413, 'time_algorithm_update': 0.0043520899706108625, 'loss': 0.04981427561480899, 'time_step': 0.004549095103907031, 'init_value': -4.553530216217041, 'ave_value': -5.247911111601033, 'soft_opc': nan} step=2408




2022-04-22 06:49.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.05 [info     ] FQE_20220422064850: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00013455540634864984, 'time_algorithm_update': 0.00479393019232639, 'loss': 0.06213607808553376, 'time_step': 0.0049886904483617735, 'init_value': -4.817165374755859, 'ave_value': -5.7875182568691335, 'soft_opc': nan} step=2752




2022-04-22 06:49.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.07 [info     ] FQE_20220422064850: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00013849485752194426, 'time_algorithm_update': 0.004798790743184644, 'loss': 0.06994679081795174, 'time_step': 0.004999663247618564, 'init_value': -4.833547592163086, 'ave_value': -6.037762896035303, 'soft_opc': nan} step=3096




2022-04-22 06:49.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.09 [info     ] FQE_20220422064850: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001408125078955362, 'time_algorithm_update': 0.004706900480181672, 'loss': 0.0880113374685522, 'time_step': 0.004909292903057364, 'init_value': -5.11417818069458, 'ave_value': -6.613947537136977, 'soft_opc': nan} step=3440




2022-04-22 06:49.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.10 [info     ] FQE_20220422064850: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00013915328092353288, 'time_algorithm_update': 0.004684667254603187, 'loss': 0.10209766428289545, 'time_step': 0.0048801205879033995, 'init_value': -5.457483291625977, 'ave_value': -7.240127722693708, 'soft_opc': nan} step=3784




2022-04-22 06:49.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.12 [info     ] FQE_20220422064850: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001479172429373098, 'time_algorithm_update': 0.004694775786510733, 'loss': 0.1222980924892824, 'time_step': 0.004899890616882679, 'init_value': -5.749208450317383, 'ave_value': -7.8537699680360085, 'soft_opc': nan} step=4128




2022-04-22 06:49.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.14 [info     ] FQE_20220422064850: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001314365586569143, 'time_algorithm_update': 0.004642659841581832, 'loss': 0.13567820735015843, 'time_step': 0.0048323439997296, 'init_value': -6.168619632720947, 'ave_value': -8.450144783319352, 'soft_opc': nan} step=4472




2022-04-22 06:49.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.16 [info     ] FQE_20220422064850: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00013894119928049487, 'time_algorithm_update': 0.004814694094103436, 'loss': 0.15115501091620603, 'time_step': 0.005015458478484043, 'init_value': -6.6786603927612305, 'ave_value': -9.21723377730798, 'soft_opc': nan} step=4816




2022-04-22 06:49.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.18 [info     ] FQE_20220422064850: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00013230151908342227, 'time_algorithm_update': 0.004655311967051307, 'loss': 0.16624296946061212, 'time_step': 0.004846890305363854, 'init_value': -6.997426986694336, 'ave_value': -9.684368585536744, 'soft_opc': nan} step=5160




2022-04-22 06:49.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.20 [info     ] FQE_20220422064850: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00013637057570523993, 'time_algorithm_update': 0.004509505837462669, 'loss': 0.17956108944273966, 'time_step': 0.00470507075620252, 'init_value': -7.858240127563477, 'ave_value': -10.575854709883732, 'soft_opc': nan} step=5504




2022-04-22 06:49.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.21 [info     ] FQE_20220422064850: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00013705880142921625, 'time_algorithm_update': 0.004853244437727817, 'loss': 0.19469713147859588, 'time_step': 0.005052812570749327, 'init_value': -7.937894821166992, 'ave_value': -10.689890255305816, 'soft_opc': nan} step=5848




2022-04-22 06:49.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.23 [info     ] FQE_20220422064850: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00013680513515028844, 'time_algorithm_update': 0.0048314277515854945, 'loss': 0.19666993481561887, 'time_step': 0.005028606847275135, 'init_value': -8.273709297180176, 'ave_value': -11.094231034805663, 'soft_opc': nan} step=6192




2022-04-22 06:49.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.25 [info     ] FQE_20220422064850: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00013845396596331929, 'time_algorithm_update': 0.004835131556488747, 'loss': 0.20514819106130405, 'time_step': 0.005035352568293727, 'init_value': -8.850824356079102, 'ave_value': -11.650024812750786, 'soft_opc': nan} step=6536




2022-04-22 06:49.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.27 [info     ] FQE_20220422064850: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00013585284698841183, 'time_algorithm_update': 0.004382916661195977, 'loss': 0.20652408922784204, 'time_step': 0.004578874554744986, 'init_value': -9.167902946472168, 'ave_value': -12.002467266157655, 'soft_opc': nan} step=6880




2022-04-22 06:49.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.29 [info     ] FQE_20220422064850: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00014406997104023778, 'time_algorithm_update': 0.004805948152098545, 'loss': 0.2095385629488805, 'time_step': 0.005010811395423357, 'init_value': -9.827253341674805, 'ave_value': -12.507651126723703, 'soft_opc': nan} step=7224




2022-04-22 06:49.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.31 [info     ] FQE_20220422064850: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00013776920562566712, 'time_algorithm_update': 0.0046971280907475675, 'loss': 0.2161432394411328, 'time_step': 0.004894280156423879, 'init_value': -10.173032760620117, 'ave_value': -12.691934279711111, 'soft_opc': nan} step=7568




2022-04-22 06:49.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.32 [info     ] FQE_20220422064850: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00013796049495076024, 'time_algorithm_update': 0.004807734905287277, 'loss': 0.22327881071413325, 'time_step': 0.005005297272704368, 'init_value': -11.1899995803833, 'ave_value': -13.560576217858172, 'soft_opc': nan} step=7912




2022-04-22 06:49.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.34 [info     ] FQE_20220422064850: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00013719741688218227, 'time_algorithm_update': 0.0047072158303371695, 'loss': 0.2294247873928831, 'time_step': 0.00490869962891867, 'init_value': -11.72630500793457, 'ave_value': -14.04209448414905, 'soft_opc': nan} step=8256




2022-04-22 06:49.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.36 [info     ] FQE_20220422064850: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00013805336730424747, 'time_algorithm_update': 0.004676477160564688, 'loss': 0.23436488327028793, 'time_step': 0.004875831825788631, 'init_value': -12.046140670776367, 'ave_value': -14.280518979461746, 'soft_opc': nan} step=8600




2022-04-22 06:49.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.38 [info     ] FQE_20220422064850: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00013848030289938284, 'time_algorithm_update': 0.004782220651937085, 'loss': 0.24127675221453226, 'time_step': 0.004983140285625015, 'init_value': -12.77109432220459, 'ave_value': -14.886029816377773, 'soft_opc': nan} step=8944




2022-04-22 06:49.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.40 [info     ] FQE_20220422064850: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00013617720714835234, 'time_algorithm_update': 0.004788926174474317, 'loss': 0.24989042841547796, 'time_step': 0.004988481139027795, 'init_value': -13.357604026794434, 'ave_value': -15.296740982716333, 'soft_opc': nan} step=9288




2022-04-22 06:49.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.42 [info     ] FQE_20220422064850: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00013846852058588074, 'time_algorithm_update': 0.004823686771614607, 'loss': 0.2648265724568519, 'time_step': 0.005027084849601568, 'init_value': -14.08016300201416, 'ave_value': -16.00205092964073, 'soft_opc': nan} step=9632




2022-04-22 06:49.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.43 [info     ] FQE_20220422064850: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00013622225717056628, 'time_algorithm_update': 0.004487748062887857, 'loss': 0.27562200628914113, 'time_step': 0.004686123409936595, 'init_value': -14.397415161132812, 'ave_value': -16.205583422810577, 'soft_opc': nan} step=9976




2022-04-22 06:49.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.45 [info     ] FQE_20220422064850: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001360933447993079, 'time_algorithm_update': 0.004718662694443104, 'loss': 0.2829011150440851, 'time_step': 0.004916037237921426, 'init_value': -15.058235168457031, 'ave_value': -16.550404763839268, 'soft_opc': nan} step=10320




2022-04-22 06:49.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.47 [info     ] FQE_20220422064850: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001371149406876675, 'time_algorithm_update': 0.004891459331956021, 'loss': 0.29514638029667006, 'time_step': 0.005091556976007861, 'init_value': -15.17605209350586, 'ave_value': -16.642721419670696, 'soft_opc': nan} step=10664




2022-04-22 06:49.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.49 [info     ] FQE_20220422064850: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00013784544412479845, 'time_algorithm_update': 0.004796249921931777, 'loss': 0.30659715731108433, 'time_step': 0.004996316377506699, 'init_value': -15.524673461914062, 'ave_value': -16.81543511287435, 'soft_opc': nan} step=11008




2022-04-22 06:49.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.51 [info     ] FQE_20220422064850: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00013461986253427905, 'time_algorithm_update': 0.00431635906529981, 'loss': 0.32273839459602915, 'time_step': 0.004511819329372672, 'init_value': -16.087867736816406, 'ave_value': -17.230919602176023, 'soft_opc': nan} step=11352




2022-04-22 06:49.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.52 [info     ] FQE_20220422064850: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00015864469284235046, 'time_algorithm_update': 0.004739877789519554, 'loss': 0.3343706054905386, 'time_step': 0.004963208769643029, 'init_value': -16.485294342041016, 'ave_value': -17.509781762293063, 'soft_opc': nan} step=11696




2022-04-22 06:49.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.54 [info     ] FQE_20220422064850: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001405983470207037, 'time_algorithm_update': 0.004926309336063473, 'loss': 0.351242063572457, 'time_step': 0.005129355330799901, 'init_value': -16.81540298461914, 'ave_value': -17.670351978946847, 'soft_opc': nan} step=12040




2022-04-22 06:49.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.56 [info     ] FQE_20220422064850: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001370920691379281, 'time_algorithm_update': 0.004632847253666367, 'loss': 0.364395891901019, 'time_step': 0.0048310777475667555, 'init_value': -17.046077728271484, 'ave_value': -17.728604029418968, 'soft_opc': nan} step=12384




2022-04-22 06:49.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:49.58 [info     ] FQE_20220422064850: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00014173083527143612, 'time_algorithm_update': 0.004553817732389583, 'loss': 0.37865910052625074, 'time_step': 0.004755555197250011, 'init_value': -17.370468139648438, 'ave_value': -17.837856846116065, 'soft_opc': nan} step=12728




2022-04-22 06:49.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.00 [info     ] FQE_20220422064850: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00013662909352502159, 'time_algorithm_update': 0.004750550486320673, 'loss': 0.3912505315140236, 'time_step': 0.004949155241944069, 'init_value': -17.882308959960938, 'ave_value': -18.309055943652854, 'soft_opc': nan} step=13072




2022-04-22 06:50.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.02 [info     ] FQE_20220422064850: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00014633979908255644, 'time_algorithm_update': 0.004918265481327855, 'loss': 0.39946909538513525, 'time_step': 0.00512748263603033, 'init_value': -18.075889587402344, 'ave_value': -18.151577344919378, 'soft_opc': nan} step=13416




2022-04-22 06:50.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.03 [info     ] FQE_20220422064850: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00013925862866778706, 'time_algorithm_update': 0.00483413768369098, 'loss': 0.41173504160576346, 'time_step': 0.005035017811974814, 'init_value': -18.302932739257812, 'ave_value': -18.275365629842554, 'soft_opc': nan} step=13760




2022-04-22 06:50.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.05 [info     ] FQE_20220422064850: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00013844772826793582, 'time_algorithm_update': 0.00474976661593415, 'loss': 0.42076036859801863, 'time_step': 0.00495025307633156, 'init_value': -18.52073097229004, 'ave_value': -18.47025994691612, 'soft_opc': nan} step=14104




2022-04-22 06:50.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.07 [info     ] FQE_20220422064850: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00013735751773035802, 'time_algorithm_update': 0.004473851170650748, 'loss': 0.42607774561390194, 'time_step': 0.004672873158787572, 'init_value': -18.833972930908203, 'ave_value': -18.58380356070938, 'soft_opc': nan} step=14448




2022-04-22 06:50.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.09 [info     ] FQE_20220422064850: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001418639061062835, 'time_algorithm_update': 0.004658270713894866, 'loss': 0.4488891726197285, 'time_step': 0.004862670981606772, 'init_value': -19.301342010498047, 'ave_value': -18.962673757149762, 'soft_opc': nan} step=14792




2022-04-22 06:50.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.11 [info     ] FQE_20220422064850: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00013442510782286178, 'time_algorithm_update': 0.004730341739432756, 'loss': 0.44300203649381303, 'time_step': 0.004923572373944659, 'init_value': -19.656879425048828, 'ave_value': -19.37684983182111, 'soft_opc': nan} step=15136




2022-04-22 06:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.12 [info     ] FQE_20220422064850: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.000136537607326064, 'time_algorithm_update': 0.004700734171756479, 'loss': 0.46434850299048647, 'time_step': 0.0048990762510965034, 'init_value': -19.576723098754883, 'ave_value': -19.213345045983573, 'soft_opc': nan} step=15480




2022-04-22 06:50.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.14 [info     ] FQE_20220422064850: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00013827515202899311, 'time_algorithm_update': 0.004418841628141181, 'loss': 0.4605686839989408, 'time_step': 0.004619522150172744, 'init_value': -19.37891387939453, 'ave_value': -18.86340236365837, 'soft_opc': nan} step=15824




2022-04-22 06:50.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.16 [info     ] FQE_20220422064850: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001360981963401617, 'time_algorithm_update': 0.004792891962583675, 'loss': 0.45235221654577396, 'time_step': 0.004992315242456836, 'init_value': -19.332015991210938, 'ave_value': -18.944573882024166, 'soft_opc': nan} step=16168




2022-04-22 06:50.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.18 [info     ] FQE_20220422064850: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00013978467431179312, 'time_algorithm_update': 0.004858121622440426, 'loss': 0.4499164880821915, 'time_step': 0.00506092296090237, 'init_value': -19.169078826904297, 'ave_value': -18.844566724942744, 'soft_opc': nan} step=16512




2022-04-22 06:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.20 [info     ] FQE_20220422064850: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00013919833094574685, 'time_algorithm_update': 0.004847727542699769, 'loss': 0.4539393563023877, 'time_step': 0.005050658486610235, 'init_value': -19.251178741455078, 'ave_value': -19.106228460733956, 'soft_opc': nan} step=16856




2022-04-22 06:50.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 06:50.22 [info     ] FQE_20220422064850: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001353621482849121, 'time_algorithm_update': 0.004418760538101196, 'loss': 0.4610145379177316, 'time_step': 0.004611078389855318, 'init_value': -19.62482452392578, 'ave_value': -19.39449397722922, 'soft_opc': nan} step=17200




2022-04-22 06:50.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422064850/model_17200.pt
search iteration:  27
using hyper params:  [0.005710941525871375, 0.002081210419391545, 6.968638745149185e-05, 5]
2022-04-22 06:50.22 [debug    ] RoundIterator is selected.
2022-04-22 06:50.22 [info     ] Directory is created at d3rlpy_logs/CQL_20220422065022
2022-04-22 06:50.22 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 06:50.22 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 06:50.22 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422065022/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.005710941525871375, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:50.36 [info     ] CQL_20220422065022: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00035360782821743477, 'time_algorithm_update': 0.04014871299611351, 'temp_loss': 4.916837861083146, 'temp': 0.9876365265405247, 'alpha_loss': -17.73021344091162, 'alpha': 1.017719796627243, 'critic_loss': 131.4192436967971, 'actor_loss': 1.3156671792570223, 'time_step': 0.04058607189641523, 'td_error': 1.2919768187151919, 'init_value': -6.25044059753418, 'ave_value': -5.844076616631802} step=346
2022-04-22 06:50.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:50.51 [info     ] CQL_20220422065022: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00035420938723349155, 'time_algorithm_update': 0.039945541089669814, 'temp_loss': 4.863716464511232, 'temp': 0.9640326070992243, 'alpha_loss': -18.366043112870585, 'alpha': 1.0541841205144893, 'critic_loss': 122.37577180366296, 'actor_loss': 7.373710544123126, 'time_step': 0.040387919872482386, 'td_error': 1.3811644781318604, 'init_value': -10.445364952087402, 'ave_value': -9.901931932776277} step=692
2022-04-22 06:50.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:51.05 [info     ] CQL_20220422065022: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00035155784187978405, 'time_algorithm_update': 0.04022682057639767, 'temp_loss': 4.7517019172624355, 'temp': 0.9414224345560018, 'alpha_loss': -19.026366978022406, 'alpha': 1.0924256995234187, 'critic_loss': 215.30674002763163, 'actor_loss': 12.517185161568525, 'time_step': 0.04066229142205564, 'td_error': 1.5430092794651111, 'init_value': -15.168067932128906, 'ave_value': -14.369397586688661} step=1038
2022-04-22 06:51.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:51.20 [info     ] CQL_20220422065022: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003537415079987807, 'time_algorithm_update': 0.03950600954838571, 'temp_loss': 4.640514005815363, 'temp': 0.9195728121120805, 'alpha_loss': -19.73875458667733, 'alpha': 1.132588959153677, 'critic_loss': 345.67695820538296, 'actor_loss': 16.598543139551417, 'time_step': 0.03994380600879647, 'td_error': 1.6393003760059695, 'init_value': -17.87742805480957, 'ave_value': -17.043806876837785} step=1384
2022-04-22 06:51.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:51.34 [info     ] CQL_20220422065022: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003498200047222865, 'time_algorithm_update': 0.040716194003992685, 'temp_loss': 4.535895054050953, 'temp': 0.8984038828080789, 'alpha_loss': -20.468139857915094, 'alpha': 1.1746894796459662, 'critic_loss': 510.2148424269836, 'actor_loss': 19.213104727640317, 'time_step': 0.04115186536932267, 'td_error': 1.7068301022055121, 'init_value': -19.7164249420166, 'ave_value': -18.909413708686376} step=1730
2022-04-22 06:51.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:51.49 [info     ] CQL_20220422065022: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003509280309511747, 'time_algorithm_update': 0.03982948223290416, 'temp_loss': 4.431367355964087, 'temp': 0.8778571210844668, 'alpha_loss': -21.2336647000616, 'alpha': 1.218757477109832, 'critic_loss': 707.3590936385139, 'actor_loss': 19.889794724525053, 'time_step': 0.04026626713703133, 'td_error': 1.6821744025581573, 'init_value': -19.780858993530273, 'ave_value': -19.092115627051868} step=2076
2022-04-22 06:51.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:52.04 [info     ] CQL_20220422065022: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00035711864515536096, 'time_algorithm_update': 0.04108967533001321, 'temp_loss': 4.332211683251265, 'temp': 0.8578890837685911, 'alpha_loss': -22.03028188275464, 'alpha': 1.2648297003927949, 'critic_loss': 934.2261527177226, 'actor_loss': 18.413712948043912, 'time_step': 0.04153351080899983, 'td_error': 1.5698612643154886, 'init_value': -17.263029098510742, 'ave_value': -16.726245117051842} step=2422
2022-04-22 06:52.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:52.19 [info     ] CQL_20220422065022: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003607342008910427, 'time_algorithm_update': 0.04069024565591978, 'temp_loss': 4.233383443314216, 'temp': 0.8384639948089688, 'alpha_loss': -22.8520168679298, 'alpha': 1.3129240598292709, 'critic_loss': 1193.712552285608, 'actor_loss': 14.584668352424753, 'time_step': 0.04113244183490731, 'td_error': 1.4226049679220987, 'init_value': -12.759079933166504, 'ave_value': -12.424490423595921} step=2768
2022-04-22 06:52.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:52.33 [info     ] CQL_20220422065022: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00034824203204557384, 'time_algorithm_update': 0.04064835633845688, 'temp_loss': 4.138982392459935, 'temp': 0.8195472032348544, 'alpha_loss': -23.710087368254026, 'alpha': 1.3630887066697799, 'critic_loss': 1487.4473520620709, 'actor_loss': 8.994164663932226, 'time_step': 0.04108419308083595, 'td_error': 1.321619206834635, 'init_value': -7.7454142570495605, 'ave_value': -7.608753762848855} step=3114
2022-04-22 06:52.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:52.48 [info     ] CQL_20220422065022: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003603200691972854, 'time_algorithm_update': 0.040339045441908646, 'temp_loss': 4.046653388552583, 'temp': 0.8011073506291891, 'alpha_loss': -24.613747695966953, 'alpha': 1.4153746373391565, 'critic_loss': 1797.4452408807126, 'actor_loss': 5.151402884136045, 'time_step': 0.04078662326570191, 'td_error': 1.2999748454573727, 'init_value': -5.842546463012695, 'ave_value': -5.781063278120465} step=3460
2022-04-22 06:52.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:53.03 [info     ] CQL_20220422065022: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003584244347721166, 'time_algorithm_update': 0.03999187009182969, 'temp_loss': 3.9558999097416168, 'temp': 0.7831264200238134, 'alpha_loss': -25.561382089736146, 'alpha': 1.4698555366152284, 'critic_loss': 2092.068495910292, 'actor_loss': 4.1587979490357325, 'time_step': 0.04043320906644612, 'td_error': 1.2976622774534803, 'init_value': -5.5012617111206055, 'ave_value': -5.45886108946495} step=3806
2022-04-22 06:53.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:53.17 [info     ] CQL_20220422065022: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00035264312876442266, 'time_algorithm_update': 0.0397997860274563, 'temp_loss': 3.8666249630768177, 'temp': 0.7655830367796683, 'alpha_loss': -26.548136898547927, 'alpha': 1.526590219811897, 'critic_loss': 2368.5526722814307, 'actor_loss': 4.000226808421185, 'time_step': 0.04023221531355312, 'td_error': 1.2983207585809058, 'init_value': -5.417944431304932, 'ave_value': -5.384566451226891} step=4152
2022-04-22 06:53.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:53.32 [info     ] CQL_20220422065022: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003475798347781848, 'time_algorithm_update': 0.040014806510396086, 'temp_loss': 3.7804728339862272, 'temp': 0.7484607171116536, 'alpha_loss': -27.572908506228057, 'alpha': 1.5856372231692937, 'critic_loss': 2642.6853972859467, 'actor_loss': 4.004969126227274, 'time_step': 0.040448589132011284, 'td_error': 1.300652389697148, 'init_value': -5.481645584106445, 'ave_value': -5.455026300556008} step=4498
2022-04-22 06:53.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:53.46 [info     ] CQL_20220422065022: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.000350541462098932, 'time_algorithm_update': 0.04005233263004722, 'temp_loss': 3.695621548360483, 'temp': 0.731740470221966, 'alpha_loss': -28.641511856476008, 'alpha': 1.6470632063860149, 'critic_loss': 2914.9672893899024, 'actor_loss': 4.090292878233629, 'time_step': 0.040479832301939155, 'td_error': 1.3019678564719896, 'init_value': -5.481975555419922, 'ave_value': -5.460140244188666} step=4844
2022-04-22 06:53.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:54.01 [info     ] CQL_20220422065022: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00035368086974744853, 'time_algorithm_update': 0.04003796412076564, 'temp_loss': 3.6130269118127107, 'temp': 0.7154093946335632, 'alpha_loss': -29.755194068644087, 'alpha': 1.7109559571122848, 'critic_loss': 3194.786289457641, 'actor_loss': 4.249287351707503, 'time_step': 0.04047489717516596, 'td_error': 1.3062065080322998, 'init_value': -5.684345245361328, 'ave_value': -5.664759414260452} step=5190
2022-04-22 06:54.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:54.15 [info     ] CQL_20220422065022: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00035855398012723535, 'time_algorithm_update': 0.03973515529853071, 'temp_loss': 3.5332130173038196, 'temp': 0.6994532813571092, 'alpha_loss': -30.909383828929393, 'alpha': 1.777381502479487, 'critic_loss': 3468.4316808446983, 'actor_loss': 4.413796098246051, 'time_step': 0.04017649771850233, 'td_error': 1.3100320317994205, 'init_value': -5.885227680206299, 'ave_value': -5.87082174182673} step=5536
2022-04-22 06:54.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:54.30 [info     ] CQL_20220422065022: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00034453414079081804, 'time_algorithm_update': 0.039945084235571714, 'temp_loss': 3.454203687651309, 'temp': 0.6838602382323645, 'alpha_loss': -32.106854979013434, 'alpha': 1.846427652188119, 'critic_loss': 3764.2666558943733, 'actor_loss': 4.643035439397559, 'time_step': 0.04037242679926702, 'td_error': 1.3133231920392288, 'init_value': -6.034357070922852, 'ave_value': -6.0212940132861} step=5882
2022-04-22 06:54.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:54.44 [info     ] CQL_20220422065022: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00033996491073873004, 'time_algorithm_update': 0.039973372668889215, 'temp_loss': 3.3766482530990776, 'temp': 0.6686233960824206, 'alpha_loss': -33.35700720858712, 'alpha': 1.918195045407797, 'critic_loss': 4030.0843477635026, 'actor_loss': 4.86222065390879, 'time_step': 0.04039384312712388, 'td_error': 1.3170389757420475, 'init_value': -6.205178737640381, 'ave_value': -6.194221848749674} step=6228
2022-04-22 06:54.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:54.59 [info     ] CQL_20220422065022: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00034680394078954796, 'time_algorithm_update': 0.04010217727264228, 'temp_loss': 3.301448395486512, 'temp': 0.6537316689601523, 'alpha_loss': -34.64994643327129, 'alpha': 1.9927814764783562, 'critic_loss': 4298.712518063584, 'actor_loss': 5.129801149313161, 'time_step': 0.04052771929371564, 'td_error': 1.3220220901557396, 'init_value': -6.461966037750244, 'ave_value': -6.453307634826631} step=6574
2022-04-22 06:54.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:55.13 [info     ] CQL_20220422065022: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003537463314960458, 'time_algorithm_update': 0.04082416936841314, 'temp_loss': 3.22786068365064, 'temp': 0.6391750181341447, 'alpha_loss': -36.002087664742, 'alpha': 2.0702890343748765, 'critic_loss': 4583.027149001987, 'actor_loss': 5.441885525091535, 'time_step': 0.04126227659986198, 'td_error': 1.329059699054635, 'init_value': -6.831512451171875, 'ave_value': -6.823088440549153} step=6920
2022-04-22 06:55.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:55.27 [info     ] CQL_20220422065022: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00034913644625272363, 'time_algorithm_update': 0.03874330437941358, 'temp_loss': 3.155885697100204, 'temp': 0.6249458174829539, 'alpha_loss': -37.40331436995137, 'alpha': 2.1508370045292584, 'critic_loss': 4851.39299866781, 'actor_loss': 5.756411534513353, 'time_step': 0.03917216152125011, 'td_error': 1.3342657393870792, 'init_value': -7.080124855041504, 'ave_value': -7.073034053619804} step=7266
2022-04-22 06:55.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:55.40 [info     ] CQL_20220422065022: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003510320806778924, 'time_algorithm_update': 0.03570634919094902, 'temp_loss': 3.086140784914094, 'temp': 0.6110333417881431, 'alpha_loss': -38.85895582430624, 'alpha': 2.234522437084617, 'critic_loss': 5101.157597712699, 'actor_loss': 6.105106656951023, 'time_step': 0.036141556811470514, 'td_error': 1.3413699047475465, 'init_value': -7.418570041656494, 'ave_value': -7.412102924562167} step=7612
2022-04-22 06:55.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:55.53 [info     ] CQL_20220422065022: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003446381905175358, 'time_algorithm_update': 0.035363446081304824, 'temp_loss': 3.017172969834653, 'temp': 0.5974316741689781, 'alpha_loss': -40.36930865910701, 'alpha': 2.321480598063827, 'critic_loss': 5302.296822784953, 'actor_loss': 6.4721701655084685, 'time_step': 0.035789378805656656, 'td_error': 1.3493994451392786, 'init_value': -7.8018903732299805, 'ave_value': -7.795554928442718} step=7958
2022-04-22 06:55.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:56.06 [info     ] CQL_20220422065022: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0003486279118267787, 'time_algorithm_update': 0.03604608257381902, 'temp_loss': 2.9499175293597184, 'temp': 0.584135019882566, 'alpha_loss': -41.94167075405231, 'alpha': 2.4118212034247515, 'critic_loss': 5528.785950765445, 'actor_loss': 6.889002069572493, 'time_step': 0.0364760305151085, 'td_error': 1.3584762590467614, 'init_value': -8.230709075927734, 'ave_value': -8.223621137970824} step=8304
2022-04-22 06:56.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:56.19 [info     ] CQL_20220422065022: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00035022724570566517, 'time_algorithm_update': 0.03612969376448262, 'temp_loss': 2.883677564604434, 'temp': 0.5711370766851943, 'alpha_loss': -43.57244982195727, 'alpha': 2.505689911070587, 'critic_loss': 5774.219022364975, 'actor_loss': 7.298564286590311, 'time_step': 0.036567687299210214, 'td_error': 1.3673943004209128, 'init_value': -8.62215518951416, 'ave_value': -8.614930007739224} step=8650
2022-04-22 06:56.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:56.32 [info     ] CQL_20220422065022: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003433241320483257, 'time_algorithm_update': 0.03584388087939665, 'temp_loss': 2.81999571199362, 'temp': 0.558426973619902, 'alpha_loss': -45.2702695151974, 'alpha': 2.6032146814930646, 'critic_loss': 6008.435847464324, 'actor_loss': 7.706416765389415, 'time_step': 0.03627049371686285, 'td_error': 1.3761316932610637, 'init_value': -8.976268768310547, 'ave_value': -8.971634212654877} step=8996
2022-04-22 06:56.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:56.45 [info     ] CQL_20220422065022: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00032589959271381357, 'time_algorithm_update': 0.03340910555999403, 'temp_loss': 2.7579417711048455, 'temp': 0.5459974559745348, 'alpha_loss': -47.0266918667479, 'alpha': 2.704527754315062, 'critic_loss': 6246.106165891438, 'actor_loss': 8.15304829343895, 'time_step': 0.03381535290293611, 'td_error': 1.3849003047831452, 'init_value': -9.33377742767334, 'ave_value': -9.330201071480994} step=9342
2022-04-22 06:56.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:56.57 [info     ] CQL_20220422065022: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00034865685281036907, 'time_algorithm_update': 0.035048362147601354, 'temp_loss': 2.6965322108627054, 'temp': 0.5338450696082474, 'alpha_loss': -48.859914845813904, 'alpha': 2.8097929582430448, 'critic_loss': 6436.87967536353, 'actor_loss': 8.560367485002287, 'time_step': 0.0354784334326066, 'td_error': 1.3946773693369932, 'init_value': -9.726537704467773, 'ave_value': -9.72364111891738} step=9688
2022-04-22 06:56.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:57.10 [info     ] CQL_20220422065022: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00034136648122974904, 'time_algorithm_update': 0.034990399559109195, 'temp_loss': 2.6364887704738993, 'temp': 0.5219635160672182, 'alpha_loss': -50.75311524054908, 'alpha': 2.9191586406244707, 'critic_loss': 6524.830090825957, 'actor_loss': 9.01103440323317, 'time_step': 0.03541586578236839, 'td_error': 1.4066420276502434, 'init_value': -10.19944953918457, 'ave_value': -10.19601152943683} step=10034
2022-04-22 06:57.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:57.23 [info     ] CQL_20220422065022: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00034129964133907607, 'time_algorithm_update': 0.03604239259841125, 'temp_loss': 2.5770450527268336, 'temp': 0.5103499904877877, 'alpha_loss': -52.74064393677463, 'alpha': 3.0327824381734594, 'critic_loss': 6729.526838534141, 'actor_loss': 9.486862345237952, 'time_step': 0.036463251003640236, 'td_error': 1.417401378494054, 'init_value': -10.600944519042969, 'ave_value': -10.598449866926574} step=10380
2022-04-22 06:57.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:57.37 [info     ] CQL_20220422065022: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003425820025405443, 'time_algorithm_update': 0.03833023523319663, 'temp_loss': 2.520067368628662, 'temp': 0.49899332035828187, 'alpha_loss': -54.790348736536984, 'alpha': 3.150836944580078, 'critic_loss': 6785.506769610278, 'actor_loss': 9.886282678284397, 'time_step': 0.03875337239634784, 'td_error': 1.4317520627616935, 'init_value': -11.145086288452148, 'ave_value': -11.140349145837732} step=10726
2022-04-22 06:57.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:57.52 [info     ] CQL_20220422065022: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003586986850451872, 'time_algorithm_update': 0.040512277900828106, 'temp_loss': 2.4641828364719545, 'temp': 0.48789024990418056, 'alpha_loss': -56.922100353792224, 'alpha': 3.273484482930575, 'critic_loss': 6826.627482331557, 'actor_loss': 10.345261810831941, 'time_step': 0.04095059943336972, 'td_error': 1.4429965611545839, 'init_value': -11.533021926879883, 'ave_value': -11.529209554065398} step=11072
2022-04-22 06:57.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:58.07 [info     ] CQL_20220422065022: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003513001293116222, 'time_algorithm_update': 0.0410282232857853, 'temp_loss': 2.409212131720747, 'temp': 0.47703249313238727, 'alpha_loss': -59.130542247970666, 'alpha': 3.400894105089882, 'critic_loss': 6900.758639473446, 'actor_loss': 10.793736490900116, 'time_step': 0.041460028962592856, 'td_error': 1.4528981753724335, 'init_value': -11.855005264282227, 'ave_value': -11.853796445503795} step=11418
2022-04-22 06:58.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:58.21 [info     ] CQL_20220422065022: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003617712528030307, 'time_algorithm_update': 0.0406767998127579, 'temp_loss': 2.35549516760545, 'temp': 0.4664176909220701, 'alpha_loss': -61.44460965856651, 'alpha': 3.533278518329466, 'critic_loss': 6935.158778901734, 'actor_loss': 11.290512162136894, 'time_step': 0.04111843508792061, 'td_error': 1.4693560582641172, 'init_value': -12.420202255249023, 'ave_value': -12.41677791003991} step=11764
2022-04-22 06:58.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:58.36 [info     ] CQL_20220422065022: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003579283036248532, 'time_algorithm_update': 0.040860586083693314, 'temp_loss': 2.303619605268357, 'temp': 0.4560392825417436, 'alpha_loss': -63.838609386730745, 'alpha': 3.670831084251404, 'critic_loss': 6945.8611982929915, 'actor_loss': 11.711723583971144, 'time_step': 0.0412964359184221, 'td_error': 1.4791397509504718, 'init_value': -12.718536376953125, 'ave_value': -12.718184256157981} step=12110
2022-04-22 06:58.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:58.51 [info     ] CQL_20220422065022: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00036900374241647006, 'time_algorithm_update': 0.04021907748514517, 'temp_loss': 2.2523401961850293, 'temp': 0.44588928518956794, 'alpha_loss': -66.31024701333459, 'alpha': 3.813718809557788, 'critic_loss': 6841.136566338511, 'actor_loss': 12.182351784899055, 'time_step': 0.04067142023516528, 'td_error': 1.4982036846085924, 'init_value': -13.342246055603027, 'ave_value': -13.338891624172133} step=12456
2022-04-22 06:58.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:59.05 [info     ] CQL_20220422065022: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00036070939433367955, 'time_algorithm_update': 0.03967212597069713, 'temp_loss': 2.201904675864071, 'temp': 0.4359668311528388, 'alpha_loss': -68.90008635327995, 'alpha': 3.962157729044126, 'critic_loss': 6860.7163368181, 'actor_loss': 12.646645554228325, 'time_step': 0.040116941997770626, 'td_error': 1.5068495199516532, 'init_value': -13.582076072692871, 'ave_value': -13.582672839212215} step=12802
2022-04-22 06:59.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:59.20 [info     ] CQL_20220422065022: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00036511462547875553, 'time_algorithm_update': 0.04028630739002559, 'temp_loss': 2.1530610233373038, 'temp': 0.4262660492879118, 'alpha_loss': -71.57781259172914, 'alpha': 4.116386685068208, 'critic_loss': 6368.276902038927, 'actor_loss': 12.981896728449474, 'time_step': 0.040733232663546, 'td_error': 1.5235980816291859, 'init_value': -14.097452163696289, 'ave_value': -14.095992163291413} step=13148
2022-04-22 06:59.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:59.35 [info     ] CQL_20220422065022: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00035088875390201633, 'time_algorithm_update': 0.04009014540325011, 'temp_loss': 2.104821613758286, 'temp': 0.41678076405401177, 'alpha_loss': -74.36009353020287, 'alpha': 4.2766073508069695, 'critic_loss': 6144.698994366419, 'actor_loss': 13.490898215012743, 'time_step': 0.04052330579371811, 'td_error': 1.5404511209429625, 'init_value': -14.590961456298828, 'ave_value': -14.589205627477583} step=13494
2022-04-22 06:59.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 06:59.49 [info     ] CQL_20220422065022: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003516839418797135, 'time_algorithm_update': 0.04044824459649235, 'temp_loss': 2.057705029586836, 'temp': 0.40750814026835336, 'alpha_loss': -77.26567465721527, 'alpha': 4.443073239629668, 'critic_loss': 5971.789213500271, 'actor_loss': 14.02739034498358, 'time_step': 0.04088138638204233, 'td_error': 1.5593735913591849, 'init_value': -15.141059875488281, 'ave_value': -15.137392599631283} step=13840
2022-04-22 06:59.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:00.04 [info     ] CQL_20220422065022: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00035496460909099247, 'time_algorithm_update': 0.04046897598773758, 'temp_loss': 2.01213036727354, 'temp': 0.3984418199586041, 'alpha_loss': -80.2691471783412, 'alpha': 4.616029480289172, 'critic_loss': 6165.95337196306, 'actor_loss': 14.630506234361945, 'time_step': 0.040905078711537264, 'td_error': 1.5789301413071517, 'init_value': -15.6780424118042, 'ave_value': -15.674042168586046} step=14186
2022-04-22 07:00.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:00.18 [info     ] CQL_20220422065022: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00035628624734161907, 'time_algorithm_update': 0.03984193099027424, 'temp_loss': 1.9679201007578415, 'temp': 0.3895751521249727, 'alpha_loss': -83.39652770378686, 'alpha': 4.795724167300097, 'critic_loss': 6297.309257022218, 'actor_loss': 15.14497683365221, 'time_step': 0.040279799114072944, 'td_error': 1.597464580296213, 'init_value': -16.176395416259766, 'ave_value': -16.17213664880122} step=14532
2022-04-22 07:00.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:00.33 [info     ] CQL_20220422065022: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00035273959870972386, 'time_algorithm_update': 0.040254843717365596, 'temp_loss': 1.9240517178711865, 'temp': 0.3809046178539364, 'alpha_loss': -86.63049411222424, 'alpha': 4.982381415504941, 'critic_loss': 6301.519212314848, 'actor_loss': 15.589650873503933, 'time_step': 0.04068813227504664, 'td_error': 1.6116859168234252, 'init_value': -16.526845932006836, 'ave_value': -16.52577699649498} step=14878
2022-04-22 07:00.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:00.48 [info     ] CQL_20220422065022: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00035448639379071363, 'time_algorithm_update': 0.04031441873208636, 'temp_loss': 1.8812188433773944, 'temp': 0.3724286412400317, 'alpha_loss': -90.01109686614461, 'alpha': 5.176313641443418, 'critic_loss': 6164.045799652276, 'actor_loss': 16.033591978811803, 'time_step': 0.04075485295643007, 'td_error': 1.6260163237629797, 'init_value': -16.878753662109375, 'ave_value': -16.880380882816667} step=15224
2022-04-22 07:00.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:01.02 [info     ] CQL_20220422065022: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003531916293105638, 'time_algorithm_update': 0.04033432530529926, 'temp_loss': 1.8389837645381861, 'temp': 0.36414102503674567, 'alpha_loss': -93.53505510539678, 'alpha': 5.377830745167815, 'critic_loss': 5747.430359239524, 'actor_loss': 16.408048260418667, 'time_step': 0.040769402691394604, 'td_error': 1.6427574166426535, 'init_value': -17.299047470092773, 'ave_value': -17.30088871991595} step=15570
2022-04-22 07:01.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:01.17 [info     ] CQL_20220422065022: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003516102112786618, 'time_algorithm_update': 0.03994642654595348, 'temp_loss': 1.7981297442678772, 'temp': 0.35603876084606084, 'alpha_loss': -97.1441559102494, 'alpha': 5.587175322405865, 'critic_loss': 5115.148424799043, 'actor_loss': 16.724793533369297, 'time_step': 0.040383832303085766, 'td_error': 1.6544526716825136, 'init_value': -17.57936668395996, 'ave_value': -17.582668684285192} step=15916
2022-04-22 07:01.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:01.31 [info     ] CQL_20220422065022: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003543299746651181, 'time_algorithm_update': 0.04016748398025601, 'temp_loss': 1.7580640822476734, 'temp': 0.3481165386004255, 'alpha_loss': -100.93411160066638, 'alpha': 5.804648961634994, 'critic_loss': 4499.901259934971, 'actor_loss': 17.120267912142538, 'time_step': 0.040603735543399876, 'td_error': 1.6694416121683073, 'init_value': -17.938024520874023, 'ave_value': -17.94191621669156} step=16262
2022-04-22 07:01.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:01.46 [info     ] CQL_20220422065022: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00035053250417543976, 'time_algorithm_update': 0.04024356982611507, 'temp_loss': 1.7194231848496233, 'temp': 0.3403693106822196, 'alpha_loss': -104.837977547177, 'alpha': 6.030604720804733, 'critic_loss': 4440.006058356666, 'actor_loss': 17.74107217513068, 'time_step': 0.04067405248652993, 'td_error': 1.697410814985461, 'init_value': -18.63100814819336, 'ave_value': -18.630262528532995} step=16608
2022-04-22 07:01.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:02.01 [info     ] CQL_20220422065022: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.000356543270838743, 'time_algorithm_update': 0.040490631423244584, 'temp_loss': 1.6807988320471923, 'temp': 0.33279470470599354, 'alpha_loss': -108.93179634402942, 'alpha': 6.265284784956474, 'critic_loss': 4321.047275785766, 'actor_loss': 18.245387612050667, 'time_step': 0.040926321393492594, 'td_error': 1.7210727039537408, 'init_value': -19.18749237060547, 'ave_value': -19.18421730079488} step=16954
2022-04-22 07:02.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:02.15 [info     ] CQL_20220422065022: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00035147653149731587, 'time_algorithm_update': 0.04019374723379322, 'temp_loss': 1.643512152178439, 'temp': 0.32538955843862083, 'alpha_loss': -113.17869861139728, 'alpha': 6.509184153782839, 'critic_loss': 4543.82084029534, 'actor_loss': 18.94238664373497, 'time_step': 0.04062560872535485, 'td_error': 1.7511733989017502, 'init_value': -19.873620986938477, 'ave_value': -19.868693638707068} step=17300
2022-04-22 07:02.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422065022/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:02.16 [info     ] FQE_20220422070215: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00012643940477486117, 'time_algorithm_update': 0.004822871771203466, 'loss': 0.007590344607695399, 'time_step': 0.005009549209870488, 'init_value': -0.059994906187057495, 'ave_value': -0.006884118039496579, 'soft_opc': nan} step=166




2022-04-22 07:02.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.17 [info     ] FQE_20220422070215: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00012447173336902297, 'time_algorithm_update': 0.004704216876661921, 'loss': 0.004796825979797013, 'time_step': 0.004888666681496494, 'init_value': -0.16208210587501526, 'ave_value': -0.05387935707731559, 'soft_opc': nan} step=332




2022-04-22 07:02.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.18 [info     ] FQE_20220422070215: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00013151800776102455, 'time_algorithm_update': 0.004901739488165063, 'loss': 0.003777644391513867, 'time_step': 0.005091033786176199, 'init_value': -0.19214566051959991, 'ave_value': -0.06690125621452525, 'soft_opc': nan} step=498




2022-04-22 07:02.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.19 [info     ] FQE_20220422070215: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012341177607157143, 'time_algorithm_update': 0.004790573235017708, 'loss': 0.0035778734410821223, 'time_step': 0.004966794726360275, 'init_value': -0.2115517258644104, 'ave_value': -0.0691288781545318, 'soft_opc': nan} step=664




2022-04-22 07:02.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.20 [info     ] FQE_20220422070215: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001286196421427899, 'time_algorithm_update': 0.004863371332007718, 'loss': 0.003179112187704543, 'time_step': 0.005051744989601962, 'init_value': -0.2340085357427597, 'ave_value': -0.0787821800406049, 'soft_opc': nan} step=830




2022-04-22 07:02.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.21 [info     ] FQE_20220422070215: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00012139096317521061, 'time_algorithm_update': 0.003928804972085608, 'loss': 0.0027427452368134386, 'time_step': 0.004103637603392084, 'init_value': -0.2329055666923523, 'ave_value': -0.07414648742037448, 'soft_opc': nan} step=996




2022-04-22 07:02.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.22 [info     ] FQE_20220422070215: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00012603868921119045, 'time_algorithm_update': 0.00498387325240905, 'loss': 0.00262884884029178, 'time_step': 0.005167020372597568, 'init_value': -0.20363947749137878, 'ave_value': -0.0468210870509145, 'soft_opc': nan} step=1162




2022-04-22 07:02.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.23 [info     ] FQE_20220422070215: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00012355827423463385, 'time_algorithm_update': 0.004810494112681194, 'loss': 0.0023546627247902417, 'time_step': 0.004993688629334231, 'init_value': -0.20174360275268555, 'ave_value': -0.05675070146853859, 'soft_opc': nan} step=1328




2022-04-22 07:02.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.24 [info     ] FQE_20220422070215: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00012828068560864552, 'time_algorithm_update': 0.004928142191415809, 'loss': 0.002133583819706846, 'time_step': 0.0051148081400308265, 'init_value': -0.19112634658813477, 'ave_value': -0.0555049359873944, 'soft_opc': nan} step=1494




2022-04-22 07:02.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.24 [info     ] FQE_20220422070215: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001238067466092397, 'time_algorithm_update': 0.004713424717087343, 'loss': 0.0021722193506507046, 'time_step': 0.004895509007465409, 'init_value': -0.12824979424476624, 'ave_value': -0.014702067863162574, 'soft_opc': nan} step=1660




2022-04-22 07:02.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.25 [info     ] FQE_20220422070215: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.000132467373307929, 'time_algorithm_update': 0.004952333059655614, 'loss': 0.0021454562862257822, 'time_step': 0.005148663578263248, 'init_value': -0.13941708207130432, 'ave_value': -0.036094110169624154, 'soft_opc': nan} step=1826




2022-04-22 07:02.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.26 [info     ] FQE_20220422070215: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00012645951236586972, 'time_algorithm_update': 0.004646388881177787, 'loss': 0.0024931355399158165, 'time_step': 0.00483238984303302, 'init_value': -0.11408282816410065, 'ave_value': -0.020466169505170335, 'soft_opc': nan} step=1992




2022-04-22 07:02.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.27 [info     ] FQE_20220422070215: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001241112329873694, 'time_algorithm_update': 0.004556698971484081, 'loss': 0.002729440435973355, 'time_step': 0.004740223827132259, 'init_value': -0.1204129308462143, 'ave_value': -0.03364096999520788, 'soft_opc': nan} step=2158




2022-04-22 07:02.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.28 [info     ] FQE_20220422070215: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001271604055381683, 'time_algorithm_update': 0.004778995571366276, 'loss': 0.0032801460562391677, 'time_step': 0.004965918609894902, 'init_value': -0.10050798952579498, 'ave_value': -0.028405397731038902, 'soft_opc': nan} step=2324




2022-04-22 07:02.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.29 [info     ] FQE_20220422070215: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00012764011520937266, 'time_algorithm_update': 0.004058850816933505, 'loss': 0.003518383987514437, 'time_step': 0.004247787487075989, 'init_value': -0.04539771378040314, 'ave_value': 0.013627277310467787, 'soft_opc': nan} step=2490




2022-04-22 07:02.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.30 [info     ] FQE_20220422070215: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00012582612324909992, 'time_algorithm_update': 0.004751456789223544, 'loss': 0.004131269719941172, 'time_step': 0.004935962608061641, 'init_value': -0.06886322796344757, 'ave_value': -0.016993423160333355, 'soft_opc': nan} step=2656




2022-04-22 07:02.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.31 [info     ] FQE_20220422070215: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00012850330536624035, 'time_algorithm_update': 0.004949862698474562, 'loss': 0.004751660118927248, 'time_step': 0.0051372568291353895, 'init_value': -0.090451680123806, 'ave_value': -0.0370207470304727, 'soft_opc': nan} step=2822




2022-04-22 07:02.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.32 [info     ] FQE_20220422070215: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00012783688234995646, 'time_algorithm_update': 0.004842090319438153, 'loss': 0.0050092961880061435, 'time_step': 0.005027980689542839, 'init_value': -0.09143011271953583, 'ave_value': -0.04141684461233986, 'soft_opc': nan} step=2988




2022-04-22 07:02.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.32 [info     ] FQE_20220422070215: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.0001253492860908968, 'time_algorithm_update': 0.00476013177848724, 'loss': 0.005789110757528344, 'time_step': 0.004945052675454013, 'init_value': -0.09999702870845795, 'ave_value': -0.04073031575561644, 'soft_opc': nan} step=3154




2022-04-22 07:02.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.33 [info     ] FQE_20220422070215: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012012418494167098, 'time_algorithm_update': 0.004682473389499159, 'loss': 0.0061188873861907946, 'time_step': 0.004859675844031644, 'init_value': -0.10298570990562439, 'ave_value': -0.023699255487336233, 'soft_opc': nan} step=3320




2022-04-22 07:02.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.34 [info     ] FQE_20220422070215: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001253995550684182, 'time_algorithm_update': 0.004903527627508324, 'loss': 0.006902474382820718, 'time_step': 0.005090134689606816, 'init_value': -0.1124030351638794, 'ave_value': -0.035476514390829175, 'soft_opc': nan} step=3486




2022-04-22 07:02.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.35 [info     ] FQE_20220422070215: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00013090472623526332, 'time_algorithm_update': 0.004882541047521384, 'loss': 0.007526136594452806, 'time_step': 0.005074516836419163, 'init_value': -0.16336919367313385, 'ave_value': -0.08862773330949314, 'soft_opc': nan} step=3652




2022-04-22 07:02.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.36 [info     ] FQE_20220422070215: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00012552738189697266, 'time_algorithm_update': 0.004769899758947901, 'loss': 0.007994406031871057, 'time_step': 0.0049533255128975376, 'init_value': -0.21926160156726837, 'ave_value': -0.12170879065923326, 'soft_opc': nan} step=3818




2022-04-22 07:02.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.37 [info     ] FQE_20220422070215: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00012160352913730116, 'time_algorithm_update': 0.004071553069424917, 'loss': 0.008674663346553929, 'time_step': 0.0042500036308564335, 'init_value': -0.233187735080719, 'ave_value': -0.12112850391029104, 'soft_opc': nan} step=3984




2022-04-22 07:02.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.38 [info     ] FQE_20220422070215: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00012532343347388577, 'time_algorithm_update': 0.004914473338299487, 'loss': 0.009057327971375739, 'time_step': 0.005101795656135283, 'init_value': -0.2483416199684143, 'ave_value': -0.10798122547902503, 'soft_opc': nan} step=4150




2022-04-22 07:02.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.39 [info     ] FQE_20220422070215: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00012986344027231974, 'time_algorithm_update': 0.0047851571117539, 'loss': 0.009297768074313616, 'time_step': 0.004976914589663586, 'init_value': -0.38639765977859497, 'ave_value': -0.23333806722919953, 'soft_opc': nan} step=4316




2022-04-22 07:02.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.40 [info     ] FQE_20220422070215: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001261392271662333, 'time_algorithm_update': 0.00479884319994823, 'loss': 0.009981626992924588, 'time_step': 0.004984730697539915, 'init_value': -0.4173336625099182, 'ave_value': -0.25866336748171764, 'soft_opc': nan} step=4482




2022-04-22 07:02.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.40 [info     ] FQE_20220422070215: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001363467021160815, 'time_algorithm_update': 0.004818657794630671, 'loss': 0.010624203024115077, 'time_step': 0.005011694977082402, 'init_value': -0.5254124999046326, 'ave_value': -0.3308056435589721, 'soft_opc': nan} step=4648




2022-04-22 07:02.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.41 [info     ] FQE_20220422070215: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00012825052422213266, 'time_algorithm_update': 0.004793293504829866, 'loss': 0.01069468195975831, 'time_step': 0.004984663193484387, 'init_value': -0.6076748371124268, 'ave_value': -0.37319894802026654, 'soft_opc': nan} step=4814




2022-04-22 07:02.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.42 [info     ] FQE_20220422070215: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001296508743102292, 'time_algorithm_update': 0.00478151763778135, 'loss': 0.011561943621007187, 'time_step': 0.004975577434861517, 'init_value': -0.7190915942192078, 'ave_value': -0.46939801235121115, 'soft_opc': nan} step=4980




2022-04-22 07:02.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.43 [info     ] FQE_20220422070215: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00012511517628129707, 'time_algorithm_update': 0.0047251747315188485, 'loss': 0.011785256569778704, 'time_step': 0.004909248237150261, 'init_value': -0.7640150785446167, 'ave_value': -0.4932059226138098, 'soft_opc': nan} step=5146




2022-04-22 07:02.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.44 [info     ] FQE_20220422070215: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00012431230889745504, 'time_algorithm_update': 0.00441065897424537, 'loss': 0.012072398163888783, 'time_step': 0.004594634814434741, 'init_value': -0.8561957478523254, 'ave_value': -0.5548498449477929, 'soft_opc': nan} step=5312




2022-04-22 07:02.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.45 [info     ] FQE_20220422070215: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00012386276061276356, 'time_algorithm_update': 0.004384542086038245, 'loss': 0.012769662661138496, 'time_step': 0.004568585430283144, 'init_value': -0.8639232516288757, 'ave_value': -0.5291621427664215, 'soft_opc': nan} step=5478




2022-04-22 07:02.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.46 [info     ] FQE_20220422070215: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00012626274522528592, 'time_algorithm_update': 0.004696718181472227, 'loss': 0.013378167484609231, 'time_step': 0.004880708384226604, 'init_value': -0.9479385614395142, 'ave_value': -0.5867786624275886, 'soft_opc': nan} step=5644




2022-04-22 07:02.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.47 [info     ] FQE_20220422070215: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00013448962246079044, 'time_algorithm_update': 0.0050777153796460256, 'loss': 0.01386804967425786, 'time_step': 0.005271355789828013, 'init_value': -1.1151189804077148, 'ave_value': -0.7220056781931234, 'soft_opc': nan} step=5810




2022-04-22 07:02.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.48 [info     ] FQE_20220422070215: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00012664191694144742, 'time_algorithm_update': 0.00473513086158109, 'loss': 0.014332708965423125, 'time_step': 0.00492109878953681, 'init_value': -1.1081387996673584, 'ave_value': -0.6954860101663717, 'soft_opc': nan} step=5976




2022-04-22 07:02.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.49 [info     ] FQE_20220422070215: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00012895572616393306, 'time_algorithm_update': 0.0048397894365241724, 'loss': 0.01486867766740384, 'time_step': 0.005026159516300063, 'init_value': -1.2549879550933838, 'ave_value': -0.7995490236895847, 'soft_opc': nan} step=6142




2022-04-22 07:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.49 [info     ] FQE_20220422070215: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00012507352484277933, 'time_algorithm_update': 0.004724977964378265, 'loss': 0.014859620108990088, 'time_step': 0.004908162427235799, 'init_value': -1.3564566373825073, 'ave_value': -0.8639861287935091, 'soft_opc': nan} step=6308




2022-04-22 07:02.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.50 [info     ] FQE_20220422070215: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00012955895389419003, 'time_algorithm_update': 0.00467289786740958, 'loss': 0.013675861814864406, 'time_step': 0.004860052143234804, 'init_value': -1.450547218322754, 'ave_value': -0.9229376618339269, 'soft_opc': nan} step=6474




2022-04-22 07:02.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.51 [info     ] FQE_20220422070215: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00012646525739187217, 'time_algorithm_update': 0.004637336156454431, 'loss': 0.015180637457208842, 'time_step': 0.00482137806444283, 'init_value': -1.5299960374832153, 'ave_value': -0.9592563394725592, 'soft_opc': nan} step=6640




2022-04-22 07:02.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.52 [info     ] FQE_20220422070215: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00012635322938482445, 'time_algorithm_update': 0.00452139004167304, 'loss': 0.016843777495304527, 'time_step': 0.004706364080130336, 'init_value': -1.596270203590393, 'ave_value': -1.012377567646337, 'soft_opc': nan} step=6806




2022-04-22 07:02.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.53 [info     ] FQE_20220422070215: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00013517328055508165, 'time_algorithm_update': 0.004727189799389207, 'loss': 0.01787722895675092, 'time_step': 0.004924349037997694, 'init_value': -1.6548134088516235, 'ave_value': -1.0251519080771654, 'soft_opc': nan} step=6972




2022-04-22 07:02.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.54 [info     ] FQE_20220422070215: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001288724232868976, 'time_algorithm_update': 0.004819775202188147, 'loss': 0.017945476474187122, 'time_step': 0.005008135933473885, 'init_value': -1.5854463577270508, 'ave_value': -0.9321043245897099, 'soft_opc': nan} step=7138




2022-04-22 07:02.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.55 [info     ] FQE_20220422070215: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001291539295610175, 'time_algorithm_update': 0.004704155117632395, 'loss': 0.018624672485608045, 'time_step': 0.004890592701463814, 'init_value': -1.7394280433654785, 'ave_value': -1.0438082312510626, 'soft_opc': nan} step=7304




2022-04-22 07:02.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.56 [info     ] FQE_20220422070215: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00012583904955760543, 'time_algorithm_update': 0.004796700305249317, 'loss': 0.01883152826844872, 'time_step': 0.004981800734278667, 'init_value': -1.7302844524383545, 'ave_value': -1.0134278168733994, 'soft_opc': nan} step=7470




2022-04-22 07:02.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.56 [info     ] FQE_20220422070215: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00012277982321130224, 'time_algorithm_update': 0.004720647651028921, 'loss': 0.019422348186861813, 'time_step': 0.004901401967887419, 'init_value': -1.845597267150879, 'ave_value': -1.1055193473484266, 'soft_opc': nan} step=7636




2022-04-22 07:02.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.57 [info     ] FQE_20220422070215: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001271733318466738, 'time_algorithm_update': 0.00476588254951569, 'loss': 0.019732318774387466, 'time_step': 0.004951762865824872, 'init_value': -1.7889554500579834, 'ave_value': -1.0409816365118498, 'soft_opc': nan} step=7802




2022-04-22 07:02.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.58 [info     ] FQE_20220422070215: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00012560063097850387, 'time_algorithm_update': 0.004723952477236828, 'loss': 0.020519619130147958, 'time_step': 0.004915006189461214, 'init_value': -1.8226187229156494, 'ave_value': -1.0535447195121967, 'soft_opc': nan} step=7968




2022-04-22 07:02.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:02.59 [info     ] FQE_20220422070215: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00012647243867437523, 'time_algorithm_update': 0.004788856908499476, 'loss': 0.021223448632083024, 'time_step': 0.004972900252744376, 'init_value': -1.9120581150054932, 'ave_value': -1.1113929529682742, 'soft_opc': nan} step=8134




2022-04-22 07:02.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:03.00 [info     ] FQE_20220422070215: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00012766453156988304, 'time_algorithm_update': 0.004266503345535462, 'loss': 0.02157685687872076, 'time_step': 0.004452128008187535, 'init_value': -1.9687309265136719, 'ave_value': -1.1174804264847358, 'soft_opc': nan} step=8300




2022-04-22 07:03.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070215/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 07:03.00 [debug    ] RoundIterator is selected.
2022-04-22 07:03.00 [info     ] Directory is created at d3rlpy_logs/FQE_20220422070300
2022-04-22 07:03.00 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:03.00 [debug    ] Building models...
2022-04-22 07:03.00 [debug    ] Models have been built.
2022-04-22 07:03.00 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422070300/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:03.02 [info     ] FQE_20220422070300: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00013477649799613066, 'time_algorithm_update': 0.004902669163637383, 'loss': 0.02574469762594374, 'time_step': 0.005099755386973537, 'init_value': -1.120385766029358, 'ave_value': -1.110710270360515, 'soft_opc': nan} step=344




2022-04-22 07:03.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.04 [info     ] FQE_20220422070300: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00012826295786125716, 'time_algorithm_update': 0.004752674075060113, 'loss': 0.021756730077593305, 'time_step': 0.0049404092999391774, 'init_value': -1.871088981628418, 'ave_value': -1.851437964391064, 'soft_opc': nan} step=688




2022-04-22 07:03.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.06 [info     ] FQE_20220422070300: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001351958097413529, 'time_algorithm_update': 0.004956313343935235, 'loss': 0.02510123413738383, 'time_step': 0.005153448775757191, 'init_value': -2.7356832027435303, 'ave_value': -2.719428810678624, 'soft_opc': nan} step=1032




2022-04-22 07:03.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.08 [info     ] FQE_20220422070300: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00013127645780873853, 'time_algorithm_update': 0.004628887010175128, 'loss': 0.02680807361422583, 'time_step': 0.0048182059166043305, 'init_value': -3.1597299575805664, 'ave_value': -3.197941324396713, 'soft_opc': nan} step=1376




2022-04-22 07:03.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.10 [info     ] FQE_20220422070300: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.000134016192236612, 'time_algorithm_update': 0.004804349915925847, 'loss': 0.03337864158404341, 'time_step': 0.005002682985261429, 'init_value': -3.7956631183624268, 'ave_value': -3.954427022882947, 'soft_opc': nan} step=1720




2022-04-22 07:03.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.11 [info     ] FQE_20220422070300: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00012939475303472473, 'time_algorithm_update': 0.004725217126136602, 'loss': 0.03888508929806047, 'time_step': 0.004916529322779456, 'init_value': -4.078044891357422, 'ave_value': -4.39030530043282, 'soft_opc': nan} step=2064




2022-04-22 07:03.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.13 [info     ] FQE_20220422070300: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00013300083404363587, 'time_algorithm_update': 0.0047973456770874735, 'loss': 0.04709600824808572, 'time_step': 0.004991222259610198, 'init_value': -4.560927391052246, 'ave_value': -5.135288458982029, 'soft_opc': nan} step=2408




2022-04-22 07:03.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.15 [info     ] FQE_20220422070300: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00013276726700538812, 'time_algorithm_update': 0.004756912935611813, 'loss': 0.05650613935079512, 'time_step': 0.004952626865963603, 'init_value': -4.589385986328125, 'ave_value': -5.518594036539932, 'soft_opc': nan} step=2752




2022-04-22 07:03.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.17 [info     ] FQE_20220422070300: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00012516559556473132, 'time_algorithm_update': 0.004266066606654678, 'loss': 0.06434243229261144, 'time_step': 0.004449616337931434, 'init_value': -4.624672889709473, 'ave_value': -5.960151120497183, 'soft_opc': nan} step=3096




2022-04-22 07:03.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.19 [info     ] FQE_20220422070300: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001328545947407567, 'time_algorithm_update': 0.004857263592786567, 'loss': 0.07993706370173238, 'time_step': 0.005051987115726914, 'init_value': -4.615503311157227, 'ave_value': -6.482500962974285, 'soft_opc': nan} step=3440




2022-04-22 07:03.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.21 [info     ] FQE_20220422070300: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00013179487960283145, 'time_algorithm_update': 0.0048242190549539965, 'loss': 0.08752204776135121, 'time_step': 0.005017663157263467, 'init_value': -4.6434454917907715, 'ave_value': -6.9788217002255335, 'soft_opc': nan} step=3784




2022-04-22 07:03.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.22 [info     ] FQE_20220422070300: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00013263419617054074, 'time_algorithm_update': 0.004811665346456128, 'loss': 0.10671946602375355, 'time_step': 0.005007118679756342, 'init_value': -4.61330509185791, 'ave_value': -7.477872429058786, 'soft_opc': nan} step=4128




2022-04-22 07:03.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.24 [info     ] FQE_20220422070300: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001276218613912893, 'time_algorithm_update': 0.004429708386576453, 'loss': 0.11873281348041843, 'time_step': 0.004616173200829085, 'init_value': -4.609551429748535, 'ave_value': -7.899971127109015, 'soft_opc': nan} step=4472




2022-04-22 07:03.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.26 [info     ] FQE_20220422070300: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00013213310130806856, 'time_algorithm_update': 0.004804649325304253, 'loss': 0.13472026090549175, 'time_step': 0.004996624796889549, 'init_value': -4.726998329162598, 'ave_value': -8.538767484762824, 'soft_opc': nan} step=4816




2022-04-22 07:03.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.28 [info     ] FQE_20220422070300: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001367101835650067, 'time_algorithm_update': 0.004756330750709356, 'loss': 0.15865630123677643, 'time_step': 0.004950291195581126, 'init_value': -4.874804973602295, 'ave_value': -9.074331525919186, 'soft_opc': nan} step=5160




2022-04-22 07:03.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.30 [info     ] FQE_20220422070300: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001352235328319461, 'time_algorithm_update': 0.004838656547457673, 'loss': 0.17433894819818263, 'time_step': 0.00503658416659333, 'init_value': -5.128725051879883, 'ave_value': -9.67622166094559, 'soft_opc': nan} step=5504




2022-04-22 07:03.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.31 [info     ] FQE_20220422070300: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00012960406236870345, 'time_algorithm_update': 0.004506466000579124, 'loss': 0.18889865631692457, 'time_step': 0.004695863224739252, 'init_value': -4.807343482971191, 'ave_value': -9.740957029412609, 'soft_opc': nan} step=5848




2022-04-22 07:03.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.33 [info     ] FQE_20220422070300: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001333120257355446, 'time_algorithm_update': 0.004712815201559732, 'loss': 0.2105191004419223, 'time_step': 0.004908165266347486, 'init_value': -4.9812116622924805, 'ave_value': -10.353262950039493, 'soft_opc': nan} step=6192




2022-04-22 07:03.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.35 [info     ] FQE_20220422070300: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00012877444888270177, 'time_algorithm_update': 0.004773245301357535, 'loss': 0.23305185677938509, 'time_step': 0.004960930624673533, 'init_value': -5.248842239379883, 'ave_value': -10.978112657238972, 'soft_opc': nan} step=6536




2022-04-22 07:03.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.37 [info     ] FQE_20220422070300: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00013152250023775323, 'time_algorithm_update': 0.0048208860463874285, 'loss': 0.24791461595897238, 'time_step': 0.00501287953798161, 'init_value': -5.181009292602539, 'ave_value': -11.337915672573706, 'soft_opc': nan} step=6880




2022-04-22 07:03.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.39 [info     ] FQE_20220422070300: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00012600075366885164, 'time_algorithm_update': 0.004643291234970093, 'loss': 0.26900472637092654, 'time_step': 0.004829401886740396, 'init_value': -5.553182125091553, 'ave_value': -12.16579708880661, 'soft_opc': nan} step=7224




2022-04-22 07:03.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.41 [info     ] FQE_20220422070300: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00013594571934189906, 'time_algorithm_update': 0.0046720989914827565, 'loss': 0.2800380798045892, 'time_step': 0.004872532084930775, 'init_value': -5.613842964172363, 'ave_value': -12.54281454826831, 'soft_opc': nan} step=7568




2022-04-22 07:03.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.42 [info     ] FQE_20220422070300: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00013549106065617052, 'time_algorithm_update': 0.004993461592252864, 'loss': 0.2896185700976571, 'time_step': 0.005192539719648139, 'init_value': -5.5680060386657715, 'ave_value': -12.871835486470461, 'soft_opc': nan} step=7912




2022-04-22 07:03.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.44 [info     ] FQE_20220422070300: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00013298420018927995, 'time_algorithm_update': 0.004770320515299953, 'loss': 0.2986647490910146, 'time_step': 0.004965948504070903, 'init_value': -5.848428726196289, 'ave_value': -13.39387474261929, 'soft_opc': nan} step=8256




2022-04-22 07:03.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.46 [info     ] FQE_20220422070300: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001301418903262116, 'time_algorithm_update': 0.004775448593982431, 'loss': 0.31160703338790946, 'time_step': 0.0049652540406515434, 'init_value': -6.009958744049072, 'ave_value': -13.713790848727028, 'soft_opc': nan} step=8600




2022-04-22 07:03.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.48 [info     ] FQE_20220422070300: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00012897613436676735, 'time_algorithm_update': 0.004378439382065174, 'loss': 0.32583856608632, 'time_step': 0.004567181648210038, 'init_value': -6.071115970611572, 'ave_value': -13.941767696040548, 'soft_opc': nan} step=8944




2022-04-22 07:03.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.50 [info     ] FQE_20220422070300: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00013400856838669887, 'time_algorithm_update': 0.004830490711123445, 'loss': 0.3433643949575462, 'time_step': 0.005026951085689456, 'init_value': -6.153717994689941, 'ave_value': -14.203960675570846, 'soft_opc': nan} step=9288




2022-04-22 07:03.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.52 [info     ] FQE_20220422070300: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001272704712180204, 'time_algorithm_update': 0.004753856464873913, 'loss': 0.3616609591124363, 'time_step': 0.004942301400872164, 'init_value': -6.665336608886719, 'ave_value': -14.860795315708295, 'soft_opc': nan} step=9632




2022-04-22 07:03.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.53 [info     ] FQE_20220422070300: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00013316855874172476, 'time_algorithm_update': 0.004885247280431348, 'loss': 0.37641568038460993, 'time_step': 0.005082124194433523, 'init_value': -7.118058681488037, 'ave_value': -15.321265531472257, 'soft_opc': nan} step=9976




2022-04-22 07:03.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.55 [info     ] FQE_20220422070300: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001279441423194353, 'time_algorithm_update': 0.004396069188450658, 'loss': 0.3970555292583222, 'time_step': 0.004584050455758738, 'init_value': -7.5376386642456055, 'ave_value': -15.639833879174228, 'soft_opc': nan} step=10320




2022-04-22 07:03.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.57 [info     ] FQE_20220422070300: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00013075664986011594, 'time_algorithm_update': 0.004743946153064107, 'loss': 0.42796593668415794, 'time_step': 0.004935202210448509, 'init_value': -8.45289134979248, 'ave_value': -16.380557227127152, 'soft_opc': nan} step=10664




2022-04-22 07:03.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:03.59 [info     ] FQE_20220422070300: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001335282658421716, 'time_algorithm_update': 0.004897260388662649, 'loss': 0.45852560443846985, 'time_step': 0.005095433357150056, 'init_value': -9.019064903259277, 'ave_value': -16.759825173967627, 'soft_opc': nan} step=11008




2022-04-22 07:03.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.01 [info     ] FQE_20220422070300: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00012854850569436716, 'time_algorithm_update': 0.004820370396902395, 'loss': 0.4975923045300121, 'time_step': 0.005011363778003427, 'init_value': -9.220292091369629, 'ave_value': -16.798094354956703, 'soft_opc': nan} step=11352




2022-04-22 07:04.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.03 [info     ] FQE_20220422070300: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00013313113256942395, 'time_algorithm_update': 0.004971163910488749, 'loss': 0.5272674658877212, 'time_step': 0.00516581466031629, 'init_value': -10.142980575561523, 'ave_value': -17.470267205686635, 'soft_opc': nan} step=11696




2022-04-22 07:04.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.04 [info     ] FQE_20220422070300: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00012924920680911043, 'time_algorithm_update': 0.00429536644802537, 'loss': 0.5795626910078491, 'time_step': 0.004485182290853456, 'init_value': -11.069928169250488, 'ave_value': -18.227124160957764, 'soft_opc': nan} step=12040




2022-04-22 07:04.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.06 [info     ] FQE_20220422070300: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00013124526933182116, 'time_algorithm_update': 0.004763888758282328, 'loss': 0.6157100077198688, 'time_step': 0.004955058874085892, 'init_value': -11.843464851379395, 'ave_value': -18.468267470031037, 'soft_opc': nan} step=12384




2022-04-22 07:04.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.08 [info     ] FQE_20220422070300: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00012923049372296, 'time_algorithm_update': 0.00471749555232913, 'loss': 0.6548645086153302, 'time_step': 0.004907605952994768, 'init_value': -13.130316734313965, 'ave_value': -19.23107415932264, 'soft_opc': nan} step=12728




2022-04-22 07:04.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.10 [info     ] FQE_20220422070300: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00013216359670772108, 'time_algorithm_update': 0.004797224388566128, 'loss': 0.6838962044133696, 'time_step': 0.00499084938404172, 'init_value': -13.817951202392578, 'ave_value': -19.676652660929, 'soft_opc': nan} step=13072




2022-04-22 07:04.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.12 [info     ] FQE_20220422070300: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001290281151616296, 'time_algorithm_update': 0.004251376834026602, 'loss': 0.7331474082392835, 'time_step': 0.00444073663201443, 'init_value': -15.001976013183594, 'ave_value': -20.253948155360984, 'soft_opc': nan} step=13416




2022-04-22 07:04.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.13 [info     ] FQE_20220422070300: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00013092229532641033, 'time_algorithm_update': 0.004737623209177062, 'loss': 0.7603798226323412, 'time_step': 0.004931632862534634, 'init_value': -15.607074737548828, 'ave_value': -20.446878248621367, 'soft_opc': nan} step=13760




2022-04-22 07:04.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.15 [info     ] FQE_20220422070300: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00013349569121072458, 'time_algorithm_update': 0.004803779513336891, 'loss': 0.8081948822689091, 'time_step': 0.004998284716938817, 'init_value': -16.212364196777344, 'ave_value': -20.479602251454835, 'soft_opc': nan} step=14104




2022-04-22 07:04.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.17 [info     ] FQE_20220422070300: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00012885623199995175, 'time_algorithm_update': 0.004800630170245504, 'loss': 0.8369890870201553, 'time_step': 0.004989931749743085, 'init_value': -16.814449310302734, 'ave_value': -20.63937957226005, 'soft_opc': nan} step=14448




2022-04-22 07:04.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.19 [info     ] FQE_20220422070300: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001291681167691253, 'time_algorithm_update': 0.004624529633411142, 'loss': 0.8735011399420368, 'time_step': 0.004816558471945829, 'init_value': -17.256216049194336, 'ave_value': -20.50296163101605, 'soft_opc': nan} step=14792




2022-04-22 07:04.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.21 [info     ] FQE_20220422070300: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001302915950154149, 'time_algorithm_update': 0.004641868347345397, 'loss': 0.9125375153908376, 'time_step': 0.004830537147300188, 'init_value': -17.940519332885742, 'ave_value': -20.86279568519141, 'soft_opc': nan} step=15136




2022-04-22 07:04.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.23 [info     ] FQE_20220422070300: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00013622433640236078, 'time_algorithm_update': 0.004903946505036465, 'loss': 0.9565707672993804, 'time_step': 0.005103874345158421, 'init_value': -18.570613861083984, 'ave_value': -20.841399821845464, 'soft_opc': nan} step=15480




2022-04-22 07:04.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.24 [info     ] FQE_20220422070300: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00013039417045060977, 'time_algorithm_update': 0.004709075356638709, 'loss': 0.9891587159553066, 'time_step': 0.004900403494058654, 'init_value': -18.847694396972656, 'ave_value': -20.579499263726678, 'soft_opc': nan} step=15824




2022-04-22 07:04.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.26 [info     ] FQE_20220422070300: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00013276588085085847, 'time_algorithm_update': 0.004759760790093001, 'loss': 1.0056335370836043, 'time_step': 0.004954077476678893, 'init_value': -19.398378372192383, 'ave_value': -20.796722623912267, 'soft_opc': nan} step=16168




2022-04-22 07:04.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.28 [info     ] FQE_20220422070300: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00013362876204557196, 'time_algorithm_update': 0.004433681105458459, 'loss': 1.0271435672915432, 'time_step': 0.004630753467249316, 'init_value': -19.97562026977539, 'ave_value': -20.50051295383632, 'soft_opc': nan} step=16512




2022-04-22 07:04.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.30 [info     ] FQE_20220422070300: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00012981198554815247, 'time_algorithm_update': 0.004669083412303481, 'loss': 1.0756198729518367, 'time_step': 0.004858843115873115, 'init_value': -20.810346603393555, 'ave_value': -21.134014880830037, 'soft_opc': nan} step=16856




2022-04-22 07:04.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:04.32 [info     ] FQE_20220422070300: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001313949740210245, 'time_algorithm_update': 0.0048385983289674275, 'loss': 1.099700627913482, 'time_step': 0.005040272723796756, 'init_value': -21.001848220825195, 'ave_value': -20.772640610370484, 'soft_opc': nan} step=17200




2022-04-22 07:04.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422070300/model_17200.pt
search iteration:  28
using hyper params:  [0.0019261155206568828, 0.004470368346022396, 7.660494542906452e-05, 1]
2022-04-22 07:04.32 [debug    ] RoundIterator is selected.
2022-04-22 07:04.32 [info     ] Directory is created at d3rlpy_logs/CQL_20220422070432
2022-04-22 07:04.32 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:04.32 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 07:04.32 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422070432/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0019261155206568828, 'actor_optim_factory': {'opt

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:04.46 [info     ] CQL_20220422070432: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003067668462764321, 'time_algorithm_update': 0.03999652683390358, 'temp_loss': 4.795497334072356, 'temp': 0.9861296926964225, 'alpha_loss': -17.61838822557747, 'alpha': 1.0177861055886814, 'critic_loss': 25.921348571777344, 'actor_loss': -1.8295859803009584, 'time_step': 0.04038540751947833, 'td_error': 1.2105999135983714, 'init_value': 0.21277138590812683, 'ave_value': 0.37392126533210335} step=346
2022-04-22 07:04.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:05.00 [info     ] CQL_20220422070432: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00030349031349138027, 'time_algorithm_update': 0.037958657121382695, 'temp_loss': 4.846459731890287, 'temp': 0.9597493611663752, 'alpha_loss': -18.34814589836694, 'alpha': 1.0543738762078259, 'critic_loss': 30.776123625694673, 'actor_loss': -1.8290676002557567, 'time_step': 0.03834545750149412, 'td_error': 1.2085894040344738, 'init_value': 0.07152049243450165, 'ave_value': 0.34263938681817324} step=692
2022-04-22 07:05.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:05.13 [info     ] CQL_20220422070432: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0002933010200544589, 'time_algorithm_update': 0.03548915468888476, 'temp_loss': 4.720501144497381, 'temp': 0.9348650696994252, 'alpha_loss': -19.017053438748928, 'alpha': 1.0927192896087734, 'critic_loss': 40.28408603998967, 'actor_loss': -1.507396604801189, 'time_step': 0.03586703573348205, 'td_error': 1.2042934374482281, 'init_value': -0.2729014754295349, 'ave_value': 0.08454983316224127} step=1038
2022-04-22 07:05.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:05.26 [info     ] CQL_20220422070432: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0002978371746967294, 'time_algorithm_update': 0.0354877414041861, 'temp_loss': 4.599265851037351, 'temp': 0.9110033884213838, 'alpha_loss': -19.718303730033035, 'alpha': 1.1329340293917354, 'critic_loss': 52.24474290616251, 'actor_loss': -1.0377094532023965, 'time_step': 0.03587056653348008, 'td_error': 1.201973253165372, 'init_value': -0.5762571096420288, 'ave_value': -0.1107565158694601} step=1384
2022-04-22 07:05.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:05.38 [info     ] CQL_20220422070432: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0002921647419130182, 'time_algorithm_update': 0.035253965785737675, 'temp_loss': 4.483271066853077, 'temp': 0.8879988761995569, 'alpha_loss': -20.452392715939208, 'alpha': 1.1750759527173344, 'critic_loss': 65.96793336813161, 'actor_loss': -0.5480990136160672, 'time_step': 0.03563047213361442, 'td_error': 1.210962555308032, 'init_value': -1.1883244514465332, 'ave_value': -0.685017866152703} step=1730
2022-04-22 07:05.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:05.51 [info     ] CQL_20220422070432: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00029778342715577584, 'time_algorithm_update': 0.03503797646891864, 'temp_loss': 4.371812606822549, 'temp': 0.8657577378901443, 'alpha_loss': -21.216964142860014, 'alpha': 1.2191752932664286, 'critic_loss': 81.40818643294318, 'actor_loss': -0.03965375072866506, 'time_step': 0.0354208140014913, 'td_error': 1.2108014914412777, 'init_value': -1.4192990064620972, 'ave_value': -0.8418534205529632} step=2076
2022-04-22 07:05.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:06.04 [info     ] CQL_20220422070432: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0002948803708732473, 'time_algorithm_update': 0.035845128787046224, 'temp_loss': 4.262801281978629, 'temp': 0.8442143163929096, 'alpha_loss': -22.016324655169008, 'alpha': 1.2652773002668611, 'critic_loss': 99.31644422057047, 'actor_loss': 0.3551814380624046, 'time_step': 0.03622249233929408, 'td_error': 1.217169623193608, 'init_value': -1.64860999584198, 'ave_value': -1.0609685545201661} step=2422
2022-04-22 07:06.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:06.17 [info     ] CQL_20220422070432: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00029007754573932273, 'time_algorithm_update': 0.035238816558970196, 'temp_loss': 4.157161760881457, 'temp': 0.8233153265335657, 'alpha_loss': -22.85824807668697, 'alpha': 1.313420532066698, 'critic_loss': 121.10327902556844, 'actor_loss': 0.5730411672932392, 'time_step': 0.03561350996094632, 'td_error': 1.2154354380287626, 'init_value': -1.7781392335891724, 'ave_value': -1.2135144630474408} step=2768
2022-04-22 07:06.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:06.30 [info     ] CQL_20220422070432: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00028942706267957744, 'time_algorithm_update': 0.035203508558989946, 'temp_loss': 4.054895248716277, 'temp': 0.8030211830759324, 'alpha_loss': -23.725985163209067, 'alpha': 1.3636496542506136, 'critic_loss': 151.79504791436167, 'actor_loss': 0.46552714943412066, 'time_step': 0.03557778714020128, 'td_error': 1.208629034853864, 'init_value': -1.5285598039627075, 'ave_value': -1.0459096903312597} step=3114
2022-04-22 07:06.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:06.42 [info     ] CQL_20220422070432: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0002839530823547716, 'time_algorithm_update': 0.03536800704250446, 'temp_loss': 3.954996346049226, 'temp': 0.7832932871890206, 'alpha_loss': -24.633164141219474, 'alpha': 1.416004905121864, 'critic_loss': 195.02942004782616, 'actor_loss': -0.0030878787709540025, 'time_step': 0.03573523573792739, 'td_error': 1.2122997973491356, 'init_value': -1.1491727828979492, 'ave_value': -0.7823355671804401} step=3460
2022-04-22 07:06.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:06.55 [info     ] CQL_20220422070432: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0002980280473742182, 'time_algorithm_update': 0.035150204779784804, 'temp_loss': 3.8584681645983214, 'temp': 0.764101067030361, 'alpha_loss': -25.574933537169, 'alpha': 1.470533031259658, 'critic_loss': 249.55592972419166, 'actor_loss': -0.7081668595917997, 'time_step': 0.035531852286675075, 'td_error': 1.2180203742082032, 'init_value': -0.24750874936580658, 'ave_value': -0.02194453430379268} step=3806
2022-04-22 07:06.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:07.08 [info     ] CQL_20220422070432: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00030853500255959574, 'time_algorithm_update': 0.03475559860295643, 'temp_loss': 3.764094759963151, 'temp': 0.7454206833260597, 'alpha_loss': -26.562550302185763, 'alpha': 1.5273061849478353, 'critic_loss': 313.58766791724054, 'actor_loss': -1.4308237216376156, 'time_step': 0.03515474437978226, 'td_error': 1.2257544941818272, 'init_value': 0.36957699060440063, 'ave_value': 0.4901746944999062} step=4152
2022-04-22 07:07.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:07.20 [info     ] CQL_20220422070432: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00032201254298921267, 'time_algorithm_update': 0.034609995825442275, 'temp_loss': 3.6722664061309285, 'temp': 0.7272241876993565, 'alpha_loss': -27.58631975802383, 'alpha': 1.586387594655759, 'critic_loss': 377.6266908149499, 'actor_loss': -2.071032965114351, 'time_step': 0.03503026507493388, 'td_error': 1.2315214205975165, 'init_value': 0.9669604301452637, 'ave_value': 1.0228848285898884} step=4498
2022-04-22 07:07.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:07.32 [info     ] CQL_20220422070432: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003328812604694697, 'time_algorithm_update': 0.033572905325476146, 'temp_loss': 3.5829620216623206, 'temp': 0.7094974097488933, 'alpha_loss': -28.655183378671634, 'alpha': 1.6478475339150842, 'critic_loss': 439.88283781371365, 'actor_loss': -2.6087685788987, 'time_step': 0.033997919518134495, 'td_error': 1.23338252158193, 'init_value': 1.471291184425354, 'ave_value': 1.5116968832291828} step=4844
2022-04-22 07:07.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:07.44 [info     ] CQL_20220422070432: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003185733894392245, 'time_algorithm_update': 0.033606929586112846, 'temp_loss': 3.4968527190258047, 'temp': 0.6922155506693559, 'alpha_loss': -29.76344485089958, 'alpha': 1.7117649006705753, 'critic_loss': 504.7492410295961, 'actor_loss': -3.0980065799172904, 'time_step': 0.03401473698588465, 'td_error': 1.2348940238392991, 'init_value': 2.0063908100128174, 'ave_value': 2.0281706853408052} step=5190
2022-04-22 07:07.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:07.57 [info     ] CQL_20220422070432: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00031728138124322616, 'time_algorithm_update': 0.03475259218601822, 'temp_loss': 3.4106195482904513, 'temp': 0.6753716263812402, 'alpha_loss': -30.920842005338283, 'alpha': 1.778216123236397, 'critic_loss': 578.62958257598, 'actor_loss': -3.553083240641335, 'time_step': 0.03515773150273141, 'td_error': 1.2350750803909254, 'init_value': 2.4654204845428467, 'ave_value': 2.4798314953144267} step=5536
2022-04-22 07:07.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:08.09 [info     ] CQL_20220422070432: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003249383386159908, 'time_algorithm_update': 0.034451967718973325, 'temp_loss': 3.3274019239954864, 'temp': 0.6589501818825054, 'alpha_loss': -32.124342504953376, 'alpha': 1.8473025295086678, 'critic_loss': 665.0549081791343, 'actor_loss': -3.948958961260801, 'time_step': 0.034865834120381085, 'td_error': 1.2353400474194223, 'init_value': 2.8640143871307373, 'ave_value': 2.878211395341449} step=5882
2022-04-22 07:08.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:08.22 [info     ] CQL_20220422070432: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00032768842112811315, 'time_algorithm_update': 0.03390308336026407, 'temp_loss': 3.246124685844245, 'temp': 0.6429365623548541, 'alpha_loss': -33.36811380992735, 'alpha': 1.919104626757561, 'critic_loss': 762.6825379829186, 'actor_loss': -4.3244663346020475, 'time_step': 0.03431901559664335, 'td_error': 1.2355118912823544, 'init_value': 3.171269178390503, 'ave_value': 3.186571875507089} step=6228
2022-04-22 07:08.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:08.34 [info     ] CQL_20220422070432: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00032403910091157595, 'time_algorithm_update': 0.03458444920578444, 'temp_loss': 3.168422546000839, 'temp': 0.6273137765123665, 'alpha_loss': -34.66628405400094, 'alpha': 1.9937233645791952, 'critic_loss': 872.8978737186145, 'actor_loss': -4.641198122432466, 'time_step': 0.0350044683224893, 'td_error': 1.2359401247966728, 'init_value': 3.5268008708953857, 'ave_value': 3.5392854293413554} step=6574
2022-04-22 07:08.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:08.47 [info     ] CQL_20220422070432: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00031685277905767363, 'time_algorithm_update': 0.03480800590074131, 'temp_loss': 3.0922687908128506, 'temp': 0.6120724676316873, 'alpha_loss': -36.01892337909324, 'alpha': 2.07127430328744, 'critic_loss': 993.8706555669708, 'actor_loss': -4.961169233211892, 'time_step': 0.035218375955702944, 'td_error': 1.2369631539057395, 'init_value': 3.856562614440918, 'ave_value': 3.8707802945033474} step=6920
2022-04-22 07:08.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:08.59 [info     ] CQL_20220422070432: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00031954911402884244, 'time_algorithm_update': 0.03373035874669952, 'temp_loss': 3.0162141970816374, 'temp': 0.5972073272818086, 'alpha_loss': -37.416642492217136, 'alpha': 2.1518562163920762, 'critic_loss': 1138.577657534208, 'actor_loss': -5.186048012937425, 'time_step': 0.034128300027351156, 'td_error': 1.2371890747845078, 'init_value': 4.04072904586792, 'ave_value': 4.0502861769067104} step=7266
2022-04-22 07:08.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:09.11 [info     ] CQL_20220422070432: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003084626501006198, 'time_algorithm_update': 0.03311858700879047, 'temp_loss': 2.943862578772396, 'temp': 0.5827038691223012, 'alpha_loss': -38.87449741914782, 'alpha': 2.235581422816811, 'critic_loss': 1296.3721676865066, 'actor_loss': -5.326518372993249, 'time_step': 0.0335064010123986, 'td_error': 1.2375457547803945, 'init_value': 4.164117813110352, 'ave_value': 4.175526265275032} step=7612
2022-04-22 07:09.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:09.23 [info     ] CQL_20220422070432: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003241514194907481, 'time_algorithm_update': 0.03491689428428694, 'temp_loss': 2.8717639508274937, 'temp': 0.5685546716858196, 'alpha_loss': -40.389597732896746, 'alpha': 2.3225797711080207, 'critic_loss': 1469.224657709199, 'actor_loss': -5.403902960650494, 'time_step': 0.03532528532722782, 'td_error': 1.2404430964610516, 'init_value': 4.399835109710693, 'ave_value': 4.40384292116432} step=7958
2022-04-22 07:09.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:09.35 [info     ] CQL_20220422070432: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0003127748566555839, 'time_algorithm_update': 0.03396150211378329, 'temp_loss': 2.801831931048046, 'temp': 0.5547543378234598, 'alpha_loss': -41.95474423976303, 'alpha': 2.412962581380943, 'critic_loss': 1644.9398983641167, 'actor_loss': -5.507652940088614, 'time_step': 0.03435851452667589, 'td_error': 1.2395101600544132, 'init_value': 4.419058799743652, 'ave_value': 4.42687166204041} step=8304
2022-04-22 07:09.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:09.48 [info     ] CQL_20220422070432: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003292801752255831, 'time_algorithm_update': 0.03377292128656641, 'temp_loss': 2.734267892176016, 'temp': 0.5412870269979355, 'alpha_loss': -43.59580955064366, 'alpha': 2.5068754736398686, 'critic_loss': 1855.5908764083952, 'actor_loss': -5.5697849885576725, 'time_step': 0.03419164494972009, 'td_error': 1.240714299162602, 'init_value': 4.565323352813721, 'ave_value': 4.571438685318145} step=8650
2022-04-22 07:09.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:10.00 [info     ] CQL_20220422070432: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00031881042987625037, 'time_algorithm_update': 0.034098785047586254, 'temp_loss': 2.6681091916354407, 'temp': 0.5281477026167632, 'alpha_loss': -45.28873263893789, 'alpha': 2.604445822666146, 'critic_loss': 2066.829006658124, 'actor_loss': -5.665555322790421, 'time_step': 0.03449898579217106, 'td_error': 1.2426173228869313, 'init_value': 4.781067371368408, 'ave_value': 4.782297527671369} step=8996
2022-04-22 07:10.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:10.12 [info     ] CQL_20220422070432: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00032959370254781207, 'time_algorithm_update': 0.033989587960215666, 'temp_loss': 2.6036436192562125, 'temp': 0.5153262765076808, 'alpha_loss': -47.055155230395364, 'alpha': 2.705824384799582, 'critic_loss': 2294.366158722453, 'actor_loss': -5.7531621952277385, 'time_step': 0.034403717586759884, 'td_error': 1.2408526602925012, 'init_value': 4.770895004272461, 'ave_value': 4.778012766686589} step=9342
2022-04-22 07:10.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:10.25 [info     ] CQL_20220422070432: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00032317224954594076, 'time_algorithm_update': 0.035164556062290436, 'temp_loss': 2.5396348079504993, 'temp': 0.5028180158723986, 'alpha_loss': -48.8849700089824, 'alpha': 2.811159025037909, 'critic_loss': 2501.9861764191205, 'actor_loss': -5.858536728544731, 'time_step': 0.03557708842216888, 'td_error': 1.2430979219972338, 'init_value': 4.970068454742432, 'ave_value': 4.97215434450174} step=9688
2022-04-22 07:10.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:10.38 [info     ] CQL_20220422070432: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00032142476539391313, 'time_algorithm_update': 0.03530222901030083, 'temp_loss': 2.4782733648498625, 'temp': 0.4906153715931611, 'alpha_loss': -50.785885562786476, 'alpha': 2.9205892699302276, 'critic_loss': 2747.261013141257, 'actor_loss': -5.925699810072176, 'time_step': 0.03571286642482515, 'td_error': 1.2412358321148684, 'init_value': 4.946984767913818, 'ave_value': 4.957730751299869} step=10034
2022-04-22 07:10.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:10.50 [info     ] CQL_20220422070432: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00032411007522847613, 'time_algorithm_update': 0.03397212690011615, 'temp_loss': 2.4179986556830433, 'temp': 0.4787076343001658, 'alpha_loss': -52.76752465308746, 'alpha': 3.034286379814148, 'critic_loss': 3025.216867435874, 'actor_loss': -5.990410241088426, 'time_step': 0.03439023315561989, 'td_error': 1.242044054418451, 'init_value': 5.013054847717285, 'ave_value': 5.022340704446271} step=10380
2022-04-22 07:10.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:11.02 [info     ] CQL_20220422070432: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003079713424506215, 'time_algorithm_update': 0.03392985790451138, 'temp_loss': 2.359400291663374, 'temp': 0.46709026025898887, 'alpha_loss': -54.81668455752334, 'alpha': 3.1524111728447712, 'critic_loss': 3328.089065463557, 'actor_loss': -6.02931332036939, 'time_step': 0.034325160043088, 'td_error': 1.243075278859982, 'init_value': 5.118231296539307, 'ave_value': 5.1241050735589395} step=10726
2022-04-22 07:11.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:11.14 [info     ] CQL_20220422070432: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00031694442550570976, 'time_algorithm_update': 0.03387470741492475, 'temp_loss': 2.3023072653423156, 'temp': 0.45575352771089256, 'alpha_loss': -56.95099617842305, 'alpha': 3.2751255979427714, 'critic_loss': 3651.901068009393, 'actor_loss': -6.006256033230379, 'time_step': 0.03428236873163653, 'td_error': 1.2435208069557027, 'init_value': 5.0882792472839355, 'ave_value': 5.091243167260407} step=11072
2022-04-22 07:11.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:11.27 [info     ] CQL_20220422070432: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00031716423916678895, 'time_algorithm_update': 0.034538251127121763, 'temp_loss': 2.246141577042596, 'temp': 0.4446926029086802, 'alpha_loss': -59.17091335450983, 'alpha': 3.4026226204943795, 'critic_loss': 3919.6972839708274, 'actor_loss': -5.942860170595908, 'time_step': 0.034944820266238526, 'td_error': 1.244057170593554, 'init_value': 5.123065948486328, 'ave_value': 5.123714477951462} step=11418
2022-04-22 07:11.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:11.39 [info     ] CQL_20220422070432: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003161223637575359, 'time_algorithm_update': 0.03394458266351953, 'temp_loss': 2.191884596912847, 'temp': 0.4339008370059074, 'alpha_loss': -61.46795090912394, 'alpha': 3.5350713137257306, 'critic_loss': 4189.449944115788, 'actor_loss': -5.917955266257931, 'time_step': 0.034347625826135535, 'td_error': 1.242702706571082, 'init_value': 5.010214328765869, 'ave_value': 5.015887017880869} step=11764
2022-04-22 07:11.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:11.51 [info     ] CQL_20220422070432: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00031255848834969407, 'time_algorithm_update': 0.03358744954787238, 'temp_loss': 2.138708180085772, 'temp': 0.42336933263119936, 'alpha_loss': -63.86063525029, 'alpha': 3.672680939553101, 'critic_loss': 4447.683418759032, 'actor_loss': -5.920068385284071, 'time_step': 0.03398966720338502, 'td_error': 1.242807274898767, 'init_value': 5.052022933959961, 'ave_value': 5.055696096460985} step=12110
2022-04-22 07:11.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:12.04 [info     ] CQL_20220422070432: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00033093876921372605, 'time_algorithm_update': 0.03549052249489492, 'temp_loss': 2.086539390459226, 'temp': 0.4130942435622904, 'alpha_loss': -66.35520037612474, 'alpha': 3.815650075846325, 'critic_loss': 4771.986908135386, 'actor_loss': -5.884097939970865, 'time_step': 0.03591512600121471, 'td_error': 1.2445022079153698, 'init_value': 5.139013767242432, 'ave_value': 5.138574481926127} step=12456
2022-04-22 07:12.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:12.16 [info     ] CQL_20220422070432: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00031309803097234295, 'time_algorithm_update': 0.03435631018842576, 'temp_loss': 2.036216086045855, 'temp': 0.4030694572222715, 'alpha_loss': -68.93297217484843, 'alpha': 3.964197931951181, 'critic_loss': 5142.084383749549, 'actor_loss': -5.813674298325026, 'time_step': 0.03475903982371953, 'td_error': 1.243668114742393, 'init_value': 5.045332431793213, 'ave_value': 5.04664808800572} step=12802
2022-04-22 07:12.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:12.29 [info     ] CQL_20220422070432: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00035603818176798736, 'time_algorithm_update': 0.0347876824395505, 'temp_loss': 1.9862411897306498, 'temp': 0.3932873333637425, 'alpha_loss': -71.61122962642956, 'alpha': 4.118517655168654, 'critic_loss': 5362.156616916546, 'actor_loss': -5.829317954234305, 'time_step': 0.035232506735476454, 'td_error': 1.2463647981992498, 'init_value': 5.270157337188721, 'ave_value': 5.264235862887488} step=13148
2022-04-22 07:12.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:12.41 [info     ] CQL_20220422070432: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003284925670293025, 'time_algorithm_update': 0.03472782766198836, 'temp_loss': 1.9376945685100004, 'temp': 0.38374497659633616, 'alpha_loss': -74.39715820929908, 'alpha': 4.278813098896445, 'critic_loss': 5074.315941959448, 'actor_loss': -6.003715658463495, 'time_step': 0.035147720678693296, 'td_error': 1.2458835045696002, 'init_value': 5.3404459953308105, 'ave_value': 5.337191938053889} step=13494
2022-04-22 07:12.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:12.54 [info     ] CQL_20220422070432: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003268477544619169, 'time_algorithm_update': 0.034417429411342376, 'temp_loss': 1.8914865520648185, 'temp': 0.37443208927019483, 'alpha_loss': -77.29316940748623, 'alpha': 4.445375042843681, 'critic_loss': 4445.149021744039, 'actor_loss': -6.24899585123007, 'time_step': 0.034834854864660716, 'td_error': 1.246677677743342, 'init_value': 5.523770332336426, 'ave_value': 5.523184841602772} step=13840
2022-04-22 07:12.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:13.06 [info     ] CQL_20220422070432: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003189137905319302, 'time_algorithm_update': 0.03425272076116132, 'temp_loss': 1.8449576889848434, 'temp': 0.36534506831899544, 'alpha_loss': -80.30259193023505, 'alpha': 4.618423291024445, 'critic_loss': 4069.529589702628, 'actor_loss': -6.375740934658602, 'time_step': 0.03465905423798313, 'td_error': 1.2488814404710742, 'init_value': 5.787525177001953, 'ave_value': 5.783131713378819} step=14186
2022-04-22 07:13.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:13.18 [info     ] CQL_20220422070432: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003181206697673467, 'time_algorithm_update': 0.03461446651833595, 'temp_loss': 1.8006561353716548, 'temp': 0.35647814613201717, 'alpha_loss': -83.43951098491691, 'alpha': 4.798218301265915, 'critic_loss': 3584.73172049991, 'actor_loss': -6.563480796152457, 'time_step': 0.03502346394379015, 'td_error': 1.2493619594494985, 'init_value': 5.9369893074035645, 'ave_value': 5.935301516529733} step=14532
2022-04-22 07:13.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:13.31 [info     ] CQL_20220422070432: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00031464706266546525, 'time_algorithm_update': 0.03428533656059662, 'temp_loss': 1.7566424346383596, 'temp': 0.3478273852712157, 'alpha_loss': -86.6857003890021, 'alpha': 4.985020607193081, 'critic_loss': 3239.4880476935064, 'actor_loss': -6.7360149739105575, 'time_step': 0.034688452075671596, 'td_error': 1.2513962065336823, 'init_value': 6.185314178466797, 'ave_value': 6.180638563310778} step=14878
2022-04-22 07:13.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:13.43 [info     ] CQL_20220422070432: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00033700810691524795, 'time_algorithm_update': 0.034790291262499856, 'temp_loss': 1.7142051206847835, 'temp': 0.3393852511754615, 'alpha_loss': -90.0601524397128, 'alpha': 5.179088260397057, 'critic_loss': 2876.6260090205024, 'actor_loss': -6.938373226650877, 'time_step': 0.03522674541252886, 'td_error': 1.2524798722032564, 'init_value': 6.375349044799805, 'ave_value': 6.3729835540098625} step=15224
2022-04-22 07:13.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:13.56 [info     ] CQL_20220422070432: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.000338280821122186, 'time_algorithm_update': 0.03593261117880055, 'temp_loss': 1.6725382498233994, 'temp': 0.33114837015295306, 'alpha_loss': -93.57381119480023, 'alpha': 5.380718826558549, 'critic_loss': 2559.1416192027186, 'actor_loss': -7.156925893243337, 'time_step': 0.03636437551134584, 'td_error': 1.2549506373392085, 'init_value': 6.65546989440918, 'ave_value': 6.652653299207764} step=15570
2022-04-22 07:13.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:14.09 [info     ] CQL_20220422070432: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003198605741379578, 'time_algorithm_update': 0.03519278316828557, 'temp_loss': 1.6319807158040174, 'temp': 0.3231110650335433, 'alpha_loss': -97.21192546249125, 'alpha': 5.590198336309091, 'critic_loss': 2328.602500254019, 'actor_loss': -7.346630928833361, 'time_step': 0.03560622923636023, 'td_error': 1.2545236864745426, 'init_value': 6.738397598266602, 'ave_value': 6.7385536208543915} step=15916
2022-04-22 07:14.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:14.21 [info     ] CQL_20220422070432: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003346680216706557, 'time_algorithm_update': 0.03475370641388645, 'temp_loss': 1.5928689310316406, 'temp': 0.3152677981839704, 'alpha_loss': -101.00181332626784, 'alpha': 5.807824118288956, 'critic_loss': 2268.070904505735, 'actor_loss': -7.464913319990125, 'time_step': 0.03517516706720253, 'td_error': 1.2566857829507123, 'init_value': 6.970996379852295, 'ave_value': 6.970248209870557} step=16262
2022-04-22 07:14.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:14.34 [info     ] CQL_20220422070432: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00031798767905703856, 'time_algorithm_update': 0.035210051288494484, 'temp_loss': 1.5535365708301523, 'temp': 0.3076162526890033, 'alpha_loss': -104.92459913485312, 'alpha': 6.033907672573377, 'critic_loss': 1983.0077207708634, 'actor_loss': -7.751186435622287, 'time_step': 0.03561494460684716, 'td_error': 1.2577383378513456, 'init_value': 7.191542148590088, 'ave_value': 7.192652165262323} step=16608
2022-04-22 07:14.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:14.47 [info     ] CQL_20220422070432: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003296253998155539, 'time_algorithm_update': 0.034796257928616736, 'temp_loss': 1.5160490250311836, 'temp': 0.30015027471360445, 'alpha_loss': -109.00744743567671, 'alpha': 6.268801374931556, 'critic_loss': 1850.2373925357886, 'actor_loss': -7.933492989898417, 'time_step': 0.035213072864995525, 'td_error': 1.2599570297391849, 'init_value': 7.4092793464660645, 'ave_value': 7.409219818241849} step=16954
2022-04-22 07:14.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:14.59 [info     ] CQL_20220422070432: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003207508539188804, 'time_algorithm_update': 0.03406673153011785, 'temp_loss': 1.47935559708259, 'temp': 0.29286533318503055, 'alpha_loss': -113.24702005992735, 'alpha': 6.512849148987345, 'critic_loss': 1852.6815753561914, 'actor_loss': -8.05875868466548, 'time_step': 0.03447061467032901, 'td_error': 1.2621314298071138, 'init_value': 7.5785603523254395, 'ave_value': 7.577098132694655} step=17300
2022-04-22 07:14.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422070432/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:15.00 [info     ] FQE_20220422071459: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.0001241356493478798, 'time_algorithm_update': 0.002380929797528738, 'loss': 0.00436027608643544, 'time_step': 0.0025653250246162876, 'init_value': -0.2200082391500473, 'ave_value': -0.1752125734547237, 'soft_opc': nan} step=166




2022-04-22 07:15.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.00 [info     ] FQE_20220422071459: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00011802725045077772, 'time_algorithm_update': 0.0021881813026336304, 'loss': 0.0024175484144375718, 'time_step': 0.002360417182187, 'init_value': -0.32086247205734253, 'ave_value': -0.23149950222389118, 'soft_opc': nan} step=332




2022-04-22 07:15.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.00 [info     ] FQE_20220422071459: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001159102083688759, 'time_algorithm_update': 0.001946664718260248, 'loss': 0.0020103151061437487, 'time_step': 0.0021122378039072796, 'init_value': -0.3627833127975464, 'ave_value': -0.2519315598448655, 'soft_opc': nan} step=498




2022-04-22 07:15.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.01 [info     ] FQE_20220422071459: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012343762868858245, 'time_algorithm_update': 0.0023486010999564664, 'loss': 0.0019249203288787414, 'time_step': 0.0025348950581378245, 'init_value': -0.43119826912879944, 'ave_value': -0.29097611333939943, 'soft_opc': nan} step=664




2022-04-22 07:15.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.01 [info     ] FQE_20220422071459: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00013489321053746235, 'time_algorithm_update': 0.00258077770830637, 'loss': 0.0018256743268827986, 'time_step': 0.0027778938592198385, 'init_value': -0.49649712443351746, 'ave_value': -0.3322383552084903, 'soft_opc': nan} step=830




2022-04-22 07:15.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.02 [info     ] FQE_20220422071459: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.000127779432089932, 'time_algorithm_update': 0.0025414056088550983, 'loss': 0.0017386046778242361, 'time_step': 0.002726284854383354, 'init_value': -0.5188719630241394, 'ave_value': -0.3430936305439687, 'soft_opc': nan} step=996




2022-04-22 07:15.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.02 [info     ] FQE_20220422071459: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00012213925281202937, 'time_algorithm_update': 0.0024371865284012026, 'loss': 0.001717920640921108, 'time_step': 0.0026162259549979703, 'init_value': -0.5835815668106079, 'ave_value': -0.38668546906194173, 'soft_opc': nan} step=1162




2022-04-22 07:15.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.03 [info     ] FQE_20220422071459: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00011900677738419499, 'time_algorithm_update': 0.0021123713757618366, 'loss': 0.0016333062912853636, 'time_step': 0.0022840729678969785, 'init_value': -0.6227740049362183, 'ave_value': -0.4072689899184682, 'soft_opc': nan} step=1328




2022-04-22 07:15.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.03 [info     ] FQE_20220422071459: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00012230585856610034, 'time_algorithm_update': 0.002299633370824607, 'loss': 0.0016258354582795761, 'time_step': 0.002478846584457949, 'init_value': -0.6773096323013306, 'ave_value': -0.4545516087799459, 'soft_opc': nan} step=1494




2022-04-22 07:15.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.04 [info     ] FQE_20220422071459: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00013250902474644673, 'time_algorithm_update': 0.0025012636759194984, 'loss': 0.0016170782843571022, 'time_step': 0.00269895145692021, 'init_value': -0.7406046390533447, 'ave_value': -0.4979466195042069, 'soft_opc': nan} step=1660




2022-04-22 07:15.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.04 [info     ] FQE_20220422071459: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001286900187113199, 'time_algorithm_update': 0.0025091171264648438, 'loss': 0.0016179512614659202, 'time_step': 0.002698326685342444, 'init_value': -0.8101140260696411, 'ave_value': -0.5566406395588372, 'soft_opc': nan} step=1826




2022-04-22 07:15.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.05 [info     ] FQE_20220422071459: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00012183189392089844, 'time_algorithm_update': 0.00229238602052252, 'loss': 0.0016154473768799644, 'time_step': 0.0024739130433783472, 'init_value': -0.8491439819335938, 'ave_value': -0.5801567651465669, 'soft_opc': nan} step=1992




2022-04-22 07:15.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.05 [info     ] FQE_20220422071459: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00012775214321642038, 'time_algorithm_update': 0.0023608064076986656, 'loss': 0.0016700128645282687, 'time_step': 0.0025456583643534095, 'init_value': -0.9086126089096069, 'ave_value': -0.6268936886980727, 'soft_opc': nan} step=2158




2022-04-22 07:15.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.06 [info     ] FQE_20220422071459: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00013203506010124483, 'time_algorithm_update': 0.0024539620043283485, 'loss': 0.001611394652008382, 'time_step': 0.002642951815961355, 'init_value': -0.9734976887702942, 'ave_value': -0.6783364854954385, 'soft_opc': nan} step=2324




2022-04-22 07:15.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.06 [info     ] FQE_20220422071459: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00011915183929075678, 'time_algorithm_update': 0.0020283446254500425, 'loss': 0.0016749255525380239, 'time_step': 0.0022058688014386647, 'init_value': -0.9998247623443604, 'ave_value': -0.6895325054859256, 'soft_opc': nan} step=2490




2022-04-22 07:15.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.07 [info     ] FQE_20220422071459: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00013080993330622293, 'time_algorithm_update': 0.002584310899297875, 'loss': 0.001663245846123255, 'time_step': 0.002779083079602345, 'init_value': -1.06522536277771, 'ave_value': -0.7401517719768727, 'soft_opc': nan} step=2656




2022-04-22 07:15.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.07 [info     ] FQE_20220422071459: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00011982831610254494, 'time_algorithm_update': 0.0022771775004375413, 'loss': 0.00172122482824734, 'time_step': 0.0024545724133411087, 'init_value': -1.114969253540039, 'ave_value': -0.7663305721572927, 'soft_opc': nan} step=2822




2022-04-22 07:15.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.07 [info     ] FQE_20220422071459: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00013514455542506943, 'time_algorithm_update': 0.002507861838283309, 'loss': 0.001706983190007418, 'time_step': 0.002708867371800434, 'init_value': -1.1661840677261353, 'ave_value': -0.8135031128990221, 'soft_opc': nan} step=2988




2022-04-22 07:15.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.08 [info     ] FQE_20220422071459: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00012347784387059958, 'time_algorithm_update': 0.00236631014260901, 'loss': 0.001782542773268567, 'time_step': 0.002549231770526932, 'init_value': -1.244521141052246, 'ave_value': -0.8591158675785, 'soft_opc': nan} step=3154




2022-04-22 07:15.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.08 [info     ] FQE_20220422071459: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00011377162243946488, 'time_algorithm_update': 0.0019936863198337785, 'loss': 0.0018699591802657548, 'time_step': 0.0021579552845782542, 'init_value': -1.3028877973556519, 'ave_value': -0.9071285783506191, 'soft_opc': nan} step=3320




2022-04-22 07:15.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.09 [info     ] FQE_20220422071459: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00013382750821400838, 'time_algorithm_update': 0.002551488129489393, 'loss': 0.001963087361437796, 'time_step': 0.0027474954903843893, 'init_value': -1.3836047649383545, 'ave_value': -0.9703738897904619, 'soft_opc': nan} step=3486




2022-04-22 07:15.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.09 [info     ] FQE_20220422071459: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00012512235756380013, 'time_algorithm_update': 0.0025032270385558345, 'loss': 0.0020202761663247392, 'time_step': 0.0026901701846754693, 'init_value': -1.4279577732086182, 'ave_value': -0.975219422129092, 'soft_opc': nan} step=3652




2022-04-22 07:15.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.10 [info     ] FQE_20220422071459: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00013150220893951784, 'time_algorithm_update': 0.0024171909653996847, 'loss': 0.0020994882107044695, 'time_step': 0.0026093146887170263, 'init_value': -1.5008032321929932, 'ave_value': -1.017909801033166, 'soft_opc': nan} step=3818




2022-04-22 07:15.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.10 [info     ] FQE_20220422071459: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00012328682175601822, 'time_algorithm_update': 0.0023255089679396295, 'loss': 0.002238502913731128, 'time_step': 0.0025050539568246127, 'init_value': -1.5499074459075928, 'ave_value': -1.0445519077482524, 'soft_opc': nan} step=3984




2022-04-22 07:15.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.11 [info     ] FQE_20220422071459: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00012034105967326337, 'time_algorithm_update': 0.002175828060471868, 'loss': 0.002350943824009835, 'time_step': 0.0023521271096654684, 'init_value': -1.6030769348144531, 'ave_value': -1.063474722931514, 'soft_opc': nan} step=4150




2022-04-22 07:15.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.11 [info     ] FQE_20220422071459: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00011673031083072524, 'time_algorithm_update': 0.0021486282348632812, 'loss': 0.0024441250491920993, 'time_step': 0.002317955695002912, 'init_value': -1.6842529773712158, 'ave_value': -1.1242119749319983, 'soft_opc': nan} step=4316




2022-04-22 07:15.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.12 [info     ] FQE_20220422071459: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00012073459395443101, 'time_algorithm_update': 0.0020298972187272036, 'loss': 0.002652690867039218, 'time_step': 0.0022033065198415734, 'init_value': -1.7350353002548218, 'ave_value': -1.1676995598115363, 'soft_opc': nan} step=4482




2022-04-22 07:15.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.12 [info     ] FQE_20220422071459: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00014549134725547698, 'time_algorithm_update': 0.002944779683308429, 'loss': 0.002640158187139342, 'time_step': 0.0031628393265138188, 'init_value': -1.7912826538085938, 'ave_value': -1.185535631872512, 'soft_opc': nan} step=4648




2022-04-22 07:15.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.13 [info     ] FQE_20220422071459: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00012101897274155214, 'time_algorithm_update': 0.0021843249539294876, 'loss': 0.002920957418061871, 'time_step': 0.00236044159854751, 'init_value': -1.868729829788208, 'ave_value': -1.2617617241232781, 'soft_opc': nan} step=4814




2022-04-22 07:15.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.13 [info     ] FQE_20220422071459: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00012506203479077443, 'time_algorithm_update': 0.002402615834431476, 'loss': 0.0030852295409226006, 'time_step': 0.002583894384912698, 'init_value': -1.8994135856628418, 'ave_value': -1.2651331909739219, 'soft_opc': nan} step=4980




2022-04-22 07:15.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.14 [info     ] FQE_20220422071459: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00012263763381774166, 'time_algorithm_update': 0.002365260239107063, 'loss': 0.003072002956802194, 'time_step': 0.0025441833289272815, 'init_value': -1.900622010231018, 'ave_value': -1.2496295033475837, 'soft_opc': nan} step=5146




2022-04-22 07:15.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.14 [info     ] FQE_20220422071459: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00011925668601530143, 'time_algorithm_update': 0.0021850100482802793, 'loss': 0.0031284381151942715, 'time_step': 0.002360157219760389, 'init_value': -1.9418511390686035, 'ave_value': -1.273708508036158, 'soft_opc': nan} step=5312




2022-04-22 07:15.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.14 [info     ] FQE_20220422071459: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00011478849204189806, 'time_algorithm_update': 0.002012491226196289, 'loss': 0.0032419681493669793, 'time_step': 0.0021789418645651944, 'init_value': -1.9694998264312744, 'ave_value': -1.281598864146718, 'soft_opc': nan} step=5478




2022-04-22 07:15.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.15 [info     ] FQE_20220422071459: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00011684377509427357, 'time_algorithm_update': 0.0020958314459007905, 'loss': 0.0032062883220938406, 'time_step': 0.0022685226187648543, 'init_value': -2.049478530883789, 'ave_value': -1.3338928895893398, 'soft_opc': nan} step=5644




2022-04-22 07:15.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.15 [info     ] FQE_20220422071459: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00011950803090290851, 'time_algorithm_update': 0.0022175915269966586, 'loss': 0.0034032007695122407, 'time_step': 0.0023920507316129752, 'init_value': -2.109151601791382, 'ave_value': -1.3889748581357904, 'soft_opc': nan} step=5810




2022-04-22 07:15.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.16 [info     ] FQE_20220422071459: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00011682654001626624, 'time_algorithm_update': 0.001998128661190171, 'loss': 0.003481422404229854, 'time_step': 0.002168760242232357, 'init_value': -2.1226320266723633, 'ave_value': -1.396945820064158, 'soft_opc': nan} step=5976




2022-04-22 07:15.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.16 [info     ] FQE_20220422071459: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00011126966361539909, 'time_algorithm_update': 0.0019412299236619328, 'loss': 0.0036075589463047415, 'time_step': 0.0021054816533284016, 'init_value': -2.1493866443634033, 'ave_value': -1.4023580977307246, 'soft_opc': nan} step=6142




2022-04-22 07:15.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.17 [info     ] FQE_20220422071459: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00012334139950304147, 'time_algorithm_update': 0.0023673873349844693, 'loss': 0.0037319026971572495, 'time_step': 0.0025503060903893896, 'init_value': -2.2181596755981445, 'ave_value': -1.4399806098425174, 'soft_opc': nan} step=6308




2022-04-22 07:15.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.17 [info     ] FQE_20220422071459: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00011428149349718208, 'time_algorithm_update': 0.0019217054527926158, 'loss': 0.004039296587222222, 'time_step': 0.002088260937886066, 'init_value': -2.2611517906188965, 'ave_value': -1.4767423958660246, 'soft_opc': nan} step=6474




2022-04-22 07:15.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.18 [info     ] FQE_20220422071459: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00013012196644242988, 'time_algorithm_update': 0.002503840320081596, 'loss': 0.004083540805170862, 'time_step': 0.002699547503367964, 'init_value': -2.3049240112304688, 'ave_value': -1.541078462069099, 'soft_opc': nan} step=6640




2022-04-22 07:15.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.18 [info     ] FQE_20220422071459: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001233715608895543, 'time_algorithm_update': 0.0021628385566803345, 'loss': 0.00420913527315028, 'time_step': 0.002346034509589873, 'init_value': -2.3484370708465576, 'ave_value': -1.55973804997834, 'soft_opc': nan} step=6806




2022-04-22 07:15.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.18 [info     ] FQE_20220422071459: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00011978666466402721, 'time_algorithm_update': 0.002114663641136813, 'loss': 0.004370786186345539, 'time_step': 0.0022887192576764577, 'init_value': -2.3891336917877197, 'ave_value': -1.594122313479851, 'soft_opc': nan} step=6972




2022-04-22 07:15.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.19 [info     ] FQE_20220422071459: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00012039850993328784, 'time_algorithm_update': 0.0021542353802416698, 'loss': 0.0044857653375807586, 'time_step': 0.002328986144927611, 'init_value': -2.424652576446533, 'ave_value': -1.6205605750102214, 'soft_opc': nan} step=7138




2022-04-22 07:15.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.19 [info     ] FQE_20220422071459: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00012102759028055582, 'time_algorithm_update': 0.0021792865661253414, 'loss': 0.0045743536913111045, 'time_step': 0.0023574886551822522, 'init_value': -2.476949691772461, 'ave_value': -1.665125915025537, 'soft_opc': nan} step=7304




2022-04-22 07:15.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.20 [info     ] FQE_20220422071459: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00013191872332469527, 'time_algorithm_update': 0.0025756402188036815, 'loss': 0.0048353168289373485, 'time_step': 0.0027710328619164155, 'init_value': -2.4939208030700684, 'ave_value': -1.6595711446862231, 'soft_opc': nan} step=7470




2022-04-22 07:15.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.20 [info     ] FQE_20220422071459: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00012018307145819606, 'time_algorithm_update': 0.0022566519587872975, 'loss': 0.004845504718762542, 'time_step': 0.002431389797164733, 'init_value': -2.524945020675659, 'ave_value': -1.6945606174757117, 'soft_opc': nan} step=7636




2022-04-22 07:15.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.21 [info     ] FQE_20220422071459: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00012090550847800381, 'time_algorithm_update': 0.002242675746779844, 'loss': 0.005038298871957259, 'time_step': 0.0024168103574270226, 'init_value': -2.58540940284729, 'ave_value': -1.7394892103525366, 'soft_opc': nan} step=7802




2022-04-22 07:15.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.21 [info     ] FQE_20220422071459: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001259223524346409, 'time_algorithm_update': 0.0023098537720829607, 'loss': 0.005232960851834975, 'time_step': 0.0025003861231976247, 'init_value': -2.6115458011627197, 'ave_value': -1.7640540398699274, 'soft_opc': nan} step=7968




2022-04-22 07:15.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.22 [info     ] FQE_20220422071459: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001249356442187206, 'time_algorithm_update': 0.0022581226854439243, 'loss': 0.005343915655660577, 'time_step': 0.0024407082293407024, 'init_value': -2.6148734092712402, 'ave_value': -1.7561973009023581, 'soft_opc': nan} step=8134




2022-04-22 07:15.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:15.22 [info     ] FQE_20220422071459: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001271144453301487, 'time_algorithm_update': 0.00249603282974427, 'loss': 0.005427426744129015, 'time_step': 0.002680508487195854, 'init_value': -2.6120405197143555, 'ave_value': -1.7561015699436335, 'soft_opc': nan} step=8300




2022-04-22 07:15.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071459/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 07:15.23 [debug    ] RoundIterator is selected.
2022-04-22 07:15.23 [info     ] Directory is created at d3rlpy_logs/FQE_20220422071523
2022-04-22 07:15.23 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:15.23 [debug    ] Building models...
2022-04-22 07:15.23 [debug    ] Models have been built.
2022-04-22 07:15.23 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422071523/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:15.24 [info     ] FQE_20220422071523: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00012779374455296717, 'time_algorithm_update': 0.002186506986618042, 'loss': 0.021677865974366838, 'time_step': 0.002375091924223789, 'init_value': -1.024860143661499, 'ave_value': -1.0060657370265003, 'soft_opc': nan} step=344




2022-04-22 07:15.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.25 [info     ] FQE_20220422071523: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00012890959894934365, 'time_algorithm_update': 0.0023011197877484697, 'loss': 0.019742980875551354, 'time_step': 0.002486888752427212, 'init_value': -1.9408965110778809, 'ave_value': -1.8884069413080946, 'soft_opc': nan} step=688




2022-04-22 07:15.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.25 [info     ] FQE_20220422071523: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00013694444368051928, 'time_algorithm_update': 0.002237850843473922, 'loss': 0.022256521941270938, 'time_step': 0.0024350354837816817, 'init_value': -2.9311559200286865, 'ave_value': -2.868105073930981, 'soft_opc': nan} step=1032




2022-04-22 07:15.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.26 [info     ] FQE_20220422071523: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00012726977814075558, 'time_algorithm_update': 0.0022280826125034067, 'loss': 0.0248222537447027, 'time_step': 0.0024140151434166486, 'init_value': -3.7316782474517822, 'ave_value': -3.6764799283968435, 'soft_opc': nan} step=1376




2022-04-22 07:15.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.27 [info     ] FQE_20220422071523: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00013705672219742175, 'time_algorithm_update': 0.0024317717829415966, 'loss': 0.03138996647984916, 'time_step': 0.0026301367338313615, 'init_value': -4.762981414794922, 'ave_value': -4.711143863442782, 'soft_opc': nan} step=1720




2022-04-22 07:15.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.28 [info     ] FQE_20220422071523: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00013201111970945846, 'time_algorithm_update': 0.0022822479869044105, 'loss': 0.037285776454197284, 'time_step': 0.0024735983027968297, 'init_value': -5.276021957397461, 'ave_value': -5.276692061864578, 'soft_opc': nan} step=2064




2022-04-22 07:15.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.29 [info     ] FQE_20220422071523: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001250110393346742, 'time_algorithm_update': 0.0020642155824705613, 'loss': 0.04411615346569221, 'time_step': 0.002245856578959975, 'init_value': -6.068634510040283, 'ave_value': -6.145463482431463, 'soft_opc': nan} step=2408




2022-04-22 07:15.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.30 [info     ] FQE_20220422071523: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00013285806012708088, 'time_algorithm_update': 0.002367686393649079, 'loss': 0.0561052824825395, 'time_step': 0.002560489399488582, 'init_value': -6.409966945648193, 'ave_value': -6.653015582787024, 'soft_opc': nan} step=2752




2022-04-22 07:15.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.31 [info     ] FQE_20220422071523: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00012623570686162903, 'time_algorithm_update': 0.002235228239103805, 'loss': 0.06668210874019234, 'time_step': 0.0024201384810514227, 'init_value': -6.867557525634766, 'ave_value': -7.266739722898414, 'soft_opc': nan} step=3096




2022-04-22 07:15.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.32 [info     ] FQE_20220422071523: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001229331936947135, 'time_algorithm_update': 0.002119723447533541, 'loss': 0.08049913786817342, 'time_step': 0.0022988125335338503, 'init_value': -7.586932182312012, 'ave_value': -8.20621556839696, 'soft_opc': nan} step=3440




2022-04-22 07:15.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.33 [info     ] FQE_20220422071523: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00012824909631596056, 'time_algorithm_update': 0.00227943547936373, 'loss': 0.09096965499677111, 'time_step': 0.002467547045197598, 'init_value': -7.953535079956055, 'ave_value': -8.747939445736172, 'soft_opc': nan} step=3784




2022-04-22 07:15.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.34 [info     ] FQE_20220422071523: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00013070189675619436, 'time_algorithm_update': 0.002345880103665729, 'loss': 0.10659557578240543, 'time_step': 0.0025392160859218863, 'init_value': -8.598098754882812, 'ave_value': -9.693300528998847, 'soft_opc': nan} step=4128




2022-04-22 07:15.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.35 [info     ] FQE_20220422071523: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00013800831728203353, 'time_algorithm_update': 0.002556538166001786, 'loss': 0.12076706799061232, 'time_step': 0.00275676125703856, 'init_value': -9.00210952758789, 'ave_value': -10.268166839297827, 'soft_opc': nan} step=4472




2022-04-22 07:15.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.36 [info     ] FQE_20220422071523: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00012754700904668762, 'time_algorithm_update': 0.002172967029172321, 'loss': 0.1391972403047433, 'time_step': 0.0023595572904098867, 'init_value': -9.662210464477539, 'ave_value': -11.16079528393509, 'soft_opc': nan} step=4816




2022-04-22 07:15.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.37 [info     ] FQE_20220422071523: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.0001269086848857791, 'time_algorithm_update': 0.0022678777228954225, 'loss': 0.15544935699476492, 'time_step': 0.002455836811731028, 'init_value': -9.864609718322754, 'ave_value': -11.606878114041981, 'soft_opc': nan} step=5160




2022-04-22 07:15.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.38 [info     ] FQE_20220422071523: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00012884306353191997, 'time_algorithm_update': 0.0022908310557520667, 'loss': 0.17448356165279916, 'time_step': 0.0024786286575849666, 'init_value': -10.46356201171875, 'ave_value': -12.57620525851443, 'soft_opc': nan} step=5504




2022-04-22 07:15.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.38 [info     ] FQE_20220422071523: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.0001299436702284702, 'time_algorithm_update': 0.0023074954055076423, 'loss': 0.19606893963646144, 'time_step': 0.0024984499742818434, 'init_value': -10.637484550476074, 'ave_value': -12.988692444522638, 'soft_opc': nan} step=5848




2022-04-22 07:15.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.39 [info     ] FQE_20220422071523: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00013596304627351983, 'time_algorithm_update': 0.0024827289026837017, 'loss': 0.21120338920547171, 'time_step': 0.002681029397387837, 'init_value': -10.897072792053223, 'ave_value': -13.598312579967955, 'soft_opc': nan} step=6192




2022-04-22 07:15.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.40 [info     ] FQE_20220422071523: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00013499758964361145, 'time_algorithm_update': 0.002436433420624844, 'loss': 0.23034059988378092, 'time_step': 0.0026333581569582916, 'init_value': -11.128543853759766, 'ave_value': -14.228165046884133, 'soft_opc': nan} step=6536




2022-04-22 07:15.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.41 [info     ] FQE_20220422071523: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001320395358773165, 'time_algorithm_update': 0.0023955654266268707, 'loss': 0.24967479778367074, 'time_step': 0.0025902099387590275, 'init_value': -11.370538711547852, 'ave_value': -14.822120528715152, 'soft_opc': nan} step=6880




2022-04-22 07:15.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.42 [info     ] FQE_20220422071523: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00013083496759104174, 'time_algorithm_update': 0.0023728699185127434, 'loss': 0.27414057870005626, 'time_step': 0.002564487762229387, 'init_value': -11.89193344116211, 'ave_value': -15.6504038190251, 'soft_opc': nan} step=7224




2022-04-22 07:15.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.43 [info     ] FQE_20220422071523: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.0001251233178515767, 'time_algorithm_update': 0.002159269743187483, 'loss': 0.29662113915587407, 'time_step': 0.0023406723210977953, 'init_value': -12.084433555603027, 'ave_value': -16.12403300940185, 'soft_opc': nan} step=7568




2022-04-22 07:15.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.44 [info     ] FQE_20220422071523: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00013416866923487462, 'time_algorithm_update': 0.00224916186443595, 'loss': 0.3184496510048332, 'time_step': 0.0024455252081848856, 'init_value': -12.533279418945312, 'ave_value': -16.920129356405756, 'soft_opc': nan} step=7912




2022-04-22 07:15.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.45 [info     ] FQE_20220422071523: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00012887771739516148, 'time_algorithm_update': 0.002241700194602789, 'loss': 0.34305531764402986, 'time_step': 0.00243238931478456, 'init_value': -12.649267196655273, 'ave_value': -17.312584399438656, 'soft_opc': nan} step=8256




2022-04-22 07:15.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.46 [info     ] FQE_20220422071523: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00012883266737294752, 'time_algorithm_update': 0.0022688611995342165, 'loss': 0.3555102823248003, 'time_step': 0.002456938111504843, 'init_value': -13.026187896728516, 'ave_value': -18.162107777904282, 'soft_opc': nan} step=8600




2022-04-22 07:15.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.47 [info     ] FQE_20220422071523: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00012536450873973758, 'time_algorithm_update': 0.0021272364050842997, 'loss': 0.37894291668686325, 'time_step': 0.002309531666511713, 'init_value': -13.071723937988281, 'ave_value': -18.589655053763238, 'soft_opc': nan} step=8944




2022-04-22 07:15.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.48 [info     ] FQE_20220422071523: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00013057783592578976, 'time_algorithm_update': 0.002431226331134175, 'loss': 0.40232252246743544, 'time_step': 0.0026236037875330726, 'init_value': -13.653834342956543, 'ave_value': -19.463130359646975, 'soft_opc': nan} step=9288




2022-04-22 07:15.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.49 [info     ] FQE_20220422071523: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00012262823969818825, 'time_algorithm_update': 0.002186204111853311, 'loss': 0.42263712220673644, 'time_step': 0.0023648177468499473, 'init_value': -14.151229858398438, 'ave_value': -20.332108818786637, 'soft_opc': nan} step=9632




2022-04-22 07:15.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.50 [info     ] FQE_20220422071523: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00013608502787212993, 'time_algorithm_update': 0.002447826917781386, 'loss': 0.4424719534337867, 'time_step': 0.0026469202928764875, 'init_value': -14.646876335144043, 'ave_value': -21.181374368044708, 'soft_opc': nan} step=9976




2022-04-22 07:15.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.51 [info     ] FQE_20220422071523: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001301439695580061, 'time_algorithm_update': 0.0022977646007094274, 'loss': 0.4676395387139691, 'time_step': 0.0024898377961890643, 'init_value': -14.920350074768066, 'ave_value': -21.75552394157863, 'soft_opc': nan} step=10320




2022-04-22 07:15.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.52 [info     ] FQE_20220422071523: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00012461044067560241, 'time_algorithm_update': 0.0021535650242206664, 'loss': 0.469197349503723, 'time_step': 0.0023369137630906214, 'init_value': -14.959827423095703, 'ave_value': -22.018282279640705, 'soft_opc': nan} step=10664




2022-04-22 07:15.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.53 [info     ] FQE_20220422071523: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00013028119885644248, 'time_algorithm_update': 0.0024584621884102043, 'loss': 0.47881286002175755, 'time_step': 0.002649842999702276, 'init_value': -15.155458450317383, 'ave_value': -22.626366853257558, 'soft_opc': nan} step=11008




2022-04-22 07:15.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.54 [info     ] FQE_20220422071523: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00013710523760595986, 'time_algorithm_update': 0.002495997866918874, 'loss': 0.47996013263933535, 'time_step': 0.002696472545002782, 'init_value': -14.949125289916992, 'ave_value': -22.70989825855773, 'soft_opc': nan} step=11352




2022-04-22 07:15.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.55 [info     ] FQE_20220422071523: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00013874159302822378, 'time_algorithm_update': 0.0024859267611836277, 'loss': 0.47728479332236445, 'time_step': 0.002689035825951155, 'init_value': -15.022476196289062, 'ave_value': -23.043411204192015, 'soft_opc': nan} step=11696




2022-04-22 07:15.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.56 [info     ] FQE_20220422071523: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00013198824815971908, 'time_algorithm_update': 0.002384275197982788, 'loss': 0.49552269739932703, 'time_step': 0.002577936926553416, 'init_value': -15.381555557250977, 'ave_value': -23.77961250299269, 'soft_opc': nan} step=12040




2022-04-22 07:15.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.56 [info     ] FQE_20220422071523: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001279399838558463, 'time_algorithm_update': 0.0022672823695249335, 'loss': 0.4967624843283015, 'time_step': 0.002453736094541328, 'init_value': -15.785146713256836, 'ave_value': -24.420009477476817, 'soft_opc': nan} step=12384




2022-04-22 07:15.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.57 [info     ] FQE_20220422071523: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00013085090836813284, 'time_algorithm_update': 0.002295627150424691, 'loss': 0.4993541832190267, 'time_step': 0.0024867549885151, 'init_value': -15.949292182922363, 'ave_value': -24.783023828241202, 'soft_opc': nan} step=12728




2022-04-22 07:15.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.58 [info     ] FQE_20220422071523: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001293476237807163, 'time_algorithm_update': 0.0022175949673319973, 'loss': 0.4959501471359629, 'time_step': 0.0024050432582234226, 'init_value': -16.343833923339844, 'ave_value': -25.37718496722681, 'soft_opc': nan} step=13072




2022-04-22 07:15.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:15.59 [info     ] FQE_20220422071523: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00013245260992715525, 'time_algorithm_update': 0.0023669240086577658, 'loss': 0.4900069036575164, 'time_step': 0.002561245546784512, 'init_value': -16.484561920166016, 'ave_value': -25.72760354830875, 'soft_opc': nan} step=13416




2022-04-22 07:15.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.00 [info     ] FQE_20220422071523: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00013522491898647574, 'time_algorithm_update': 0.002442731413730355, 'loss': 0.4889712138724145, 'time_step': 0.0026397483293400252, 'init_value': -16.709308624267578, 'ave_value': -26.218389382931562, 'soft_opc': nan} step=13760




2022-04-22 07:16.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.01 [info     ] FQE_20220422071523: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00013413540152616279, 'time_algorithm_update': 0.0024860168612280556, 'loss': 0.4919678516209472, 'time_step': 0.002680985733520153, 'init_value': -16.952484130859375, 'ave_value': -26.54068352161227, 'soft_opc': nan} step=14104




2022-04-22 07:16.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.02 [info     ] FQE_20220422071523: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00012839464254157487, 'time_algorithm_update': 0.0023226031037264093, 'loss': 0.49674125940019137, 'time_step': 0.0025109018004217812, 'init_value': -17.069910049438477, 'ave_value': -27.045881644106124, 'soft_opc': nan} step=14448




2022-04-22 07:16.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.03 [info     ] FQE_20220422071523: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00013247617455415948, 'time_algorithm_update': 0.0022484389848487323, 'loss': 0.49459931319362893, 'time_step': 0.0024412371391473813, 'init_value': -16.455711364746094, 'ave_value': -26.533010809244335, 'soft_opc': nan} step=14792




2022-04-22 07:16.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.04 [info     ] FQE_20220422071523: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00012610887372216514, 'time_algorithm_update': 0.002059278793113176, 'loss': 0.4815208419648454, 'time_step': 0.002242450104203335, 'init_value': -16.522823333740234, 'ave_value': -26.743966850557843, 'soft_opc': nan} step=15136




2022-04-22 07:16.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.05 [info     ] FQE_20220422071523: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00013121200162310932, 'time_algorithm_update': 0.002267358608024065, 'loss': 0.4757566843696242, 'time_step': 0.0024588094201198844, 'init_value': -16.310623168945312, 'ave_value': -26.654533775862273, 'soft_opc': nan} step=15480




2022-04-22 07:16.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.06 [info     ] FQE_20220422071523: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001294398030569387, 'time_algorithm_update': 0.0023000864095466082, 'loss': 0.4646139546665688, 'time_step': 0.00249164950015933, 'init_value': -16.5798282623291, 'ave_value': -27.13976440220266, 'soft_opc': nan} step=15824




2022-04-22 07:16.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.07 [info     ] FQE_20220422071523: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00013036644736001658, 'time_algorithm_update': 0.002424646255581878, 'loss': 0.47081451490792164, 'time_step': 0.002618398777274198, 'init_value': -16.8836612701416, 'ave_value': -27.54732016400174, 'soft_opc': nan} step=16168




2022-04-22 07:16.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.08 [info     ] FQE_20220422071523: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00013097982073939122, 'time_algorithm_update': 0.002280560343764549, 'loss': 0.477448763859155, 'time_step': 0.002474938714227011, 'init_value': -16.60260581970215, 'ave_value': -27.478238634993364, 'soft_opc': nan} step=16512




2022-04-22 07:16.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.09 [info     ] FQE_20220422071523: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00013508006583812625, 'time_algorithm_update': 0.0024491631707479786, 'loss': 0.48456695300884284, 'time_step': 0.0026462667210157527, 'init_value': -16.943157196044922, 'ave_value': -27.8694840548409, 'soft_opc': nan} step=16856




2022-04-22 07:16.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:16.10 [info     ] FQE_20220422071523: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00013726880384045979, 'time_algorithm_update': 0.0025100805038629575, 'loss': 0.48436553598392407, 'time_step': 0.0027124105497848155, 'init_value': -16.918811798095703, 'ave_value': -27.94216038998183, 'soft_opc': nan} step=17200




2022-04-22 07:16.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422071523/model_17200.pt
search iteration:  29
using hyper params:  [0.001126130005656269, 0.001834114789554068, 4.138845192468941e-05, 1]
2022-04-22 07:16.10 [debug    ] RoundIterator is selected.
2022-04-22 07:16.10 [info     ] Directory is created at d3rlpy_logs/CQL_20220422071610
2022-04-22 07:16.10 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:16.10 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 07:16.10 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422071610/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.001126130005656269, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:16.23 [info     ] CQL_20220422071610: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003358800976262616, 'time_algorithm_update': 0.03576372124556172, 'temp_loss': 4.77228405296458, 'temp': 0.992497058100783, 'alpha_loss': -17.57556047604952, 'alpha': 1.0177609358908812, 'critic_loss': 27.883684003973283, 'actor_loss': -1.647949624096038, 'time_step': 0.036201481185207475, 'td_error': 1.2269188400175242, 'init_value': 0.037971943616867065, 'ave_value': 0.19048239556112495} step=346
2022-04-22 07:16.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:16.36 [info     ] CQL_20220422071610: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00033090707194598424, 'time_algorithm_update': 0.036067055828998544, 'temp_loss': 4.930544869748154, 'temp': 0.9779128778532061, 'alpha_loss': -18.339714491298434, 'alpha': 1.054384363179951, 'critic_loss': 30.376410489826533, 'actor_loss': -1.6810723012582416, 'time_step': 0.03648971408777843, 'td_error': 1.2101238510255525, 'init_value': -0.17604652047157288, 'ave_value': 0.05411291276888578} step=692
2022-04-22 07:16.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:16.48 [info     ] CQL_20220422071610: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003170023074728905, 'time_algorithm_update': 0.0349969670951711, 'temp_loss': 4.867917647940575, 'temp': 0.9639200718072108, 'alpha_loss': -19.010051528842464, 'alpha': 1.0927640006721364, 'critic_loss': 39.349368663192486, 'actor_loss': -1.276597023699325, 'time_step': 0.03540570956434129, 'td_error': 1.209100707047867, 'init_value': -0.5815863609313965, 'ave_value': -0.27445438540889056} step=1038
2022-04-22 07:16.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:17.00 [info     ] CQL_20220422071610: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003166708643036771, 'time_algorithm_update': 0.03425064527919527, 'temp_loss': 4.7992323561210855, 'temp': 0.9503109699728861, 'alpha_loss': -19.70902835978249, 'alpha': 1.1329981391829562, 'critic_loss': 50.187494145652465, 'actor_loss': -0.7909755065951044, 'time_step': 0.03465496115601821, 'td_error': 1.206895315407429, 'init_value': -0.904792070388794, 'ave_value': -0.5757799395322517} step=1384
2022-04-22 07:17.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:17.13 [info     ] CQL_20220422071610: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003195870129359251, 'time_algorithm_update': 0.03515648910765014, 'temp_loss': 4.7315319797207165, 'temp': 0.9370069048997295, 'alpha_loss': -20.44174949006538, 'alpha': 1.1751463709539072, 'critic_loss': 62.60248286737872, 'actor_loss': -0.2461564940201237, 'time_step': 0.03557613750413663, 'td_error': 1.210219092789784, 'init_value': -1.4238625764846802, 'ave_value': -1.0485565758694004} step=1730
2022-04-22 07:17.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:17.26 [info     ] CQL_20220422071610: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00033535984899267296, 'time_algorithm_update': 0.03508582280550389, 'temp_loss': 4.66634077832878, 'temp': 0.9239646160189127, 'alpha_loss': -21.207508445475142, 'alpha': 1.2192550894152911, 'critic_loss': 76.74925840521135, 'actor_loss': 0.2732120898331521, 'time_step': 0.03551727912329525, 'td_error': 1.2103245343097264, 'init_value': -1.6104329824447632, 'ave_value': -1.2582089428556311} step=2076
2022-04-22 07:17.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:17.38 [info     ] CQL_20220422071610: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003202629916240714, 'time_algorithm_update': 0.03503768981536689, 'temp_loss': 4.601193750524796, 'temp': 0.9111608253049024, 'alpha_loss': -22.007641141814304, 'alpha': 1.2653658507187242, 'critic_loss': 93.28306899318805, 'actor_loss': 0.7086563119826289, 'time_step': 0.035448828184535736, 'td_error': 1.2154303955885117, 'init_value': -2.1827170848846436, 'ave_value': -1.7650237157863145} step=2422
2022-04-22 07:17.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:17.51 [info     ] CQL_20220422071610: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003174963714070403, 'time_algorithm_update': 0.03513015281258291, 'temp_loss': 4.538335757448494, 'temp': 0.8985758652921357, 'alpha_loss': -22.849585455966135, 'alpha': 1.313519292484129, 'critic_loss': 113.0001884857354, 'actor_loss': 0.9544443257282235, 'time_step': 0.0355353741287496, 'td_error': 1.2154318158320714, 'init_value': -2.1918678283691406, 'ave_value': -1.8189364213975134} step=2768
2022-04-22 07:17.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:18.04 [info     ] CQL_20220422071610: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003215033194922298, 'time_algorithm_update': 0.03513371324263556, 'temp_loss': 4.475078155539628, 'temp': 0.8861965612869043, 'alpha_loss': -23.72031719560568, 'alpha': 1.3637593092256888, 'critic_loss': 137.97781424990967, 'actor_loss': 0.9776414987668826, 'time_step': 0.0355466493981422, 'td_error': 1.2165623246262316, 'init_value': -2.165081024169922, 'ave_value': -1.8135787193896138} step=3114
2022-04-22 07:18.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:18.16 [info     ] CQL_20220422071610: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003179477129368424, 'time_algorithm_update': 0.035284766572059235, 'temp_loss': 4.413607077791512, 'temp': 0.8740158410086108, 'alpha_loss': -24.629910309190695, 'alpha': 1.4161270901646916, 'critic_loss': 168.7646072916902, 'actor_loss': 0.7037024571199637, 'time_step': 0.035695720959260976, 'td_error': 1.218956031623367, 'init_value': -1.7172977924346924, 'ave_value': -1.4600466240040213} step=3460
2022-04-22 07:18.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:18.29 [info     ] CQL_20220422071610: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00032411903315196836, 'time_algorithm_update': 0.03492068762035039, 'temp_loss': 4.352163819219336, 'temp': 0.8620203204582192, 'alpha_loss': -25.577900373866793, 'alpha': 1.4706776073213257, 'critic_loss': 206.94799204920068, 'actor_loss': 0.18002599578706086, 'time_step': 0.03533607893596495, 'td_error': 1.222635502834272, 'init_value': -1.242559790611267, 'ave_value': -1.0268187737792744} step=3806
2022-04-22 07:18.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:18.41 [info     ] CQL_20220422071610: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00032521327796009925, 'time_algorithm_update': 0.034520043803088235, 'temp_loss': 4.293245461634818, 'temp': 0.850205230230541, 'alpha_loss': -26.563077176926452, 'alpha': 1.5274718372808027, 'critic_loss': 251.565927009362, 'actor_loss': -0.5155835519894699, 'time_step': 0.03494055560558518, 'td_error': 1.2269689870564198, 'init_value': -0.5492628216743469, 'ave_value': -0.4231153670562393} step=4152
2022-04-22 07:18.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:18.54 [info     ] CQL_20220422071610: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003279688730405245, 'time_algorithm_update': 0.034699856890419316, 'temp_loss': 4.234519724211941, 'temp': 0.8385619538367828, 'alpha_loss': -27.589928588426183, 'alpha': 1.586566056474785, 'critic_loss': 299.30214704392273, 'actor_loss': -1.17876066103836, 'time_step': 0.035118211900567735, 'td_error': 1.2316037802952349, 'init_value': -0.05015992000699043, 'ave_value': 0.03668860559951869} step=4498
2022-04-22 07:18.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:19.06 [info     ] CQL_20220422071610: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003194622910780714, 'time_algorithm_update': 0.0347125330412319, 'temp_loss': 4.177571019685337, 'temp': 0.8270861625326851, 'alpha_loss': -28.661505153413454, 'alpha': 1.648044764306504, 'critic_loss': 346.5829065576454, 'actor_loss': -1.7438647874517936, 'time_step': 0.03511778950002152, 'td_error': 1.2340337747390713, 'init_value': 0.39670076966285706, 'ave_value': 0.46204567873517505} step=4844
2022-04-22 07:19.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:19.19 [info     ] CQL_20220422071610: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003121567599346183, 'time_algorithm_update': 0.03546133344573093, 'temp_loss': 4.1199218110542075, 'temp': 0.8157731919963925, 'alpha_loss': -29.767827111172537, 'alpha': 1.711976286648326, 'critic_loss': 392.2268391868283, 'actor_loss': -2.2168994015351884, 'time_step': 0.03586091540452373, 'td_error': 1.2351728087904488, 'init_value': 0.8584637641906738, 'ave_value': 0.8953229260761176} step=5190
2022-04-22 07:19.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:19.32 [info     ] CQL_20220422071610: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00031673494791019857, 'time_algorithm_update': 0.03582205868869848, 'temp_loss': 4.063942228438537, 'temp': 0.8046194151302294, 'alpha_loss': -30.92587573542071, 'alpha': 1.7784431242529368, 'critic_loss': 438.3157833738823, 'actor_loss': -2.640196480502972, 'time_step': 0.03622915634530128, 'td_error': 1.2356244106110406, 'init_value': 1.247176170349121, 'ave_value': 1.2780150837895994} step=5536
2022-04-22 07:19.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:19.45 [info     ] CQL_20220422071610: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003274031457184367, 'time_algorithm_update': 0.035055465091859674, 'temp_loss': 4.006994330125048, 'temp': 0.7936256002484029, 'alpha_loss': -32.123064509706, 'alpha': 1.8475371540626349, 'critic_loss': 485.66706980446173, 'actor_loss': -3.024151344519819, 'time_step': 0.03547752937140492, 'td_error': 1.2358426550113373, 'init_value': 1.670336127281189, 'ave_value': 1.687375058433692} step=5882
2022-04-22 07:19.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:19.57 [info     ] CQL_20220422071610: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00032679814134719057, 'time_algorithm_update': 0.035246135871534405, 'temp_loss': 3.9534424026577457, 'temp': 0.7827836431864369, 'alpha_loss': -33.37746132867185, 'alpha': 1.91935246803857, 'critic_loss': 535.8123367397772, 'actor_loss': -3.4118162076597267, 'time_step': 0.035663862448896286, 'td_error': 1.2362589071431012, 'init_value': 2.0831050872802734, 'ave_value': 2.0943712680472077} step=6228
2022-04-22 07:19.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:20.10 [info     ] CQL_20220422071610: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003231619134803728, 'time_algorithm_update': 0.03487657191436415, 'temp_loss': 3.898360161422994, 'temp': 0.7720921949155068, 'alpha_loss': -34.67657437627715, 'alpha': 1.9939918011599194, 'critic_loss': 589.506377115415, 'actor_loss': -3.747209489001015, 'time_step': 0.03529071049883187, 'td_error': 1.2360934201082325, 'init_value': 2.427661180496216, 'ave_value': 2.4403260231018065} step=6574
2022-04-22 07:20.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:20.22 [info     ] CQL_20220422071610: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00032366424626697695, 'time_algorithm_update': 0.034742207885477584, 'temp_loss': 3.8454461807460456, 'temp': 0.761549113975095, 'alpha_loss': -36.02132128015419, 'alpha': 2.0715495437555917, 'critic_loss': 649.7026944022647, 'actor_loss': -4.0983213434329615, 'time_step': 0.03515611494207658, 'td_error': 1.2362872584957647, 'init_value': 2.766406297683716, 'ave_value': 2.7752259716483416} step=6920
2022-04-22 07:20.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:20.35 [info     ] CQL_20220422071610: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00031355350692837227, 'time_algorithm_update': 0.03461891585002745, 'temp_loss': 3.7941170889518165, 'temp': 0.7511484018295487, 'alpha_loss': -37.42030685072, 'alpha': 2.1521339278689697, 'critic_loss': 714.2574956816745, 'actor_loss': -4.414576225887144, 'time_step': 0.03502480694324295, 'td_error': 1.236544758394765, 'init_value': 3.0951926708221436, 'ave_value': 3.1039926045875856} step=7266
2022-04-22 07:20.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:20.47 [info     ] CQL_20220422071610: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003204469735911816, 'time_algorithm_update': 0.034737348556518555, 'temp_loss': 3.7408189118942086, 'temp': 0.740891062627638, 'alpha_loss': -38.877740617432345, 'alpha': 2.235868402988235, 'critic_loss': 783.2109796601223, 'actor_loss': -4.727245774572295, 'time_step': 0.03515034879563172, 'td_error': 1.237374947864121, 'init_value': 3.4657704830169678, 'ave_value': 3.4734516906986985} step=7612
2022-04-22 07:20.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:21.00 [info     ] CQL_20220422071610: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003334876429827916, 'time_algorithm_update': 0.0347353840149896, 'temp_loss': 3.6899218352543826, 'temp': 0.7307767630312484, 'alpha_loss': -40.39111838588825, 'alpha': 2.322872737239551, 'critic_loss': 852.707042716142, 'actor_loss': -5.025534445150739, 'time_step': 0.03516687547540389, 'td_error': 1.238240459104537, 'init_value': 3.777039051055908, 'ave_value': 3.783184194813525} step=7958
2022-04-22 07:21.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:21.12 [info     ] CQL_20220422071610: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00031397108397731893, 'time_algorithm_update': 0.03459959154184154, 'temp_loss': 3.640360727475558, 'temp': 0.7207997518812301, 'alpha_loss': -41.96677635722077, 'alpha': 2.413271664884049, 'critic_loss': 922.7473095138638, 'actor_loss': -5.299159373851181, 'time_step': 0.03499886479680938, 'td_error': 1.2389572565321931, 'init_value': 4.056578159332275, 'ave_value': 4.062186169047783} step=8304
2022-04-22 07:21.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:21.25 [info     ] CQL_20220422071610: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003353067905227573, 'time_algorithm_update': 0.03476328794666798, 'temp_loss': 3.5905911763968494, 'temp': 0.7109583781633763, 'alpha_loss': -43.59671576174697, 'alpha': 2.507198236581218, 'critic_loss': 983.3515210454864, 'actor_loss': -5.575148939397294, 'time_step': 0.035188077502167986, 'td_error': 1.2407587688238624, 'init_value': 4.428694725036621, 'ave_value': 4.431654759818991} step=8650
2022-04-22 07:21.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:21.38 [info     ] CQL_20220422071610: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003238137746821938, 'time_algorithm_update': 0.03516457328906638, 'temp_loss': 3.54114291364747, 'temp': 0.7012521722757747, 'alpha_loss': -45.292211389266, 'alpha': 2.6047745233326287, 'critic_loss': 990.2349341949287, 'actor_loss': -5.904358336002152, 'time_step': 0.03557200445605151, 'td_error': 1.2420201134590232, 'init_value': 4.745686054229736, 'ave_value': 4.7481421838048625} step=8996
2022-04-22 07:21.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:21.50 [info     ] CQL_20220422071610: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00032709375282243496, 'time_algorithm_update': 0.03483794741547866, 'temp_loss': 3.494240275697212, 'temp': 0.6916767866969797, 'alpha_loss': -47.0551702686817, 'alpha': 2.7061576402256255, 'critic_loss': 986.9847360952741, 'actor_loss': -6.212264842380678, 'time_step': 0.03525920686005168, 'td_error': 1.242750065159705, 'init_value': 4.9727325439453125, 'ave_value': 4.975777456781886} step=9342
2022-04-22 07:21.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:22.03 [info     ] CQL_20220422071610: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00033418842822830115, 'time_algorithm_update': 0.03576544254501431, 'temp_loss': 3.4456283129708614, 'temp': 0.6822328514101877, 'alpha_loss': -48.89070645370924, 'alpha': 2.8115008635327996, 'critic_loss': 1020.1263355409479, 'actor_loss': -6.414214547658931, 'time_step': 0.03618674677920479, 'td_error': 1.2451521952878593, 'init_value': 5.317660331726074, 'ave_value': 5.317932284935327} step=9688
2022-04-22 07:22.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:22.16 [info     ] CQL_20220422071610: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003168865435385291, 'time_algorithm_update': 0.03529343784199974, 'temp_loss': 3.399371001072702, 'temp': 0.6729187999846619, 'alpha_loss': -50.788442115563186, 'alpha': 2.920947794969371, 'critic_loss': 984.8900624534298, 'actor_loss': -6.748119472768265, 'time_step': 0.03570153258439433, 'td_error': 1.246857462747373, 'init_value': 5.6174211502075195, 'ave_value': 5.617530486051253} step=10034
2022-04-22 07:22.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:22.28 [info     ] CQL_20220422071610: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00031366375829443077, 'time_algorithm_update': 0.03419853634916978, 'temp_loss': 3.3527580044862164, 'temp': 0.6637312764721799, 'alpha_loss': -52.772419725539365, 'alpha': 3.034655482093723, 'critic_loss': 967.9458050149025, 'actor_loss': -7.011596828526844, 'time_step': 0.034595586660969464, 'td_error': 1.2480965783283737, 'init_value': 5.872481822967529, 'ave_value': 5.873178896784161} step=10380
2022-04-22 07:22.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:22.41 [info     ] CQL_20220422071610: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00032151916812610075, 'time_algorithm_update': 0.03646326685227411, 'temp_loss': 3.3073158374411524, 'temp': 0.6546689170288902, 'alpha_loss': -54.82431675243929, 'alpha': 3.152795020555485, 'critic_loss': 943.7865477104408, 'actor_loss': -7.296225130213478, 'time_step': 0.036874234331825566, 'td_error': 1.2497906385049429, 'init_value': 6.155011177062988, 'ave_value': 6.156575935358343} step=10726
2022-04-22 07:22.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:22.54 [info     ] CQL_20220422071610: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00033190967030607895, 'time_algorithm_update': 0.03533743158241228, 'temp_loss': 3.2619620244627052, 'temp': 0.6457307094094381, 'alpha_loss': -56.959997981959, 'alpha': 3.275536092719591, 'critic_loss': 925.6002147873013, 'actor_loss': -7.577266250731628, 'time_step': 0.035760222142831435, 'td_error': 1.2515659025816928, 'init_value': 6.429945945739746, 'ave_value': 6.432381611403948} step=11072
2022-04-22 07:22.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:23.06 [info     ] CQL_20220422071610: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003162601779651091, 'time_algorithm_update': 0.03483663128979633, 'temp_loss': 3.217130056006371, 'temp': 0.6369157126873215, 'alpha_loss': -59.17742213210619, 'alpha': 3.4030550132597113, 'critic_loss': 906.4082309965453, 'actor_loss': -7.849716748805404, 'time_step': 0.03523962277208449, 'td_error': 1.2543345030269155, 'init_value': 6.763134479522705, 'ave_value': 6.7643401739437925} step=11418
2022-04-22 07:23.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:23.19 [info     ] CQL_20220422071610: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003126081014644204, 'time_algorithm_update': 0.034216598968285356, 'temp_loss': 3.173105241246306, 'temp': 0.6282213020186893, 'alpha_loss': -61.47922074863676, 'alpha': 3.5355364263402245, 'critic_loss': 900.327634778326, 'actor_loss': -8.12988054821257, 'time_step': 0.03461285891560461, 'td_error': 1.2566060179480665, 'init_value': 7.039330005645752, 'ave_value': 7.040525768491656} step=11764
2022-04-22 07:23.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:23.31 [info     ] CQL_20220422071610: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003190529828815791, 'time_algorithm_update': 0.035551727162620235, 'temp_loss': 3.129451706230296, 'temp': 0.6196463237263564, 'alpha_loss': -63.87858958602641, 'alpha': 3.6731803816867012, 'critic_loss': 885.1026127986136, 'actor_loss': -8.427211518921604, 'time_step': 0.03595740326567192, 'td_error': 1.2598662515464238, 'init_value': 7.370389461517334, 'ave_value': 7.370800919507905} step=12110
2022-04-22 07:23.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:23.44 [info     ] CQL_20220422071610: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00032945244298504956, 'time_algorithm_update': 0.035687334964730144, 'temp_loss': 3.086648231985941, 'temp': 0.6111887155929742, 'alpha_loss': -66.36075133395333, 'alpha': 3.8161856011848228, 'critic_loss': 899.9808739458206, 'actor_loss': -8.661296612954553, 'time_step': 0.036111037166132405, 'td_error': 1.2598393763460756, 'init_value': 7.488007545471191, 'ave_value': 7.490988701471733} step=12456
2022-04-22 07:23.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:23.57 [info     ] CQL_20220422071610: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003293463260452182, 'time_algorithm_update': 0.035830956663010435, 'temp_loss': 3.044453649851628, 'temp': 0.6028468667427239, 'alpha_loss': -68.93802327503359, 'alpha': 3.964746082449235, 'critic_loss': 958.9183862939735, 'actor_loss': -8.849393502825258, 'time_step': 0.03625180266496074, 'td_error': 1.2635467704968446, 'init_value': 7.8100690841674805, 'ave_value': 7.812209671331616} step=12802
2022-04-22 07:23.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:24.10 [info     ] CQL_20220422071610: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00032641777413428864, 'time_algorithm_update': 0.03512114320876281, 'temp_loss': 3.003538109663594, 'temp': 0.5946169263021105, 'alpha_loss': -71.62712987999006, 'alpha': 4.1190922301628685, 'critic_loss': 1010.7751831760296, 'actor_loss': -9.08159680724833, 'time_step': 0.03553933766535941, 'td_error': 1.2655330900578163, 'init_value': 8.015213966369629, 'ave_value': 8.017042443852901} step=13148
2022-04-22 07:24.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:24.22 [info     ] CQL_20220422071610: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003389437074606129, 'time_algorithm_update': 0.03489101415424678, 'temp_loss': 2.961882248090182, 'temp': 0.5865000486373901, 'alpha_loss': -74.41946543434452, 'alpha': 4.2794581396731335, 'critic_loss': 1064.4961800106687, 'actor_loss': -9.278399762390666, 'time_step': 0.03532229269171037, 'td_error': 1.2686407741816355, 'init_value': 8.280521392822266, 'ave_value': 8.281571847853247} step=13494
2022-04-22 07:24.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:24.36 [info     ] CQL_20220422071610: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003304364364271219, 'time_algorithm_update': 0.03651360280251916, 'temp_loss': 2.9218898777327786, 'temp': 0.578493477636679, 'alpha_loss': -77.30470708064261, 'alpha': 4.446037682494676, 'critic_loss': 1129.7883787651283, 'actor_loss': -9.471173755006294, 'time_step': 0.036934251730152634, 'td_error': 1.2694708854407144, 'init_value': 8.385111808776855, 'ave_value': 8.387000148225136} step=13840
2022-04-22 07:24.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:24.48 [info     ] CQL_20220422071610: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.000321856812934655, 'time_algorithm_update': 0.03582656590235716, 'temp_loss': 2.882188230580677, 'temp': 0.5705953400603608, 'alpha_loss': -80.32647597031786, 'alpha': 4.619119799895094, 'critic_loss': 1214.3542349931133, 'actor_loss': -9.6150343735094, 'time_step': 0.036234504914697194, 'td_error': 1.2708615142833815, 'init_value': 8.530129432678223, 'ave_value': 8.533429059986823} step=14186
2022-04-22 07:24.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:25.01 [info     ] CQL_20220422071610: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00032827413151029905, 'time_algorithm_update': 0.035282609090639676, 'temp_loss': 2.8432737395942556, 'temp': 0.5628047649226436, 'alpha_loss': -83.44402531530126, 'alpha': 4.798943286686274, 'critic_loss': 1298.0557861328125, 'actor_loss': -9.76994082831234, 'time_step': 0.03570027778603438, 'td_error': 1.2741120612965136, 'init_value': 8.755802154541016, 'ave_value': 8.757249097611567} step=14532
2022-04-22 07:25.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:25.14 [info     ] CQL_20220422071610: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00031955118124195607, 'time_algorithm_update': 0.034392765491684046, 'temp_loss': 2.803607717414812, 'temp': 0.5551213127684731, 'alpha_loss': -86.6910043176199, 'alpha': 4.985746565581747, 'critic_loss': 1386.190598680794, 'actor_loss': -9.89500978502924, 'time_step': 0.034794480814410084, 'td_error': 1.2755317215832487, 'init_value': 8.888631820678711, 'ave_value': 8.89108280147518} step=14878
2022-04-22 07:25.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:25.26 [info     ] CQL_20220422071610: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003242292845180269, 'time_algorithm_update': 0.03461361482653315, 'temp_loss': 2.765459439658016, 'temp': 0.5475442225188878, 'alpha_loss': -90.0657641239938, 'alpha': 5.1798399349168545, 'critic_loss': 1459.7778172134665, 'actor_loss': -10.061437193368901, 'time_step': 0.03502142360444703, 'td_error': 1.276721926017577, 'init_value': 8.99849796295166, 'ave_value': 9.00167855452225} step=15224
2022-04-22 07:25.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:25.39 [info     ] CQL_20220422071610: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003201954626623606, 'time_algorithm_update': 0.03587107851326121, 'temp_loss': 2.7278707640708526, 'temp': 0.5400698689711576, 'alpha_loss': -93.57574035115324, 'alpha': 5.381489172147188, 'critic_loss': 1564.6704930652772, 'actor_loss': -10.147903538852757, 'time_step': 0.03627558043926438, 'td_error': 1.2787190869836063, 'init_value': 9.133857727050781, 'ave_value': 9.135537940034826} step=15570
2022-04-22 07:25.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:25.52 [info     ] CQL_20220422071610: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003191473856137667, 'time_algorithm_update': 0.0349600494252464, 'temp_loss': 2.6902940817651033, 'temp': 0.5326981497982334, 'alpha_loss': -97.22055629223068, 'alpha': 5.590995831296623, 'critic_loss': 1628.213775105559, 'actor_loss': -10.286915117605574, 'time_step': 0.03536677842884395, 'td_error': 1.2817268364295449, 'init_value': 9.341414451599121, 'ave_value': 9.34179666997585} step=15916
2022-04-22 07:25.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:26.04 [info     ] CQL_20220422071610: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00031511976539744116, 'time_algorithm_update': 0.03520077570325377, 'temp_loss': 2.653231547057973, 'temp': 0.5254270153583129, 'alpha_loss': -101.01196200861408, 'alpha': 5.808663377872092, 'critic_loss': 1684.11454675928, 'actor_loss': -10.408170755198926, 'time_step': 0.03559788940958894, 'td_error': 1.2815871778967587, 'init_value': 9.371230125427246, 'ave_value': 9.37428049957543} step=16262
2022-04-22 07:26.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:26.16 [info     ] CQL_20220422071610: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00030294112387420125, 'time_algorithm_update': 0.0324209742463393, 'temp_loss': 2.617753924661978, 'temp': 0.5182549491783098, 'alpha_loss': -104.94914307346234, 'alpha': 6.034797431416594, 'critic_loss': 1752.8882097012736, 'actor_loss': -10.497029943962318, 'time_step': 0.032799235658149496, 'td_error': 1.2840002977583407, 'init_value': 9.525774955749512, 'ave_value': 9.52664395183565} step=16608
2022-04-22 07:26.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:26.27 [info     ] CQL_20220422071610: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0002877477965602985, 'time_algorithm_update': 0.030505348492219957, 'temp_loss': 2.582732044203433, 'temp': 0.5111785419069963, 'alpha_loss': -109.01542943612688, 'alpha': 6.2697205143856864, 'critic_loss': 1825.5664626986995, 'actor_loss': -10.587322863540209, 'time_step': 0.030872276063599338, 'td_error': 1.2854385012890477, 'init_value': 9.624470710754395, 'ave_value': 9.625284578450882} step=16954
2022-04-22 07:26.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:26.40 [info     ] CQL_20220422071610: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.000314001403102985, 'time_algorithm_update': 0.03540570680805714, 'temp_loss': 2.5463416162942876, 'temp': 0.5042001403480596, 'alpha_loss': -113.26654769368254, 'alpha': 6.513798888708125, 'critic_loss': 1896.3439863789288, 'actor_loss': -10.673983050219585, 'time_step': 0.03579912847177142, 'td_error': 1.2875689372923051, 'init_value': 9.763232231140137, 'ave_value': 9.763428837092016} step=17300
2022-04-22 07:26.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422071610/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910

Epoch 1/50:   0%|          | 0/177 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:26.40 [info     ] FQE_20220422072640: epoch=1 step=177 epoch=1 metrics={'time_sample_batch': 0.00012761724870757193, 'time_algorithm_update': 0.0023892410730911515, 'loss': 0.006400911544586432, 'time_step': 0.0025738322802182647, 'init_value': -0.6107600927352905, 'ave_value': -0.588051371066062, 'soft_opc': nan} step=177




2022-04-22 07:26.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_177.pt


Epoch 2/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.41 [info     ] FQE_20220422072640: epoch=2 step=354 epoch=2 metrics={'time_sample_batch': 0.00012892922439144156, 'time_algorithm_update': 0.0023535160021593343, 'loss': 0.004170445062346377, 'time_step': 0.0025445552869031657, 'init_value': -0.6695983409881592, 'ave_value': -0.6172600154225175, 'soft_opc': nan} step=354




2022-04-22 07:26.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_354.pt


Epoch 3/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.41 [info     ] FQE_20220422072640: epoch=3 step=531 epoch=3 metrics={'time_sample_batch': 0.00012877836065777276, 'time_algorithm_update': 0.002304454307771672, 'loss': 0.003595736515734576, 'time_step': 0.0024955757593704483, 'init_value': -0.7187917232513428, 'ave_value': -0.6441542143578286, 'soft_opc': nan} step=531




2022-04-22 07:26.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_531.pt


Epoch 4/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.42 [info     ] FQE_20220422072640: epoch=4 step=708 epoch=4 metrics={'time_sample_batch': 0.00012800922501558638, 'time_algorithm_update': 0.002399688386647715, 'loss': 0.003172824382229503, 'time_step': 0.002585829988037799, 'init_value': -0.7550730109214783, 'ave_value': -0.6595228195906402, 'soft_opc': nan} step=708




2022-04-22 07:26.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_708.pt


Epoch 5/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.42 [info     ] FQE_20220422072640: epoch=5 step=885 epoch=5 metrics={'time_sample_batch': 0.00012487476154909296, 'time_algorithm_update': 0.0022537129073493226, 'loss': 0.002867474861963952, 'time_step': 0.002438977613287457, 'init_value': -0.7867569923400879, 'ave_value': -0.6707182767691913, 'soft_opc': nan} step=885




2022-04-22 07:26.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_885.pt


Epoch 6/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.43 [info     ] FQE_20220422072640: epoch=6 step=1062 epoch=6 metrics={'time_sample_batch': 0.00013482772697836665, 'time_algorithm_update': 0.0024793808069606287, 'loss': 0.0026193242217545623, 'time_step': 0.0026782515358790165, 'init_value': -0.8140840530395508, 'ave_value': -0.6657151024978798, 'soft_opc': nan} step=1062




2022-04-22 07:26.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_1062.pt


Epoch 7/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.43 [info     ] FQE_20220422072640: epoch=7 step=1239 epoch=7 metrics={'time_sample_batch': 0.00012556173033633476, 'time_algorithm_update': 0.00232066138316009, 'loss': 0.0024816333470829254, 'time_step': 0.002504799999086197, 'init_value': -0.9010255932807922, 'ave_value': -0.7327077314839349, 'soft_opc': nan} step=1239




2022-04-22 07:26.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_1239.pt


Epoch 8/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.44 [info     ] FQE_20220422072640: epoch=8 step=1416 epoch=8 metrics={'time_sample_batch': 0.00012101291936669646, 'time_algorithm_update': 0.0020527718430858547, 'loss': 0.0023345392711044913, 'time_step': 0.002229674387786348, 'init_value': -0.9326784014701843, 'ave_value': -0.7286794382262158, 'soft_opc': nan} step=1416




2022-04-22 07:26.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_1416.pt


Epoch 9/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.44 [info     ] FQE_20220422072640: epoch=9 step=1593 epoch=9 metrics={'time_sample_batch': 0.00011780975902147886, 'time_algorithm_update': 0.0020442588181145446, 'loss': 0.002407068696128215, 'time_step': 0.0022165856118929587, 'init_value': -0.995462954044342, 'ave_value': -0.7667527300012005, 'soft_opc': nan} step=1593




2022-04-22 07:26.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_1593.pt


Epoch 10/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.45 [info     ] FQE_20220422072640: epoch=10 step=1770 epoch=10 metrics={'time_sample_batch': 0.0001329661762647036, 'time_algorithm_update': 0.002618807183820649, 'loss': 0.002534298474853069, 'time_step': 0.0028127088385113217, 'init_value': -1.029667615890503, 'ave_value': -0.774650306753568, 'soft_opc': nan} step=1770




2022-04-22 07:26.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_1770.pt


Epoch 11/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.45 [info     ] FQE_20220422072640: epoch=11 step=1947 epoch=11 metrics={'time_sample_batch': 0.00012574492201293256, 'time_algorithm_update': 0.00239417916637356, 'loss': 0.0028398877837153224, 'time_step': 0.0025777412673174327, 'init_value': -1.114409327507019, 'ave_value': -0.8343467020952666, 'soft_opc': nan} step=1947




2022-04-22 07:26.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_1947.pt


Epoch 12/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.46 [info     ] FQE_20220422072640: epoch=12 step=2124 epoch=12 metrics={'time_sample_batch': 0.00012152343146545066, 'time_algorithm_update': 0.0021326568840587205, 'loss': 0.003029783118067646, 'time_step': 0.0023113495885989086, 'init_value': -1.1820670366287231, 'ave_value': -0.8690044923319115, 'soft_opc': nan} step=2124




2022-04-22 07:26.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_2124.pt


Epoch 13/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.46 [info     ] FQE_20220422072640: epoch=13 step=2301 epoch=13 metrics={'time_sample_batch': 0.00013671621764447056, 'time_algorithm_update': 0.0027763857006353173, 'loss': 0.0031968709005774287, 'time_step': 0.0029789768369857875, 'init_value': -1.2159082889556885, 'ave_value': -0.8793761475173919, 'soft_opc': nan} step=2301




2022-04-22 07:26.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_2301.pt


Epoch 14/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.47 [info     ] FQE_20220422072640: epoch=14 step=2478 epoch=14 metrics={'time_sample_batch': 0.00012479259469414834, 'time_algorithm_update': 0.002159338212956143, 'loss': 0.0034944078793596625, 'time_step': 0.0023410131702315336, 'init_value': -1.3029335737228394, 'ave_value': -0.9259598886197036, 'soft_opc': nan} step=2478




2022-04-22 07:26.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_2478.pt


Epoch 15/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.47 [info     ] FQE_20220422072640: epoch=15 step=2655 epoch=15 metrics={'time_sample_batch': 0.00013637003925560557, 'time_algorithm_update': 0.002568841654028596, 'loss': 0.0037807494459152855, 'time_step': 0.0027691415474239717, 'init_value': -1.3624228239059448, 'ave_value': -0.9595819763772122, 'soft_opc': nan} step=2655




2022-04-22 07:26.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_2655.pt


Epoch 16/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.48 [info     ] FQE_20220422072640: epoch=16 step=2832 epoch=16 metrics={'time_sample_batch': 0.00013338643952278095, 'time_algorithm_update': 0.002555313756910421, 'loss': 0.004246666031366078, 'time_step': 0.002758034205032607, 'init_value': -1.4271726608276367, 'ave_value': -1.0003776590678248, 'soft_opc': nan} step=2832




2022-04-22 07:26.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_2832.pt


Epoch 17/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.48 [info     ] FQE_20220422072640: epoch=17 step=3009 epoch=17 metrics={'time_sample_batch': 0.00013336084656796212, 'time_algorithm_update': 0.0023883089507366977, 'loss': 0.004553598696089713, 'time_step': 0.0025847470019496766, 'init_value': -1.418689250946045, 'ave_value': -0.9557375640404207, 'soft_opc': nan} step=3009




2022-04-22 07:26.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_3009.pt


Epoch 18/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.49 [info     ] FQE_20220422072640: epoch=18 step=3186 epoch=18 metrics={'time_sample_batch': 0.00013491258782855536, 'time_algorithm_update': 0.002568776998142738, 'loss': 0.004953635092903623, 'time_step': 0.002766163335681635, 'init_value': -1.401723861694336, 'ave_value': -0.9344537900990104, 'soft_opc': nan} step=3186




2022-04-22 07:26.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_3186.pt


Epoch 19/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.49 [info     ] FQE_20220422072640: epoch=19 step=3363 epoch=19 metrics={'time_sample_batch': 0.0001222629331599521, 'time_algorithm_update': 0.002191368469410697, 'loss': 0.005730036451773308, 'time_step': 0.002368928348950747, 'init_value': -1.4400309324264526, 'ave_value': -0.9432279314141016, 'soft_opc': nan} step=3363




2022-04-22 07:26.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_3363.pt


Epoch 20/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.50 [info     ] FQE_20220422072640: epoch=20 step=3540 epoch=20 metrics={'time_sample_batch': 0.00012908412911797647, 'time_algorithm_update': 0.0022232680670959126, 'loss': 0.006223364515456313, 'time_step': 0.002408305130435922, 'init_value': -1.5233449935913086, 'ave_value': -1.0097485765121184, 'soft_opc': nan} step=3540




2022-04-22 07:26.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_3540.pt


Epoch 21/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.50 [info     ] FQE_20220422072640: epoch=21 step=3717 epoch=21 metrics={'time_sample_batch': 0.00011881461924752273, 'time_algorithm_update': 0.0021060509870281328, 'loss': 0.006614137995536939, 'time_step': 0.002279928175069518, 'init_value': -1.5588425397872925, 'ave_value': -1.026147140603367, 'soft_opc': nan} step=3717




2022-04-22 07:26.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_3717.pt


Epoch 22/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.51 [info     ] FQE_20220422072640: epoch=22 step=3894 epoch=22 metrics={'time_sample_batch': 0.00013386192968336202, 'time_algorithm_update': 0.00238786848251429, 'loss': 0.007259214916174421, 'time_step': 0.002586775580368473, 'init_value': -1.4978747367858887, 'ave_value': -0.9547733728843811, 'soft_opc': nan} step=3894




2022-04-22 07:26.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_3894.pt


Epoch 23/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.51 [info     ] FQE_20220422072640: epoch=23 step=4071 epoch=23 metrics={'time_sample_batch': 0.0001268683180297162, 'time_algorithm_update': 0.0023219976048011563, 'loss': 0.008022381424559228, 'time_step': 0.0025082873759296655, 'init_value': -1.5705674886703491, 'ave_value': -0.9939567059379172, 'soft_opc': nan} step=4071




2022-04-22 07:26.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_4071.pt


Epoch 24/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.52 [info     ] FQE_20220422072640: epoch=24 step=4248 epoch=24 metrics={'time_sample_batch': 0.00012404635801153667, 'time_algorithm_update': 0.002241527966860324, 'loss': 0.008960608765548673, 'time_step': 0.0024225038323698743, 'init_value': -1.597060203552246, 'ave_value': -1.0068756071185028, 'soft_opc': nan} step=4248




2022-04-22 07:26.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_4248.pt


Epoch 25/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.52 [info     ] FQE_20220422072640: epoch=25 step=4425 epoch=25 metrics={'time_sample_batch': 0.00013043247373764125, 'time_algorithm_update': 0.002496200766267076, 'loss': 0.009037992969947199, 'time_step': 0.002692447543817725, 'init_value': -1.6435059309005737, 'ave_value': -1.0280371616463047, 'soft_opc': nan} step=4425




2022-04-22 07:26.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_4425.pt


Epoch 26/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.53 [info     ] FQE_20220422072640: epoch=26 step=4602 epoch=26 metrics={'time_sample_batch': 0.00013407475530764477, 'time_algorithm_update': 0.0026317518309684797, 'loss': 0.00968440490745871, 'time_step': 0.002829407568031785, 'init_value': -1.7156633138656616, 'ave_value': -1.0877089122595551, 'soft_opc': nan} step=4602




2022-04-22 07:26.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_4602.pt


Epoch 27/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.53 [info     ] FQE_20220422072640: epoch=27 step=4779 epoch=27 metrics={'time_sample_batch': 0.00012410293191166246, 'time_algorithm_update': 0.0021838818566273835, 'loss': 0.010400719813888072, 'time_step': 0.0023672014979992883, 'init_value': -1.7132821083068848, 'ave_value': -1.0815510709274996, 'soft_opc': nan} step=4779




2022-04-22 07:26.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_4779.pt


Epoch 28/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.54 [info     ] FQE_20220422072640: epoch=28 step=4956 epoch=28 metrics={'time_sample_batch': 0.00012974550495039946, 'time_algorithm_update': 0.002345813869756494, 'loss': 0.011040057698238409, 'time_step': 0.0025377044569974564, 'init_value': -1.7695108652114868, 'ave_value': -1.1268946663831716, 'soft_opc': nan} step=4956




2022-04-22 07:26.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_4956.pt


Epoch 29/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.54 [info     ] FQE_20220422072640: epoch=29 step=5133 epoch=29 metrics={'time_sample_batch': 0.00012029227563890361, 'time_algorithm_update': 0.001965390760346321, 'loss': 0.011719021720595994, 'time_step': 0.002136542972198314, 'init_value': -1.80961275100708, 'ave_value': -1.1528825162185563, 'soft_opc': nan} step=5133




2022-04-22 07:26.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_5133.pt


Epoch 30/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.54 [info     ] FQE_20220422072640: epoch=30 step=5310 epoch=30 metrics={'time_sample_batch': 0.0001202195377673133, 'time_algorithm_update': 0.002088184410569358, 'loss': 0.01251692179160319, 'time_step': 0.0022625963566666944, 'init_value': -1.8536545038223267, 'ave_value': -1.1738096293654885, 'soft_opc': nan} step=5310




2022-04-22 07:26.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_5310.pt


Epoch 31/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.55 [info     ] FQE_20220422072640: epoch=31 step=5487 epoch=31 metrics={'time_sample_batch': 0.00012851839011671852, 'time_algorithm_update': 0.0023287554918709446, 'loss': 0.012914934406375334, 'time_step': 0.002516720928041275, 'init_value': -1.8916720151901245, 'ave_value': -1.1988382808215268, 'soft_opc': nan} step=5487




2022-04-22 07:26.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_5487.pt


Epoch 32/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.55 [info     ] FQE_20220422072640: epoch=32 step=5664 epoch=32 metrics={'time_sample_batch': 0.00012332436728612178, 'time_algorithm_update': 0.00223945493752, 'loss': 0.013468190946742514, 'time_step': 0.0024167077016022244, 'init_value': -1.8672112226486206, 'ave_value': -1.137286069720715, 'soft_opc': nan} step=5664




2022-04-22 07:26.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_5664.pt


Epoch 33/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.56 [info     ] FQE_20220422072640: epoch=33 step=5841 epoch=33 metrics={'time_sample_batch': 0.0001368468764138087, 'time_algorithm_update': 0.002401428707575394, 'loss': 0.013925218001984081, 'time_step': 0.0026008220715711345, 'init_value': -1.967185616493225, 'ave_value': -1.2144415279442036, 'soft_opc': nan} step=5841




2022-04-22 07:26.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_5841.pt


Epoch 34/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.56 [info     ] FQE_20220422072640: epoch=34 step=6018 epoch=34 metrics={'time_sample_batch': 0.0001337326179116459, 'time_algorithm_update': 0.002524358404558257, 'loss': 0.015093485638301946, 'time_step': 0.0027247418118061992, 'init_value': -2.030863046646118, 'ave_value': -1.2784348530096334, 'soft_opc': nan} step=6018




2022-04-22 07:26.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_6018.pt


Epoch 35/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.57 [info     ] FQE_20220422072640: epoch=35 step=6195 epoch=35 metrics={'time_sample_batch': 0.0001294033675544006, 'time_algorithm_update': 0.0024172451536534196, 'loss': 0.0159918729444087, 'time_step': 0.0026070573235635703, 'init_value': -2.115692138671875, 'ave_value': -1.3450139833563888, 'soft_opc': nan} step=6195




2022-04-22 07:26.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_6195.pt


Epoch 36/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.57 [info     ] FQE_20220422072640: epoch=36 step=6372 epoch=36 metrics={'time_sample_batch': 0.00012673361826751192, 'time_algorithm_update': 0.0021227456755557304, 'loss': 0.01690839756856341, 'time_step': 0.0023070028272725767, 'init_value': -2.096665620803833, 'ave_value': -1.3141190206693547, 'soft_opc': nan} step=6372




2022-04-22 07:26.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_6372.pt


Epoch 37/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.58 [info     ] FQE_20220422072640: epoch=37 step=6549 epoch=37 metrics={'time_sample_batch': 0.00012639148087151308, 'time_algorithm_update': 0.002135443822138727, 'loss': 0.016674648327121542, 'time_step': 0.0023205751753122795, 'init_value': -2.177025079727173, 'ave_value': -1.3895367289705947, 'soft_opc': nan} step=6549




2022-04-22 07:26.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_6549.pt


Epoch 38/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.58 [info     ] FQE_20220422072640: epoch=38 step=6726 epoch=38 metrics={'time_sample_batch': 0.00012448682623394464, 'time_algorithm_update': 0.002256005497302039, 'loss': 0.017109631253919424, 'time_step': 0.002437928302139886, 'init_value': -2.252669095993042, 'ave_value': -1.436327509005909, 'soft_opc': nan} step=6726




2022-04-22 07:26.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_6726.pt


Epoch 39/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.59 [info     ] FQE_20220422072640: epoch=39 step=6903 epoch=39 metrics={'time_sample_batch': 0.00013451252953480865, 'time_algorithm_update': 0.002450164428538522, 'loss': 0.01769602345927529, 'time_step': 0.0026442869908392093, 'init_value': -2.2534191608428955, 'ave_value': -1.4203073391081156, 'soft_opc': nan} step=6903




2022-04-22 07:26.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_6903.pt


Epoch 40/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:26.59 [info     ] FQE_20220422072640: epoch=40 step=7080 epoch=40 metrics={'time_sample_batch': 0.00012444506930766133, 'time_algorithm_update': 0.002042250444660079, 'loss': 0.017650865252842696, 'time_step': 0.002227001944504215, 'init_value': -2.258546829223633, 'ave_value': -1.4118193247431032, 'soft_opc': nan} step=7080




2022-04-22 07:26.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_7080.pt


Epoch 41/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.00 [info     ] FQE_20220422072640: epoch=41 step=7257 epoch=41 metrics={'time_sample_batch': 0.00012914609100859043, 'time_algorithm_update': 0.0023704895191946945, 'loss': 0.01871481278350861, 'time_step': 0.0025594800205553992, 'init_value': -2.2766337394714355, 'ave_value': -1.4419783462320004, 'soft_opc': nan} step=7257




2022-04-22 07:27.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_7257.pt


Epoch 42/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.00 [info     ] FQE_20220422072640: epoch=42 step=7434 epoch=42 metrics={'time_sample_batch': 0.00012172144011589094, 'time_algorithm_update': 0.002054636087794762, 'loss': 0.019335696417154314, 'time_step': 0.0022311655141539492, 'init_value': -2.325457811355591, 'ave_value': -1.4890481155130777, 'soft_opc': nan} step=7434




2022-04-22 07:27.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_7434.pt


Epoch 43/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.01 [info     ] FQE_20220422072640: epoch=43 step=7611 epoch=43 metrics={'time_sample_batch': 0.00012456764609126723, 'time_algorithm_update': 0.0021806625323107015, 'loss': 0.01993119607957101, 'time_step': 0.002365591835840947, 'init_value': -2.3362128734588623, 'ave_value': -1.4676319551069636, 'soft_opc': nan} step=7611




2022-04-22 07:27.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_7611.pt


Epoch 44/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.01 [info     ] FQE_20220422072640: epoch=44 step=7788 epoch=44 metrics={'time_sample_batch': 0.00012921344088969258, 'time_algorithm_update': 0.0024733570335948533, 'loss': 0.02057419801038811, 'time_step': 0.002664368031388622, 'init_value': -2.347543478012085, 'ave_value': -1.4600216317821193, 'soft_opc': nan} step=7788




2022-04-22 07:27.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_7788.pt


Epoch 45/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.02 [info     ] FQE_20220422072640: epoch=45 step=7965 epoch=45 metrics={'time_sample_batch': 0.00013412324722203832, 'time_algorithm_update': 0.0024603692825231173, 'loss': 0.021248493055841953, 'time_step': 0.0026550023569225593, 'init_value': -2.3270561695098877, 'ave_value': -1.4158682318918923, 'soft_opc': nan} step=7965




2022-04-22 07:27.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_7965.pt


Epoch 46/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.02 [info     ] FQE_20220422072640: epoch=46 step=8142 epoch=46 metrics={'time_sample_batch': 0.00013375012888073247, 'time_algorithm_update': 0.0024382044366524046, 'loss': 0.02132227889585735, 'time_step': 0.0026314608794821186, 'init_value': -2.4169516563415527, 'ave_value': -1.4918732805104853, 'soft_opc': nan} step=8142




2022-04-22 07:27.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_8142.pt


Epoch 47/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.03 [info     ] FQE_20220422072640: epoch=47 step=8319 epoch=47 metrics={'time_sample_batch': 0.00012619751321393891, 'time_algorithm_update': 0.0022670077738788843, 'loss': 0.021261706776617064, 'time_step': 0.002454478861921925, 'init_value': -2.449633836746216, 'ave_value': -1.4915374580893788, 'soft_opc': nan} step=8319




2022-04-22 07:27.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_8319.pt


Epoch 48/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.03 [info     ] FQE_20220422072640: epoch=48 step=8496 epoch=48 metrics={'time_sample_batch': 0.0001223599169887392, 'time_algorithm_update': 0.002061579860536392, 'loss': 0.022502290423680623, 'time_step': 0.0022411103975974907, 'init_value': -2.4238522052764893, 'ave_value': -1.44457829158205, 'soft_opc': nan} step=8496




2022-04-22 07:27.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_8496.pt


Epoch 49/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.04 [info     ] FQE_20220422072640: epoch=49 step=8673 epoch=49 metrics={'time_sample_batch': 0.00012977783289332847, 'time_algorithm_update': 0.0024453731579969157, 'loss': 0.02269040531936591, 'time_step': 0.002634319208436093, 'init_value': -2.5108559131622314, 'ave_value': -1.5275275058395512, 'soft_opc': nan} step=8673




2022-04-22 07:27.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_8673.pt


Epoch 50/50:   0%|          | 0/177 [00:00<?, ?it/s]



2022-04-22 07:27.04 [info     ] FQE_20220422072640: epoch=50 step=8850 epoch=50 metrics={'time_sample_batch': 0.00014363035643841587, 'time_algorithm_update': 0.0026370105096849344, 'loss': 0.023870320360920797, 'time_step': 0.0028451889921716376, 'init_value': -2.469421625137329, 'ave_value': -1.4771389122157395, 'soft_opc': nan} step=8850




2022-04-22 07:27.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072640/model_8850.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 07:27.04 [debug    ] RoundIterator is selected.
2022-04-22 07:27.04 [info     ] Directory is created at d3rlpy_logs/FQE_20220422072704
2022-04-22 07:27.04 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:27.04 [debug    ] Building models...
2022-04-22 07:27.04 [debug    ] Models have been built.
2022-04-22 07:27.04 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422072704/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:27.05 [info     ] FQE_20220422072704: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00013235557911007903, 'time_algorithm_update': 0.0022675408873447152, 'loss': 0.02114934144476645, 'time_step': 0.002458015153574389, 'init_value': -1.107268214225769, 'ave_value': -1.1169614066277538, 'soft_opc': nan} step=344




2022-04-22 07:27.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.06 [info     ] FQE_20220422072704: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00013545917910198833, 'time_algorithm_update': 0.002644310856974402, 'loss': 0.019778342928390865, 'time_step': 0.0028433453205019927, 'init_value': -1.8324717283248901, 'ave_value': -1.8726961310785095, 'soft_opc': nan} step=688




2022-04-22 07:27.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.07 [info     ] FQE_20220422072704: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00012608946755874987, 'time_algorithm_update': 0.0022371605385181517, 'loss': 0.021909478437263777, 'time_step': 0.002421180869257727, 'init_value': -2.8078603744506836, 'ave_value': -2.9122727577079526, 'soft_opc': nan} step=1032




2022-04-22 07:27.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.08 [info     ] FQE_20220422072704: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00013019595035286836, 'time_algorithm_update': 0.002419681050056635, 'loss': 0.02370795723711422, 'time_step': 0.002610163633213487, 'init_value': -3.327807903289795, 'ave_value': -3.5126096908305144, 'soft_opc': nan} step=1376




2022-04-22 07:27.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.09 [info     ] FQE_20220422072704: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00012881118197773778, 'time_algorithm_update': 0.0023763380771459537, 'loss': 0.030567325743694983, 'time_step': 0.0025671713573988094, 'init_value': -4.096034526824951, 'ave_value': -4.406586937552637, 'soft_opc': nan} step=1720




2022-04-22 07:27.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.10 [info     ] FQE_20220422072704: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00012919653293698332, 'time_algorithm_update': 0.0023639978364456533, 'loss': 0.037649479077392536, 'time_step': 0.0025514683058095533, 'init_value': -4.52088737487793, 'ave_value': -5.007008078372157, 'soft_opc': nan} step=2064




2022-04-22 07:27.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.11 [info     ] FQE_20220422072704: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001287682111873183, 'time_algorithm_update': 0.002311641393705856, 'loss': 0.04763817893297866, 'time_step': 0.002501613871995793, 'init_value': -5.300408840179443, 'ave_value': -5.912425513189655, 'soft_opc': nan} step=2408




2022-04-22 07:27.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.12 [info     ] FQE_20220422072704: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00013094308764435524, 'time_algorithm_update': 0.00227627296780431, 'loss': 0.061453858450011806, 'time_step': 0.0024687751781108766, 'init_value': -5.742871284484863, 'ave_value': -6.499428949518515, 'soft_opc': nan} step=2752




2022-04-22 07:27.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.13 [info     ] FQE_20220422072704: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00012729472892228946, 'time_algorithm_update': 0.0022070518759794012, 'loss': 0.07616904811652074, 'time_step': 0.002393046783846478, 'init_value': -6.107524871826172, 'ave_value': -6.952411117252835, 'soft_opc': nan} step=3096




2022-04-22 07:27.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.14 [info     ] FQE_20220422072704: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.0001252848048542821, 'time_algorithm_update': 0.0022287181643552557, 'loss': 0.09677793535533859, 'time_step': 0.002412181260973908, 'init_value': -6.767887592315674, 'ave_value': -7.78233578254108, 'soft_opc': nan} step=3440




2022-04-22 07:27.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.15 [info     ] FQE_20220422072704: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00012969485549039618, 'time_algorithm_update': 0.0022082231765569644, 'loss': 0.11711779941241582, 'time_step': 0.0023998874564503513, 'init_value': -7.146851539611816, 'ave_value': -8.267609672021825, 'soft_opc': nan} step=3784




2022-04-22 07:27.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.16 [info     ] FQE_20220422072704: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00013017446495765863, 'time_algorithm_update': 0.00236262277115223, 'loss': 0.14928310629261962, 'time_step': 0.0025526867356411246, 'init_value': -7.439884662628174, 'ave_value': -8.765462460824464, 'soft_opc': nan} step=4128




2022-04-22 07:27.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.17 [info     ] FQE_20220422072704: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.0001260853090951609, 'time_algorithm_update': 0.0020982401315556014, 'loss': 0.1808445356396395, 'time_step': 0.0022832266120023505, 'init_value': -7.7366180419921875, 'ave_value': -9.332600457580794, 'soft_opc': nan} step=4472




2022-04-22 07:27.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.18 [info     ] FQE_20220422072704: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.0001314414101977681, 'time_algorithm_update': 0.0023867044337960177, 'loss': 0.21098176317917572, 'time_step': 0.0025801928930504377, 'init_value': -8.121848106384277, 'ave_value': -10.076296709126767, 'soft_opc': nan} step=4816




2022-04-22 07:27.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.19 [info     ] FQE_20220422072704: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00013033664503762888, 'time_algorithm_update': 0.002342066099477369, 'loss': 0.250730573806132, 'time_step': 0.0025362712006236233, 'init_value': -8.118917465209961, 'ave_value': -10.418120534353898, 'soft_opc': nan} step=5160




2022-04-22 07:27.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.20 [info     ] FQE_20220422072704: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.000134020350700201, 'time_algorithm_update': 0.002541594727094783, 'loss': 0.28582501219758805, 'time_step': 0.0027399901733842005, 'init_value': -8.484800338745117, 'ave_value': -11.133596398760272, 'soft_opc': nan} step=5504




2022-04-22 07:27.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.20 [info     ] FQE_20220422072704: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00012217912563057832, 'time_algorithm_update': 0.0020834741204283956, 'loss': 0.3418039615850809, 'time_step': 0.002261895079945409, 'init_value': -8.229323387145996, 'ave_value': -11.31051262201456, 'soft_opc': nan} step=5848




2022-04-22 07:27.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.21 [info     ] FQE_20220422072704: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00012403587962305822, 'time_algorithm_update': 0.002158232899599297, 'loss': 0.37437212129318437, 'time_step': 0.0023391711157421734, 'init_value': -8.269552230834961, 'ave_value': -11.855415193230618, 'soft_opc': nan} step=6192




2022-04-22 07:27.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.22 [info     ] FQE_20220422072704: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00012803354928659838, 'time_algorithm_update': 0.002324044011359991, 'loss': 0.4363456234108483, 'time_step': 0.0025126642959062443, 'init_value': -8.403059005737305, 'ave_value': -12.367111916999912, 'soft_opc': nan} step=6536




2022-04-22 07:27.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.23 [info     ] FQE_20220422072704: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00012721363888230434, 'time_algorithm_update': 0.002207175243732541, 'loss': 0.5027398388876125, 'time_step': 0.002393803624219673, 'init_value': -8.211614608764648, 'ave_value': -12.634649503144209, 'soft_opc': nan} step=6880




2022-04-22 07:27.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.24 [info     ] FQE_20220422072704: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00013002961180930915, 'time_algorithm_update': 0.0024166030939235246, 'loss': 0.5709362484428079, 'time_step': 0.002611286418382512, 'init_value': -8.407295227050781, 'ave_value': -13.47509316124173, 'soft_opc': nan} step=7224




2022-04-22 07:27.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.25 [info     ] FQE_20220422072704: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00012984178787054016, 'time_algorithm_update': 0.0022021448889444043, 'loss': 0.6244457915511935, 'time_step': 0.0023907159650048545, 'init_value': -8.535930633544922, 'ave_value': -13.99668331411793, 'soft_opc': nan} step=7568




2022-04-22 07:27.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.26 [info     ] FQE_20220422072704: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00012661343397096146, 'time_algorithm_update': 0.0022436726924984956, 'loss': 0.7094167631747591, 'time_step': 0.00243014651675557, 'init_value': -8.761190414428711, 'ave_value': -14.748306126645891, 'soft_opc': nan} step=7912




2022-04-22 07:27.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.27 [info     ] FQE_20220422072704: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00012899623360744742, 'time_algorithm_update': 0.0023009749346001203, 'loss': 0.7909740770755466, 'time_step': 0.0024898156177165895, 'init_value': -8.791853904724121, 'ave_value': -15.195001975209495, 'soft_opc': nan} step=8256




2022-04-22 07:27.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.28 [info     ] FQE_20220422072704: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00013124041779096737, 'time_algorithm_update': 0.0023498070794482563, 'loss': 0.8572849558536396, 'time_step': 0.0025441147560297055, 'init_value': -8.904159545898438, 'ave_value': -15.85970128525619, 'soft_opc': nan} step=8600




2022-04-22 07:27.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.29 [info     ] FQE_20220422072704: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00012776879377143327, 'time_algorithm_update': 0.0022126457025838453, 'loss': 0.9601011736058566, 'time_step': 0.002400169538897137, 'init_value': -8.769325256347656, 'ave_value': -16.084298117378275, 'soft_opc': nan} step=8944




2022-04-22 07:27.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.30 [info     ] FQE_20220422072704: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001236879548361135, 'time_algorithm_update': 0.0021001828271289204, 'loss': 1.0229568490866832, 'time_step': 0.0022802699443905854, 'init_value': -8.834632873535156, 'ave_value': -16.561600280221267, 'soft_opc': nan} step=9288




2022-04-22 07:27.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.31 [info     ] FQE_20220422072704: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001331644002781358, 'time_algorithm_update': 0.002484351396560669, 'loss': 1.0861449768160318, 'time_step': 0.0026791636333909144, 'init_value': -8.723777770996094, 'ave_value': -16.99318095877062, 'soft_opc': nan} step=9632




2022-04-22 07:27.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.32 [info     ] FQE_20220422072704: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00012997971024624136, 'time_algorithm_update': 0.002336830593818842, 'loss': 1.1595866924563294, 'time_step': 0.0025261508864025737, 'init_value': -8.839136123657227, 'ave_value': -17.603344335658306, 'soft_opc': nan} step=9976




2022-04-22 07:27.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.32 [info     ] FQE_20220422072704: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001284341479456702, 'time_algorithm_update': 0.0022515266440635505, 'loss': 1.2467936698433966, 'time_step': 0.0024386221586271775, 'init_value': -9.351405143737793, 'ave_value': -18.61233243322973, 'soft_opc': nan} step=10320




2022-04-22 07:27.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.33 [info     ] FQE_20220422072704: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00012566114580908486, 'time_algorithm_update': 0.002310813166374384, 'loss': 1.3317668423771338, 'time_step': 0.002495334591976432, 'init_value': -9.910799980163574, 'ave_value': -19.642713317439497, 'soft_opc': nan} step=10664




2022-04-22 07:27.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.34 [info     ] FQE_20220422072704: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001336821289949639, 'time_algorithm_update': 0.0025259769240091015, 'loss': 1.3866424522644212, 'time_step': 0.0027261958565822867, 'init_value': -9.70595645904541, 'ave_value': -19.92235714666344, 'soft_opc': nan} step=11008




2022-04-22 07:27.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.35 [info     ] FQE_20220422072704: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00012199130169180937, 'time_algorithm_update': 0.002195148273955944, 'loss': 1.4333785730197506, 'time_step': 0.002374251914578815, 'init_value': -9.351832389831543, 'ave_value': -20.024188757585446, 'soft_opc': nan} step=11352




2022-04-22 07:27.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.36 [info     ] FQE_20220422072704: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00012365052866381267, 'time_algorithm_update': 0.0021243691444396973, 'loss': 1.4659505569441027, 'time_step': 0.0023039052652758223, 'init_value': -9.146804809570312, 'ave_value': -20.14385427816536, 'soft_opc': nan} step=11696




2022-04-22 07:27.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.37 [info     ] FQE_20220422072704: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00013088556223137435, 'time_algorithm_update': 0.002422569103019182, 'loss': 1.5186616127964023, 'time_step': 0.002612516630527585, 'init_value': -9.372638702392578, 'ave_value': -20.741410605765058, 'soft_opc': nan} step=12040




2022-04-22 07:27.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.38 [info     ] FQE_20220422072704: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00013459075328915617, 'time_algorithm_update': 0.0024992352308228958, 'loss': 1.5481822276379653, 'time_step': 0.002700406451557958, 'init_value': -8.496866226196289, 'ave_value': -20.30387484093977, 'soft_opc': nan} step=12384




2022-04-22 07:27.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.39 [info     ] FQE_20220422072704: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.0001321247843808906, 'time_algorithm_update': 0.002423536638880885, 'loss': 1.5671053456120887, 'time_step': 0.0026168448980464494, 'init_value': -9.059566497802734, 'ave_value': -21.292991442388963, 'soft_opc': nan} step=12728




2022-04-22 07:27.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.40 [info     ] FQE_20220422072704: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00012582471204358479, 'time_algorithm_update': 0.0021903022777202516, 'loss': 1.5613892282857451, 'time_step': 0.0023735872534818426, 'init_value': -9.188240051269531, 'ave_value': -21.63980295140682, 'soft_opc': nan} step=13072




2022-04-22 07:27.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.41 [info     ] FQE_20220422072704: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00012578936510307845, 'time_algorithm_update': 0.00221864290015642, 'loss': 1.5763628622083816, 'time_step': 0.002400595781415008, 'init_value': -9.520992279052734, 'ave_value': -22.06982294952346, 'soft_opc': nan} step=13416




2022-04-22 07:27.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.42 [info     ] FQE_20220422072704: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00012370805407679357, 'time_algorithm_update': 0.002157849627871846, 'loss': 1.546825214722302, 'time_step': 0.002337516740311024, 'init_value': -9.208311080932617, 'ave_value': -21.868156271562952, 'soft_opc': nan} step=13760




2022-04-22 07:27.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.43 [info     ] FQE_20220422072704: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00013884486154068347, 'time_algorithm_update': 0.002523008474083834, 'loss': 1.5510137326972082, 'time_step': 0.002727647853452106, 'init_value': -9.25821304321289, 'ave_value': -22.072756032774073, 'soft_opc': nan} step=14104




2022-04-22 07:27.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.44 [info     ] FQE_20220422072704: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001285076141357422, 'time_algorithm_update': 0.0023859857126723887, 'loss': 1.5540567297315182, 'time_step': 0.0025743807471075722, 'init_value': -9.261510848999023, 'ave_value': -22.025445002684922, 'soft_opc': nan} step=14448




2022-04-22 07:27.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.45 [info     ] FQE_20220422072704: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00012736819511236145, 'time_algorithm_update': 0.0021757012189820755, 'loss': 1.5350947847676486, 'time_step': 0.002365202404731928, 'init_value': -9.602429389953613, 'ave_value': -22.286055819700294, 'soft_opc': nan} step=14792




2022-04-22 07:27.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.46 [info     ] FQE_20220422072704: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00012014633001283158, 'time_algorithm_update': 0.0020594527555066484, 'loss': 1.5274851010939063, 'time_step': 0.0022321128567983936, 'init_value': -9.996346473693848, 'ave_value': -22.564377615461964, 'soft_opc': nan} step=15136




2022-04-22 07:27.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.47 [info     ] FQE_20220422072704: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00013700196909350017, 'time_algorithm_update': 0.0024502069451088127, 'loss': 1.511479908091471, 'time_step': 0.0026517475760260293, 'init_value': -10.471551895141602, 'ave_value': -22.795173102019035, 'soft_opc': nan} step=15480




2022-04-22 07:27.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.48 [info     ] FQE_20220422072704: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00013383737830228583, 'time_algorithm_update': 0.0025580476882845855, 'loss': 1.471223798688761, 'time_step': 0.002756284419880357, 'init_value': -10.487408638000488, 'ave_value': -22.632910623753794, 'soft_opc': nan} step=15824




2022-04-22 07:27.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.49 [info     ] FQE_20220422072704: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00013579254926637162, 'time_algorithm_update': 0.002518980309020641, 'loss': 1.4661770763395483, 'time_step': 0.002719378748605418, 'init_value': -10.6530122756958, 'ave_value': -22.83816903883776, 'soft_opc': nan} step=16168




2022-04-22 07:27.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.50 [info     ] FQE_20220422072704: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00012822068014810251, 'time_algorithm_update': 0.0023317032082136287, 'loss': 1.4149469346631058, 'time_step': 0.002518847238185794, 'init_value': -10.838451385498047, 'ave_value': -22.928006323415214, 'soft_opc': nan} step=16512




2022-04-22 07:27.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.51 [info     ] FQE_20220422072704: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00013610374095828035, 'time_algorithm_update': 0.0024159661559171454, 'loss': 1.3581747733879574, 'time_step': 0.0026138119919355525, 'init_value': -10.521594047546387, 'ave_value': -22.83293788093478, 'soft_opc': nan} step=16856




2022-04-22 07:27.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:27.51 [info     ] FQE_20220422072704: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00012607491293618845, 'time_algorithm_update': 0.0023044916086418683, 'loss': 1.2972202745786066, 'time_step': 0.0024895002675610917, 'init_value': -10.766805648803711, 'ave_value': -23.142740513760227, 'soft_opc': nan} step=17200




2022-04-22 07:27.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422072704/model_17200.pt
search iteration:  30
using hyper params:  [0.0043141434209319, 0.004483686618706367, 3.383986841623044e-05, 5]
2022-04-22 07:27.51 [debug    ] RoundIterator is selected.
2022-04-22 07:27.51 [info     ] Directory is created at d3rlpy_logs/CQL_20220422072751
2022-04-22 07:27.51 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:27.51 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 07:27.51 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422072751/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0043141434209319, 'actor_optim_factory': {'optim_cls

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:28.04 [info     ] CQL_20220422072751: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00036617097137980377, 'time_algorithm_update': 0.0345732786751896, 'temp_loss': 4.942560978707551, 'temp': 0.9939632767197714, 'alpha_loss': -17.722680146983592, 'alpha': 1.017726174324234, 'critic_loss': 106.9274248773652, 'actor_loss': 3.740554337136749, 'time_step': 0.03502741232083712, 'td_error': 1.3091186788171985, 'init_value': -7.9637250900268555, 'ave_value': -7.439666355526691} step=346
2022-04-22 07:28.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:28.17 [info     ] CQL_20220422072751: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003749152828503206, 'time_algorithm_update': 0.0349141255968568, 'temp_loss': 4.957643616406215, 'temp': 0.9822794941463912, 'alpha_loss': -18.381908714426736, 'alpha': 1.054194668813937, 'critic_loss': 160.35046225751756, 'actor_loss': 8.972334455203459, 'time_step': 0.03537937878184236, 'td_error': 1.4193761926232205, 'init_value': -11.427579879760742, 'ave_value': -10.66844312860933} step=692
2022-04-22 07:28.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:28.29 [info     ] CQL_20220422072751: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00036693721837391055, 'time_algorithm_update': 0.034673608107373896, 'temp_loss': 4.899790151959899, 'temp': 0.9708984538998907, 'alpha_loss': -19.05341686954388, 'alpha': 1.0924765805288545, 'critic_loss': 311.20012391371534, 'actor_loss': 12.568000595004573, 'time_step': 0.0351287078306165, 'td_error': 1.4812539107394822, 'init_value': -13.718544006347656, 'ave_value': -12.827635226446398} step=1038
2022-04-22 07:28.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:28.41 [info     ] CQL_20220422072751: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00036857376208884176, 'time_algorithm_update': 0.034564391036943205, 'temp_loss': 4.844215947079521, 'temp': 0.9597249191275911, 'alpha_loss': -19.739995250812157, 'alpha': 1.1326385412602067, 'critic_loss': 534.470118880961, 'actor_loss': 13.436924135064803, 'time_step': 0.03502535268750494, 'td_error': 1.4638999143747604, 'init_value': -13.657539367675781, 'ave_value': -13.007299495206844} step=1384
2022-04-22 07:28.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:28.54 [info     ] CQL_20220422072751: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.000384432042954285, 'time_algorithm_update': 0.034863759327486074, 'temp_loss': 4.789006420642655, 'temp': 0.9487289807010937, 'alpha_loss': -20.452820160485416, 'alpha': 1.1747136140145318, 'critic_loss': 840.2346260203102, 'actor_loss': 10.768592341097794, 'time_step': 0.03533733786875113, 'td_error': 1.3416450672136142, 'init_value': -9.688996315002441, 'ave_value': -9.35774163469538} step=1730
2022-04-22 07:28.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:29.06 [info     ] CQL_20220422072751: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003739443817579677, 'time_algorithm_update': 0.03437649859169315, 'temp_loss': 4.734216940885334, 'temp': 0.937898038439668, 'alpha_loss': -21.198527462909677, 'alpha': 1.21872262286313, 'critic_loss': 1236.5965465038498, 'actor_loss': 6.2457568659258715, 'time_step': 0.034840538322581036, 'td_error': 1.3049450596991692, 'init_value': -6.821994781494141, 'ave_value': -6.70011588700748} step=2076
2022-04-22 07:29.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:29.19 [info     ] CQL_20220422072751: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003740477424136476, 'time_algorithm_update': 0.03465531258224752, 'temp_loss': 4.681492930891886, 'temp': 0.927222005376926, 'alpha_loss': -21.997409258274672, 'alpha': 1.2647440350813672, 'critic_loss': 1644.1207363591718, 'actor_loss': 4.623088745712545, 'time_step': 0.03512315253990923, 'td_error': 1.3045180332451654, 'init_value': -6.1535210609436035, 'ave_value': -6.0832390600044395} step=2422
2022-04-22 07:29.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:29.31 [info     ] CQL_20220422072751: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00036404725444110143, 'time_algorithm_update': 0.034120062872164514, 'temp_loss': 4.629940460183028, 'temp': 0.9166893871188853, 'alpha_loss': -22.831697662441716, 'alpha': 1.31282160805829, 'critic_loss': 2028.5314726195584, 'actor_loss': 4.555359652965744, 'time_step': 0.0345704162740983, 'td_error': 1.312951809779534, 'init_value': -6.4741530418396, 'ave_value': -6.423436738326687} step=2768
2022-04-22 07:29.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:29.44 [info     ] CQL_20220422072751: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003808254451420955, 'time_algorithm_update': 0.03495724421705125, 'temp_loss': 4.57704493076126, 'temp': 0.9062948752345378, 'alpha_loss': -23.701706340547243, 'alpha': 1.3629913843436048, 'critic_loss': 2406.870460113349, 'actor_loss': 4.755723869180404, 'time_step': 0.03542718515230741, 'td_error': 1.3194788305749958, 'init_value': -6.665912628173828, 'ave_value': -6.628364391381168} step=3114
2022-04-22 07:29.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:29.57 [info     ] CQL_20220422072751: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003727143899553773, 'time_algorithm_update': 0.0352748846043052, 'temp_loss': 4.523430494903829, 'temp': 0.8960370850356328, 'alpha_loss': -24.60891140403086, 'alpha': 1.4153008760744437, 'critic_loss': 2796.7373413791547, 'actor_loss': 5.089302304163144, 'time_step': 0.035734413676179216, 'td_error': 1.3263633589455985, 'init_value': -6.876847743988037, 'ave_value': -6.852566503359278} step=3460
2022-04-22 07:29.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:30.09 [info     ] CQL_20220422072751: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003681651429633874, 'time_algorithm_update': 0.03510087487325503, 'temp_loss': 4.4739223959818055, 'temp': 0.8859076174352899, 'alpha_loss': -25.559323591993035, 'alpha': 1.4698011075830184, 'critic_loss': 3198.6340536657785, 'actor_loss': 5.51107801729544, 'time_step': 0.03556189027135772, 'td_error': 1.337036594723101, 'init_value': -7.4015960693359375, 'ave_value': -7.378198309112014} step=3806
2022-04-22 07:30.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:30.22 [info     ] CQL_20220422072751: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.00038070761399462043, 'time_algorithm_update': 0.034653202646729574, 'temp_loss': 4.424015377298256, 'temp': 0.8759002800966273, 'alpha_loss': -26.547829556327336, 'alpha': 1.526552849077765, 'critic_loss': 3606.2873937353233, 'actor_loss': 5.919446956215566, 'time_step': 0.035130910790724564, 'td_error': 1.348976442615914, 'init_value': -7.964393138885498, 'ave_value': -7.941348309831181} step=4152
2022-04-22 07:30.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:30.34 [info     ] CQL_20220422072751: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00036549223640750596, 'time_algorithm_update': 0.03477158436196388, 'temp_loss': 4.374478502769691, 'temp': 0.8660124478312586, 'alpha_loss': -27.573015615430183, 'alpha': 1.5856138867449898, 'critic_loss': 4039.5845206376443, 'actor_loss': 6.433829528058885, 'time_step': 0.035226943864987766, 'td_error': 1.3599659907161954, 'init_value': -8.41622543334961, 'ave_value': -8.39936556764099} step=4498
2022-04-22 07:30.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:30.47 [info     ] CQL_20220422072751: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00036973691400075926, 'time_algorithm_update': 0.03425022150050698, 'temp_loss': 4.324826659494742, 'temp': 0.8562425641999768, 'alpha_loss': -28.63992866339711, 'alpha': 1.6470482848972254, 'critic_loss': 4471.671726822164, 'actor_loss': 6.948775721423199, 'time_step': 0.03471194595270763, 'td_error': 1.3739681250595164, 'init_value': -9.028388977050781, 'ave_value': -9.010569172891298} step=4844
2022-04-22 07:30.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:30.59 [info     ] CQL_20220422072751: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00037926538831236734, 'time_algorithm_update': 0.034157551092908564, 'temp_loss': 4.275618412591129, 'temp': 0.8465881912694501, 'alpha_loss': -29.75020656144688, 'alpha': 1.7109384960521852, 'critic_loss': 4892.130160822345, 'actor_loss': 7.521397990298409, 'time_step': 0.034623578793740686, 'td_error': 1.3881156876987877, 'init_value': -9.587495803833008, 'ave_value': -9.572270048077268} step=5190
2022-04-22 07:30.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:31.12 [info     ] CQL_20220422072751: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00038651786098590476, 'time_algorithm_update': 0.03475460496251983, 'temp_loss': 4.227735844650709, 'temp': 0.8370461179686419, 'alpha_loss': -30.90573670822761, 'alpha': 1.777367332078129, 'critic_loss': 5327.657908180546, 'actor_loss': 8.126494220226487, 'time_step': 0.035236374491211996, 'td_error': 1.3969592340920758, 'init_value': -9.832280158996582, 'ave_value': -9.828123066514632} step=5536
2022-04-22 07:31.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:31.24 [info     ] CQL_20220422072751: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003651270287574371, 'time_algorithm_update': 0.0344622817342681, 'temp_loss': 4.179572948830665, 'temp': 0.827614736694821, 'alpha_loss': -32.10370807427202, 'alpha': 1.846416084408071, 'critic_loss': 5709.1655696802745, 'actor_loss': 8.777752556552777, 'time_step': 0.03491898285860271, 'td_error': 1.423236534225135, 'init_value': -10.926555633544922, 'ave_value': -10.912021867671585} step=5882
2022-04-22 07:31.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:31.36 [info     ] CQL_20220422072751: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00037321396645783, 'time_algorithm_update': 0.03379437895868555, 'temp_loss': 4.132655156141071, 'temp': 0.818291338365202, 'alpha_loss': -33.35490534346917, 'alpha': 1.9181861267613538, 'critic_loss': 6163.904837371297, 'actor_loss': 9.439778587032604, 'time_step': 0.034261070923998174, 'td_error': 1.436843807578516, 'init_value': -11.357562065124512, 'ave_value': -11.348691097239625} step=6228
2022-04-22 07:31.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:31.49 [info     ] CQL_20220422072751: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003685034768429795, 'time_algorithm_update': 0.0348796300116302, 'temp_loss': 4.0867094194268905, 'temp': 0.8090735875457697, 'alpha_loss': -34.65339888864859, 'alpha': 1.992779225627811, 'critic_loss': 6505.702011549404, 'actor_loss': 10.1301035440037, 'time_step': 0.035334552643616075, 'td_error': 1.4563924465026419, 'init_value': -12.014019012451172, 'ave_value': -12.008694660771} step=6574
2022-04-22 07:31.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:32.01 [info     ] CQL_20220422072751: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003774758708270299, 'time_algorithm_update': 0.034793647038454265, 'temp_loss': 4.040798028769521, 'temp': 0.7999608616608416, 'alpha_loss': -35.99779754153566, 'alpha': 2.07028851894974, 'critic_loss': 6675.423720872471, 'actor_loss': 10.82564607521013, 'time_step': 0.03525790933928738, 'td_error': 1.4806427545658072, 'init_value': -12.823997497558594, 'ave_value': -12.816514708228222} step=6920
2022-04-22 07:32.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:32.14 [info     ] CQL_20220422072751: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003768632866743672, 'time_algorithm_update': 0.03533223254143158, 'temp_loss': 3.993703855944507, 'temp': 0.7909522945481229, 'alpha_loss': -37.40111358730779, 'alpha': 2.1508283608221594, 'critic_loss': 6911.7195092350075, 'actor_loss': 11.617046499527948, 'time_step': 0.035798464207290916, 'td_error': 1.4996583211936028, 'init_value': -13.38117790222168, 'ave_value': -13.378316224316615} step=7266
2022-04-22 07:32.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:32.26 [info     ] CQL_20220422072751: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003689086506132446, 'time_algorithm_update': 0.033888437844425266, 'temp_loss': 3.949002795136733, 'temp': 0.7820474410677232, 'alpha_loss': -38.8619467895155, 'alpha': 2.234527408043084, 'critic_loss': 7081.380730954209, 'actor_loss': 12.339800151097291, 'time_step': 0.0343475486501793, 'td_error': 1.525409397570852, 'init_value': -14.160551071166992, 'ave_value': -14.15796093368711} step=7612
2022-04-22 07:32.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:32.39 [info     ] CQL_20220422072751: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003700938527983737, 'time_algorithm_update': 0.03464428400028648, 'temp_loss': 3.9043916029737176, 'temp': 0.773241564028525, 'alpha_loss': -40.37214929657864, 'alpha': 2.3214889123949702, 'critic_loss': 7367.993252969201, 'actor_loss': 13.2097703625012, 'time_step': 0.03510852769620157, 'td_error': 1.5584906166690495, 'init_value': -15.134840965270996, 'ave_value': -15.128141697304347} step=7958
2022-04-22 07:32.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:32.51 [info     ] CQL_20220422072751: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00038859265388091864, 'time_algorithm_update': 0.035152956240439, 'temp_loss': 3.861419838976998, 'temp': 0.7645355463027954, 'alpha_loss': -41.93792784145113, 'alpha': 2.4118287322149112, 'critic_loss': 7546.398189125723, 'actor_loss': 13.959895960857413, 'time_step': 0.035631219086619474, 'td_error': 1.5875458721169515, 'init_value': -15.932079315185547, 'ave_value': -15.925514793486435} step=8304
2022-04-22 07:32.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:33.04 [info     ] CQL_20220422072751: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003788574582579508, 'time_algorithm_update': 0.035188677683041965, 'temp_loss': 3.817660346885637, 'temp': 0.7559267242864377, 'alpha_loss': -43.58020767035512, 'alpha': 2.505701348271673, 'critic_loss': 7415.949491114975, 'actor_loss': 14.69911415452902, 'time_step': 0.03565893145654932, 'td_error': 1.6117973459414767, 'init_value': -16.538211822509766, 'ave_value': -16.536159773673305} step=8650
2022-04-22 07:33.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:33.17 [info     ] CQL_20220422072751: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00037944454678221244, 'time_algorithm_update': 0.03596654930555752, 'temp_loss': 3.7742864989131863, 'temp': 0.7474154214983042, 'alpha_loss': -45.26721663282097, 'alpha': 2.603224279563551, 'critic_loss': 7620.295961942287, 'actor_loss': 15.60745195157266, 'time_step': 0.03643600444573199, 'td_error': 1.6496310681593236, 'init_value': -17.513818740844727, 'ave_value': -17.50830790436963} step=8996
2022-04-22 07:33.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:33.30 [info     ] CQL_20220422072751: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00039804464130732366, 'time_algorithm_update': 0.035326088094986934, 'temp_loss': 3.7319394101986307, 'temp': 0.7390005078618926, 'alpha_loss': -47.02646607195022, 'alpha': 2.7045443085576757, 'critic_loss': 7967.97039406837, 'actor_loss': 16.476255157779406, 'time_step': 0.03582188504279694, 'td_error': 1.681845987361167, 'init_value': -18.276805877685547, 'ave_value': -18.273878040557904} step=9342
2022-04-22 07:33.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:33.42 [info     ] CQL_20220422072751: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00037176347192312254, 'time_algorithm_update': 0.034235714487946795, 'temp_loss': 3.6910033749707174, 'temp': 0.7306792234754287, 'alpha_loss': -48.85766695276161, 'alpha': 2.809802133223914, 'critic_loss': 8093.269672371748, 'actor_loss': 17.269333133807763, 'time_step': 0.03468930445654544, 'td_error': 1.7270739784188802, 'init_value': -19.36831283569336, 'ave_value': -19.35656861838372} step=9688
2022-04-22 07:33.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:33.54 [info     ] CQL_20220422072751: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00037244151782438243, 'time_algorithm_update': 0.03402712028150614, 'temp_loss': 3.647397719366702, 'temp': 0.7224522851450594, 'alpha_loss': -50.756307061697015, 'alpha': 2.919174928885664, 'critic_loss': 8349.844902964685, 'actor_loss': 18.141328899846602, 'time_step': 0.03448473580310799, 'td_error': 1.7634939324595265, 'init_value': -20.182159423828125, 'ave_value': -20.1706147260381} step=10034
2022-04-22 07:33.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:34.07 [info     ] CQL_20220422072751: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003848613342108754, 'time_algorithm_update': 0.03361222854239403, 'temp_loss': 3.6068829015500286, 'temp': 0.7143204468867682, 'alpha_loss': -52.74079210358548, 'alpha': 3.03279444248001, 'critic_loss': 8214.231981575145, 'actor_loss': 18.83968718754763, 'time_step': 0.03409024814649814, 'td_error': 1.7742035613074696, 'init_value': -20.307912826538086, 'ave_value': -20.3155700397808} step=10380
2022-04-22 07:34.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:34.19 [info     ] CQL_20220422072751: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00037005939924648044, 'time_algorithm_update': 0.03508099655195468, 'temp_loss': 3.567082219730223, 'temp': 0.70627771343799, 'alpha_loss': -54.79256745707782, 'alpha': 3.150863400773506, 'critic_loss': 8242.151830066836, 'actor_loss': 19.70364497851774, 'time_step': 0.03554988527573602, 'td_error': 1.8233839394529971, 'init_value': -21.386131286621094, 'ave_value': -21.386208290328003} step=10726
2022-04-22 07:34.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:34.31 [info     ] CQL_20220422072751: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00036970590580405526, 'time_algorithm_update': 0.033812309276161856, 'temp_loss': 3.52632992116013, 'temp': 0.6983256584647074, 'alpha_loss': -56.92495555546932, 'alpha': 3.2735106807223633, 'critic_loss': 8584.912481936417, 'actor_loss': 20.51547989817713, 'time_step': 0.0342691165174363, 'td_error': 1.8695564290785274, 'init_value': -22.349451065063477, 'ave_value': -22.34406244866709} step=11072
2022-04-22 07:34.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:34.44 [info     ] CQL_20220422072751: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003707305544373617, 'time_algorithm_update': 0.03469386748495819, 'temp_loss': 3.4874818828064584, 'temp': 0.6904629703202, 'alpha_loss': -59.12671004278811, 'alpha': 3.4009259332811212, 'critic_loss': 8978.032624525831, 'actor_loss': 21.32282419700843, 'time_step': 0.03515554714754138, 'td_error': 1.9093341303626303, 'init_value': -23.129064559936523, 'ave_value': -23.12319854005585} step=11418
2022-04-22 07:34.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:34.56 [info     ] CQL_20220422072751: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00037572425224877506, 'time_algorithm_update': 0.0343195206857141, 'temp_loss': 3.4478048534062555, 'temp': 0.6826883204755066, 'alpha_loss': -61.444735278973, 'alpha': 3.533307051383002, 'critic_loss': 9393.917034524025, 'actor_loss': 22.116733341547796, 'time_step': 0.034782951277804515, 'td_error': 1.9501867365477341, 'init_value': -23.901992797851562, 'ave_value': -23.897275974242934} step=11764
2022-04-22 07:34.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:35.09 [info     ] CQL_20220422072751: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00036947231072221876, 'time_algorithm_update': 0.03445202422279843, 'temp_loss': 3.409136378007128, 'temp': 0.6750023859429222, 'alpha_loss': -63.838365510709025, 'alpha': 3.670859045375978, 'critic_loss': 9915.513333182804, 'actor_loss': 22.944135765119785, 'time_step': 0.03491644707718337, 'td_error': 1.9932275180924033, 'init_value': -24.699960708618164, 'ave_value': -24.69480701101058} step=12110
2022-04-22 07:35.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:35.21 [info     ] CQL_20220422072751: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003650050631837349, 'time_algorithm_update': 0.03452430708559951, 'temp_loss': 3.37106597768089, 'temp': 0.667401340483241, 'alpha_loss': -66.32001111686574, 'alpha': 3.8137516127845457, 'critic_loss': 10254.64684846911, 'actor_loss': 23.686999359571864, 'time_step': 0.034978235388077754, 'td_error': 2.0309105481986633, 'init_value': -25.366003036499023, 'ave_value': -25.362730487119876} step=12456
2022-04-22 07:35.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:35.34 [info     ] CQL_20220422072751: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00037010143257979025, 'time_algorithm_update': 0.03414808187870621, 'temp_loss': 3.3331850677556387, 'temp': 0.659886532091681, 'alpha_loss': -68.88760894157983, 'alpha': 3.96220213553809, 'critic_loss': 10667.46527558255, 'actor_loss': 24.397154885220388, 'time_step': 0.034608320004678186, 'td_error': 2.0693095849925385, 'init_value': -26.037080764770508, 'ave_value': -26.03441088510498} step=12802
2022-04-22 07:35.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:35.46 [info     ] CQL_20220422072751: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00038377673639727465, 'time_algorithm_update': 0.03506755484321903, 'temp_loss': 3.29574540791484, 'temp': 0.6524567995112756, 'alpha_loss': -71.5787763430204, 'alpha': 4.116432276764357, 'critic_loss': 11003.06506559339, 'actor_loss': 25.1087524050233, 'time_step': 0.03553876987082421, 'td_error': 2.113282104908726, 'init_value': -26.79836082458496, 'ave_value': -26.793586787209797} step=13148
2022-04-22 07:35.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:35.59 [info     ] CQL_20220422072751: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003887828374873696, 'time_algorithm_update': 0.03521570856171536, 'temp_loss': 3.2581469151326, 'temp': 0.6451110369552767, 'alpha_loss': -74.35348363005357, 'alpha': 4.276650765038639, 'critic_loss': 11312.009734578216, 'actor_loss': 25.77507554313351, 'time_step': 0.03568881026582222, 'td_error': 2.139857412085103, 'init_value': -27.219905853271484, 'ave_value': -27.221524566968736} step=13494
2022-04-22 07:35.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:36.12 [info     ] CQL_20220422072751: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00038611751071290475, 'time_algorithm_update': 0.03522867963493215, 'temp_loss': 3.22147032360121, 'temp': 0.6378482743839308, 'alpha_loss': -77.26600838810033, 'alpha': 4.443126988548764, 'critic_loss': 11504.845059609826, 'actor_loss': 26.39541432485415, 'time_step': 0.035701202519367196, 'td_error': 2.1752288918334077, 'init_value': -27.793899536132812, 'ave_value': -27.79668006625657} step=13840
2022-04-22 07:36.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:36.24 [info     ] CQL_20220422072751: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00037497729924372856, 'time_algorithm_update': 0.03389658886573218, 'temp_loss': 3.1850834202904235, 'temp': 0.6306675241861729, 'alpha_loss': -80.25510569666163, 'alpha': 4.616078062553626, 'critic_loss': 10201.931544662211, 'actor_loss': 26.648460112555178, 'time_step': 0.0343560834840543, 'td_error': 2.186883422453678, 'init_value': -27.973424911499023, 'ave_value': -27.978895266421205} step=14186
2022-04-22 07:36.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:36.36 [info     ] CQL_20220422072751: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00037233884623974044, 'time_algorithm_update': 0.03420574954479416, 'temp_loss': 3.148992204252695, 'temp': 0.6235676237268944, 'alpha_loss': -83.38850632154873, 'alpha': 4.795768653726302, 'critic_loss': 8460.683025029353, 'actor_loss': 26.922048282072033, 'time_step': 0.034668845937431206, 'td_error': 2.2175409419713707, 'init_value': -28.486454010009766, 'ave_value': -28.48712265933504} step=14532
2022-04-22 07:36.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:36.49 [info     ] CQL_20220422072751: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003672762413245405, 'time_algorithm_update': 0.034812870742268645, 'temp_loss': 3.1144886947091606, 'temp': 0.6165470799614239, 'alpha_loss': -86.6502290405979, 'alpha': 4.982467710627296, 'critic_loss': 7483.76638282379, 'actor_loss': 27.425790648928956, 'time_step': 0.03527029952561924, 'td_error': 2.240807773854651, 'init_value': -28.840595245361328, 'ave_value': -28.84560597882332} step=14878
2022-04-22 07:36.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:37.01 [info     ] CQL_20220422072751: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00038424461563198555, 'time_algorithm_update': 0.03453100554515861, 'temp_loss': 3.078528790804692, 'temp': 0.609605295981975, 'alpha_loss': -90.0143692369406, 'alpha': 5.176438761584332, 'critic_loss': 6800.245973796514, 'actor_loss': 28.02302063958493, 'time_step': 0.03501260831865961, 'td_error': 2.294836794185091, 'init_value': -29.724716186523438, 'ave_value': -29.71967474890863} step=15224
2022-04-22 07:37.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:37.14 [info     ] CQL_20220422072751: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003673485937835164, 'time_algorithm_update': 0.034873212003983516, 'temp_loss': 3.043618827196904, 'temp': 0.602742557925296, 'alpha_loss': -93.51934181610284, 'alpha': 5.377939823734967, 'critic_loss': 6558.8406467327495, 'actor_loss': 28.779181386694052, 'time_step': 0.035329995816842666, 'td_error': 2.329856400581082, 'init_value': -30.228511810302734, 'ave_value': -30.230849065188266} step=15570
2022-04-22 07:37.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:37.26 [info     ] CQL_20220422072751: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00036521936427651115, 'time_algorithm_update': 0.034617554934727665, 'temp_loss': 3.010178749960971, 'temp': 0.5959567503433008, 'alpha_loss': -97.17210529305342, 'alpha': 5.587288354862632, 'critic_loss': 6168.049628285315, 'actor_loss': 29.457736015319824, 'time_step': 0.03507208479622196, 'td_error': 2.3780274583838636, 'init_value': -30.958900451660156, 'ave_value': -30.958875746295146} step=15916
2022-04-22 07:37.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:37.39 [info     ] CQL_20220422072751: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00038013086153592675, 'time_algorithm_update': 0.03424304689286072, 'temp_loss': 2.976038482147834, 'temp': 0.5892465886353069, 'alpha_loss': -100.94220647646513, 'alpha': 5.804775927108147, 'critic_loss': 5801.196416072074, 'actor_loss': 30.189820212435862, 'time_step': 0.034708743150523635, 'td_error': 2.422897005440567, 'init_value': -31.609140396118164, 'ave_value': -31.6113175176455} step=16262
2022-04-22 07:37.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:37.51 [info     ] CQL_20220422072751: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00038529889431992016, 'time_algorithm_update': 0.0346734627133849, 'temp_loss': 2.9424000034442526, 'temp': 0.5826127291414779, 'alpha_loss': -104.88107502667201, 'alpha': 6.030759302866941, 'critic_loss': 5467.690768379696, 'actor_loss': 30.909396921279114, 'time_step': 0.03514977204317302, 'td_error': 2.479785635050141, 'init_value': -32.441375732421875, 'ave_value': -32.439651699075206} step=16608
2022-04-22 07:37.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:38.04 [info     ] CQL_20220422072751: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003745569659106304, 'time_algorithm_update': 0.034702213513368815, 'temp_loss': 2.9089577956006707, 'temp': 0.5760536617626345, 'alpha_loss': -108.94901218304055, 'alpha': 6.265523640406614, 'critic_loss': 5122.793530414559, 'actor_loss': 31.63154810425863, 'time_step': 0.03516085506174606, 'td_error': 2.5289390864714334, 'init_value': -33.13333511352539, 'ave_value': -33.13261683171481} step=16954
2022-04-22 07:38.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:38.16 [info     ] CQL_20220422072751: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00038650752492033676, 'time_algorithm_update': 0.0351655800218527, 'temp_loss': 2.876219869349044, 'temp': 0.5695691637565635, 'alpha_loss': -113.18646837796778, 'alpha': 6.509403731781624, 'critic_loss': 4827.782520095737, 'actor_loss': 32.39897806244778, 'time_step': 0.03563757301065963, 'td_error': 2.5845088757557564, 'init_value': -33.90141296386719, 'ave_value': -33.89810564973559} step=17300
2022-04-22 07:38.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422072751/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:38.17 [info     ] FQE_20220422073817: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00011023986770446042, 'time_algorithm_update': 0.0019460112215524696, 'loss': 0.007305126170156501, 'time_step': 0.0021094945539911108, 'init_value': -0.11262791603803635, 'ave_value': -0.08292959298737146, 'soft_opc': nan} step=166




2022-04-22 07:38.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.17 [info     ] FQE_20220422073817: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00011138312787894744, 'time_algorithm_update': 0.001978632915450866, 'loss': 0.004335048423339444, 'time_step': 0.002141913735722921, 'init_value': -0.15716904401779175, 'ave_value': -0.10842690566923839, 'soft_opc': nan} step=332




2022-04-22 07:38.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.18 [info     ] FQE_20220422073817: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00011923801468079349, 'time_algorithm_update': 0.0021528163588190653, 'loss': 0.0035762568775856173, 'time_step': 0.002329850771340979, 'init_value': -0.19009128212928772, 'ave_value': -0.13590339851641173, 'soft_opc': nan} step=498




2022-04-22 07:38.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.18 [info     ] FQE_20220422073817: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001311560711228704, 'time_algorithm_update': 0.0025483154388795414, 'loss': 0.0033023727991820188, 'time_step': 0.002746985619326672, 'init_value': -0.22539740800857544, 'ave_value': -0.1622699544840568, 'soft_opc': nan} step=664




2022-04-22 07:38.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.19 [info     ] FQE_20220422073817: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.000129044774066971, 'time_algorithm_update': 0.0025151120610983975, 'loss': 0.0029439120918108397, 'time_step': 0.0027050656008433148, 'init_value': -0.2834964394569397, 'ave_value': -0.20636762155824015, 'soft_opc': nan} step=830




2022-04-22 07:38.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.19 [info     ] FQE_20220422073817: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00012139527194471245, 'time_algorithm_update': 0.0023429063429315404, 'loss': 0.002639894565870335, 'time_step': 0.0025207249515027887, 'init_value': -0.2829235792160034, 'ave_value': -0.20417181369722695, 'soft_opc': nan} step=996




2022-04-22 07:38.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.20 [info     ] FQE_20220422073817: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00013259089136698158, 'time_algorithm_update': 0.00250171322420419, 'loss': 0.0024465415795075604, 'time_step': 0.0026956796646118164, 'init_value': -0.3214164078235626, 'ave_value': -0.24115160049719586, 'soft_opc': nan} step=1162




2022-04-22 07:38.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.20 [info     ] FQE_20220422073817: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00013563719140477926, 'time_algorithm_update': 0.0026461000902106963, 'loss': 0.002167282635850989, 'time_step': 0.0028463587703475035, 'init_value': -0.35986799001693726, 'ave_value': -0.2754416933618822, 'soft_opc': nan} step=1328




2022-04-22 07:38.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.21 [info     ] FQE_20220422073817: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00011927822986281062, 'time_algorithm_update': 0.002083402082144496, 'loss': 0.0019074614224282194, 'time_step': 0.002257568290434688, 'init_value': -0.4217567443847656, 'ave_value': -0.3416091193419856, 'soft_opc': nan} step=1494




2022-04-22 07:38.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.21 [info     ] FQE_20220422073817: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00011940462043486445, 'time_algorithm_update': 0.002214056899748653, 'loss': 0.0018103553149821125, 'time_step': 0.0023850260010684827, 'init_value': -0.4945445656776428, 'ave_value': -0.4107532410303483, 'soft_opc': nan} step=1660




2022-04-22 07:38.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.22 [info     ] FQE_20220422073817: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001293966569096209, 'time_algorithm_update': 0.0022648817085358032, 'loss': 0.0016397082535073116, 'time_step': 0.002455381025750953, 'init_value': -0.5570868253707886, 'ave_value': -0.4657695306448249, 'soft_opc': nan} step=1826




2022-04-22 07:38.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.22 [info     ] FQE_20220422073817: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00011690553412379988, 'time_algorithm_update': 0.0019471659717789616, 'loss': 0.001477315294323489, 'time_step': 0.0021191189088017107, 'init_value': -0.5898886322975159, 'ave_value': -0.49148021099820466, 'soft_opc': nan} step=1992




2022-04-22 07:38.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.22 [info     ] FQE_20220422073817: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00012006960719464773, 'time_algorithm_update': 0.0020884921751826643, 'loss': 0.0016993628781504862, 'time_step': 0.002268491021121841, 'init_value': -0.7292324900627136, 'ave_value': -0.6128743453530242, 'soft_opc': nan} step=2158




2022-04-22 07:38.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.23 [info     ] FQE_20220422073817: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00012572127652455526, 'time_algorithm_update': 0.002362641943506448, 'loss': 0.0017561577495428767, 'time_step': 0.0025491843740624116, 'init_value': -0.7718031406402588, 'ave_value': -0.6422026394455282, 'soft_opc': nan} step=2324




2022-04-22 07:38.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.23 [info     ] FQE_20220422073817: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00011786208095320736, 'time_algorithm_update': 0.002219599413584514, 'loss': 0.0018901069969757944, 'time_step': 0.002393568854734122, 'init_value': -0.8779745101928711, 'ave_value': -0.7258967498107604, 'soft_opc': nan} step=2490




2022-04-22 07:38.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.24 [info     ] FQE_20220422073817: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00012596687638615988, 'time_algorithm_update': 0.002196077840873994, 'loss': 0.002155374796606645, 'time_step': 0.0023776824215808547, 'init_value': -0.9933245182037354, 'ave_value': -0.8268165517548048, 'soft_opc': nan} step=2656




2022-04-22 07:38.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.24 [info     ] FQE_20220422073817: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00011855148407350103, 'time_algorithm_update': 0.002276127596935594, 'loss': 0.0024337154271008156, 'time_step': 0.0024499807013086527, 'init_value': -1.0559325218200684, 'ave_value': -0.8571719531498446, 'soft_opc': nan} step=2822




2022-04-22 07:38.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.25 [info     ] FQE_20220422073817: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00012171124837484704, 'time_algorithm_update': 0.0021854337439479597, 'loss': 0.0027010352107070677, 'time_step': 0.00236584048673331, 'init_value': -1.1320953369140625, 'ave_value': -0.9154181710150731, 'soft_opc': nan} step=2988




2022-04-22 07:38.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.25 [info     ] FQE_20220422073817: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00011932275381432958, 'time_algorithm_update': 0.0021557879735188312, 'loss': 0.003274634132457684, 'time_step': 0.0023326256189001612, 'init_value': -1.2295953035354614, 'ave_value': -0.9919532558234694, 'soft_opc': nan} step=3154




2022-04-22 07:38.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.26 [info     ] FQE_20220422073817: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012476042092564595, 'time_algorithm_update': 0.0024359542203236774, 'loss': 0.0037316769818741694, 'time_step': 0.002623084079788392, 'init_value': -1.2883861064910889, 'ave_value': -1.0290660609365314, 'soft_opc': nan} step=3320




2022-04-22 07:38.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.26 [info     ] FQE_20220422073817: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00012631588671580855, 'time_algorithm_update': 0.0024018804711031625, 'loss': 0.003988737342998397, 'time_step': 0.002591707620276026, 'init_value': -1.406015396118164, 'ave_value': -1.1336510429007782, 'soft_opc': nan} step=3486




2022-04-22 07:38.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.26 [info     ] FQE_20220422073817: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.000120952904942524, 'time_algorithm_update': 0.002165313226630889, 'loss': 0.004450310234131898, 'time_step': 0.002344609743141266, 'init_value': -1.451623558998108, 'ave_value': -1.151906556889482, 'soft_opc': nan} step=3652




2022-04-22 07:38.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.27 [info     ] FQE_20220422073817: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.0001224336853946548, 'time_algorithm_update': 0.0022379935505878494, 'loss': 0.004795784530256516, 'time_step': 0.002418977668486446, 'init_value': -1.554129719734192, 'ave_value': -1.2282607302975816, 'soft_opc': nan} step=3818




2022-04-22 07:38.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.27 [info     ] FQE_20220422073817: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00012131915035018001, 'time_algorithm_update': 0.002120820872754936, 'loss': 0.005304786049850644, 'time_step': 0.002302492957517325, 'init_value': -1.6137232780456543, 'ave_value': -1.2795159759989998, 'soft_opc': nan} step=3984




2022-04-22 07:38.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.28 [info     ] FQE_20220422073817: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00013562282883977317, 'time_algorithm_update': 0.0025104011397763908, 'loss': 0.005634130095657668, 'time_step': 0.002707880663584514, 'init_value': -1.6514625549316406, 'ave_value': -1.3015832016034774, 'soft_opc': nan} step=4150




2022-04-22 07:38.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.28 [info     ] FQE_20220422073817: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00012254140463220068, 'time_algorithm_update': 0.002172563449445977, 'loss': 0.006017827957655382, 'time_step': 0.002354637686028538, 'init_value': -1.7744433879852295, 'ave_value': -1.4095588824837595, 'soft_opc': nan} step=4316




2022-04-22 07:38.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.29 [info     ] FQE_20220422073817: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001262857253292957, 'time_algorithm_update': 0.002314706882798528, 'loss': 0.006723546734393899, 'time_step': 0.002506044973810035, 'init_value': -1.8279908895492554, 'ave_value': -1.4544476412958256, 'soft_opc': nan} step=4482




2022-04-22 07:38.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.29 [info     ] FQE_20220422073817: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00012550583804946347, 'time_algorithm_update': 0.002304354345942118, 'loss': 0.007032525957266082, 'time_step': 0.0024899330483861715, 'init_value': -1.8858942985534668, 'ave_value': -1.493110804370529, 'soft_opc': nan} step=4648




2022-04-22 07:38.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.30 [info     ] FQE_20220422073817: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00012199562716196819, 'time_algorithm_update': 0.002281224871256265, 'loss': 0.007441666809153317, 'time_step': 0.002461612942707108, 'init_value': -1.8748894929885864, 'ave_value': -1.4763436785454418, 'soft_opc': nan} step=4814




2022-04-22 07:38.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.30 [info     ] FQE_20220422073817: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00012792162148349257, 'time_algorithm_update': 0.0023595109043351137, 'loss': 0.008065368124411486, 'time_step': 0.0025448899671255826, 'init_value': -1.9721497297286987, 'ave_value': -1.5555597339203027, 'soft_opc': nan} step=4980




2022-04-22 07:38.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.31 [info     ] FQE_20220422073817: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00012090838099100504, 'time_algorithm_update': 0.002215728702315365, 'loss': 0.008813806545797241, 'time_step': 0.0023930834000369153, 'init_value': -2.053175449371338, 'ave_value': -1.6259556461981421, 'soft_opc': nan} step=5146




2022-04-22 07:38.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.31 [info     ] FQE_20220422073817: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.0001293348978800946, 'time_algorithm_update': 0.0023169804768389964, 'loss': 0.009044334065366862, 'time_step': 0.0025054158934627673, 'init_value': -2.107020139694214, 'ave_value': -1.6703316881128394, 'soft_opc': nan} step=5312




2022-04-22 07:38.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.32 [info     ] FQE_20220422073817: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00011558130563023579, 'time_algorithm_update': 0.002120379942009248, 'loss': 0.009574307128436392, 'time_step': 0.0022898524640554405, 'init_value': -2.154834508895874, 'ave_value': -1.697410900308541, 'soft_opc': nan} step=5478




2022-04-22 07:38.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.32 [info     ] FQE_20220422073817: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00012100461017654603, 'time_algorithm_update': 0.0021100187876138344, 'loss': 0.008709402660227728, 'time_step': 0.0022859343563217715, 'init_value': -2.21760892868042, 'ave_value': -1.7439048256028677, 'soft_opc': nan} step=5644




2022-04-22 07:38.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.33 [info     ] FQE_20220422073817: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.0001438051821237587, 'time_algorithm_update': 0.0028520520911159285, 'loss': 0.011121741699052032, 'time_step': 0.0030628370951457195, 'init_value': -2.232024669647217, 'ave_value': -1.7401684272430233, 'soft_opc': nan} step=5810




2022-04-22 07:38.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.33 [info     ] FQE_20220422073817: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001215963478547981, 'time_algorithm_update': 0.0023338320743606753, 'loss': 0.010975748178701993, 'time_step': 0.0025127264390508814, 'init_value': -2.2617785930633545, 'ave_value': -1.7469115368786965, 'soft_opc': nan} step=5976




2022-04-22 07:38.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.33 [info     ] FQE_20220422073817: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00013057295098362198, 'time_algorithm_update': 0.0024577393589249575, 'loss': 0.011294606230635454, 'time_step': 0.0026517086718455853, 'init_value': -2.325131416320801, 'ave_value': -1.8123292345067, 'soft_opc': nan} step=6142




2022-04-22 07:38.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.34 [info     ] FQE_20220422073817: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00012685161039053676, 'time_algorithm_update': 0.0020887535738657757, 'loss': 0.012002386395919069, 'time_step': 0.002270517579044204, 'init_value': -2.3476834297180176, 'ave_value': -1.8118165689545709, 'soft_opc': nan} step=6308




2022-04-22 07:38.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.34 [info     ] FQE_20220422073817: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00013468926211437546, 'time_algorithm_update': 0.002601930894047381, 'loss': 0.012549520217384357, 'time_step': 0.002796828028667404, 'init_value': -2.4009060859680176, 'ave_value': -1.86084955929703, 'soft_opc': nan} step=6474




2022-04-22 07:38.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.35 [info     ] FQE_20220422073817: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00013018085295895496, 'time_algorithm_update': 0.002373370779566018, 'loss': 0.012424555700763229, 'time_step': 0.0025628101394837162, 'init_value': -2.3347272872924805, 'ave_value': -1.7871950824862464, 'soft_opc': nan} step=6640




2022-04-22 07:38.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.35 [info     ] FQE_20220422073817: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00012489542903670347, 'time_algorithm_update': 0.002268524055021355, 'loss': 0.012684146050330117, 'time_step': 0.00245312035801899, 'init_value': -2.3870372772216797, 'ave_value': -1.8440871844244366, 'soft_opc': nan} step=6806




2022-04-22 07:38.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.36 [info     ] FQE_20220422073817: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00012312021600194723, 'time_algorithm_update': 0.002256166504090091, 'loss': 0.013060464457109436, 'time_step': 0.0024369380560265966, 'init_value': -2.3958539962768555, 'ave_value': -1.8462935950909112, 'soft_opc': nan} step=6972




2022-04-22 07:38.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.36 [info     ] FQE_20220422073817: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00012786704373646932, 'time_algorithm_update': 0.0023458032722932748, 'loss': 0.013622944100538313, 'time_step': 0.002534989851066865, 'init_value': -2.4556360244750977, 'ave_value': -1.8980106003850967, 'soft_opc': nan} step=7138




2022-04-22 07:38.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.37 [info     ] FQE_20220422073817: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00012952161122517413, 'time_algorithm_update': 0.002420158271329949, 'loss': 0.01390171919862392, 'time_step': 0.0026091351566544497, 'init_value': -2.4923386573791504, 'ave_value': -1.9257911950047757, 'soft_opc': nan} step=7304




2022-04-22 07:38.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.37 [info     ] FQE_20220422073817: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00012010120483766119, 'time_algorithm_update': 0.0021765102823096587, 'loss': 0.014156235653362855, 'time_step': 0.002357783087764878, 'init_value': -2.5059847831726074, 'ave_value': -1.9357149231541264, 'soft_opc': nan} step=7470




2022-04-22 07:38.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.38 [info     ] FQE_20220422073817: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001304149627685547, 'time_algorithm_update': 0.0025199005402714372, 'loss': 0.014561756709324062, 'time_step': 0.002714547766260354, 'init_value': -2.5428032875061035, 'ave_value': -1.977147924497321, 'soft_opc': nan} step=7636




2022-04-22 07:38.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.38 [info     ] FQE_20220422073817: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00013507705136954067, 'time_algorithm_update': 0.002488700740308647, 'loss': 0.01485796299579284, 'time_step': 0.0026847784777721726, 'init_value': -2.617218017578125, 'ave_value': -2.0585911773296224, 'soft_opc': nan} step=7802




2022-04-22 07:38.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.39 [info     ] FQE_20220422073817: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00012475180338664227, 'time_algorithm_update': 0.0022567998932068608, 'loss': 0.014667882007569164, 'time_step': 0.00243980913277132, 'init_value': -2.6188321113586426, 'ave_value': -2.0493055833864386, 'soft_opc': nan} step=7968




2022-04-22 07:38.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.39 [info     ] FQE_20220422073817: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00012569829642054546, 'time_algorithm_update': 0.0024570528283176653, 'loss': 0.0157659596413604, 'time_step': 0.0026409295668084936, 'init_value': -2.6436479091644287, 'ave_value': -2.076408830473015, 'soft_opc': nan} step=8134




2022-04-22 07:38.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:38.40 [info     ] FQE_20220422073817: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00012772485434290874, 'time_algorithm_update': 0.0023217373583690228, 'loss': 0.015799453843550488, 'time_step': 0.002509186066776873, 'init_value': -2.6165804862976074, 'ave_value': -2.0619964156400514, 'soft_opc': nan} step=8300




2022-04-22 07:38.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073817/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

start
[ 0.00000000e+00  7.95731469e+08  4.27108923e-02  1.24000047e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34732792e-01  6.00000000e-01  3.37421461e-01]
Read chunk # 39 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01  4.94000047e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.49080829e-02  7.04145269e-02]
Read chunk # 40 out of 4999
torch.Size([44400, 6])
2022-04-22 07:38.40 [debug    ] RoundIterator is selected.
2022-04-22 07:38.40 [info     ] Directory is created at d3rlpy_logs/FQE_20220422073840
2022-04-22 07:38.40 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:38.40 [debug    ] Building models...
2022-04-22 07:38.40 [debug    ] Models have been built.
2022-04-22 07:38.40 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422073840/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:38.41 [info     ] FQE_20220422073840: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00012849929720856423, 'time_algorithm_update': 0.002382516860961914, 'loss': 0.022437870627009245, 'time_step': 0.002572612013927726, 'init_value': -1.472324013710022, 'ave_value': -1.4656769454210727, 'soft_opc': nan} step=344




2022-04-22 07:38.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.42 [info     ] FQE_20220422073840: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00013103734615237215, 'time_algorithm_update': 0.0024969958981802295, 'loss': 0.021134463620783632, 'time_step': 0.0026906007944151413, 'init_value': -2.0501508712768555, 'ave_value': -2.0585494630777084, 'soft_opc': nan} step=688




2022-04-22 07:38.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.43 [info     ] FQE_20220422073840: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001317013141720794, 'time_algorithm_update': 0.0024269944013551223, 'loss': 0.024869230440492895, 'time_step': 0.0026220533736916476, 'init_value': -2.944082260131836, 'ave_value': -3.0274922687996617, 'soft_opc': nan} step=1032




2022-04-22 07:38.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.44 [info     ] FQE_20220422073840: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00012086227882740109, 'time_algorithm_update': 0.0021333001380742984, 'loss': 0.029046187948890377, 'time_step': 0.0023079722426658455, 'init_value': -3.3836817741394043, 'ave_value': -3.550352213592143, 'soft_opc': nan} step=1376




2022-04-22 07:38.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.45 [info     ] FQE_20220422073840: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00012475806613301122, 'time_algorithm_update': 0.002311387727426928, 'loss': 0.035359691582121995, 'time_step': 0.0024927542653194693, 'init_value': -4.037363529205322, 'ave_value': -4.335038284138516, 'soft_opc': nan} step=1720




2022-04-22 07:38.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.46 [info     ] FQE_20220422073840: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00013112467388774074, 'time_algorithm_update': 0.0024411567421846613, 'loss': 0.04336965310138239, 'time_step': 0.0026366052239440206, 'init_value': -4.4964776039123535, 'ave_value': -4.939629670272808, 'soft_opc': nan} step=2064




2022-04-22 07:38.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.47 [info     ] FQE_20220422073840: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00012722957965939543, 'time_algorithm_update': 0.0022337055483529736, 'loss': 0.05309177852820519, 'time_step': 0.002424022486043531, 'init_value': -5.098881721496582, 'ave_value': -5.6765993808035375, 'soft_opc': nan} step=2408




2022-04-22 07:38.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.48 [info     ] FQE_20220422073840: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001393584317939226, 'time_algorithm_update': 0.002535885156587113, 'loss': 0.06676989628359416, 'time_step': 0.0027407075083532998, 'init_value': -5.539638519287109, 'ave_value': -6.3326294512395656, 'soft_opc': nan} step=2752




2022-04-22 07:38.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.49 [info     ] FQE_20220422073840: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00012720185656880223, 'time_algorithm_update': 0.0024123392825902896, 'loss': 0.08392739036596965, 'time_step': 0.00260282602421073, 'init_value': -6.024567604064941, 'ave_value': -6.9620482746578825, 'soft_opc': nan} step=3096




2022-04-22 07:38.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.49 [info     ] FQE_20220422073840: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00012858315955760867, 'time_algorithm_update': 0.0022609712079513906, 'loss': 0.10010343474379285, 'time_step': 0.0024475573107253673, 'init_value': -6.4073028564453125, 'ave_value': -7.647419650019524, 'soft_opc': nan} step=3440




2022-04-22 07:38.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.51 [info     ] FQE_20220422073840: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00013851287753082985, 'time_algorithm_update': 0.00269391093143197, 'loss': 0.1179532113019377, 'time_step': 0.002896623556004014, 'init_value': -6.785120487213135, 'ave_value': -8.167144263031062, 'soft_opc': nan} step=3784




2022-04-22 07:38.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.51 [info     ] FQE_20220422073840: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001249257908311001, 'time_algorithm_update': 0.002230888189271439, 'loss': 0.14531959496373528, 'time_step': 0.002413445433904958, 'init_value': -7.213828086853027, 'ave_value': -8.769915009215307, 'soft_opc': nan} step=4128




2022-04-22 07:38.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.52 [info     ] FQE_20220422073840: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00011917879415112872, 'time_algorithm_update': 0.0020488368910412456, 'loss': 0.16572782165522493, 'time_step': 0.002221096393673919, 'init_value': -7.691803455352783, 'ave_value': -9.358853473544103, 'soft_opc': nan} step=4472




2022-04-22 07:38.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.53 [info     ] FQE_20220422073840: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00012887702431789663, 'time_algorithm_update': 0.002348809741264166, 'loss': 0.19674253059811023, 'time_step': 0.0025415067062821498, 'init_value': -8.267610549926758, 'ave_value': -10.276438667203996, 'soft_opc': nan} step=4816




2022-04-22 07:38.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.54 [info     ] FQE_20220422073840: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00012154634608778842, 'time_algorithm_update': 0.002202556576839713, 'loss': 0.22257479028451407, 'time_step': 0.0023830664712329243, 'init_value': -8.532371520996094, 'ave_value': -10.883500973807715, 'soft_opc': nan} step=5160




2022-04-22 07:38.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.55 [info     ] FQE_20220422073840: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001252473786819813, 'time_algorithm_update': 0.0021814482156620467, 'loss': 0.24213717253012365, 'time_step': 0.002363411493079607, 'init_value': -9.295439720153809, 'ave_value': -11.917113936658914, 'soft_opc': nan} step=5504




2022-04-22 07:38.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.56 [info     ] FQE_20220422073840: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00012875712195108103, 'time_algorithm_update': 0.002281678970469985, 'loss': 0.26733116307404153, 'time_step': 0.002474216527717058, 'init_value': -9.016035079956055, 'ave_value': -12.056673395772556, 'soft_opc': nan} step=5848




2022-04-22 07:38.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.57 [info     ] FQE_20220422073840: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00012693779413090197, 'time_algorithm_update': 0.0021131530750629515, 'loss': 0.2683425797745152, 'time_step': 0.002299695513969244, 'init_value': -9.648193359375, 'ave_value': -12.908659134031618, 'soft_opc': nan} step=6192




2022-04-22 07:38.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.58 [info     ] FQE_20220422073840: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00012594392133313557, 'time_algorithm_update': 0.002125149549439896, 'loss': 0.2845909486329833, 'time_step': 0.0023098220658856767, 'init_value': -10.265275955200195, 'ave_value': -13.849146326317511, 'soft_opc': nan} step=6536




2022-04-22 07:38.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:38.59 [info     ] FQE_20220422073840: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00012947099153385607, 'time_algorithm_update': 0.00224577548891999, 'loss': 0.29692070684287436, 'time_step': 0.002438023339870364, 'init_value': -10.760558128356934, 'ave_value': -14.4751606097168, 'soft_opc': nan} step=6880




2022-04-22 07:38.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.00 [info     ] FQE_20220422073840: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00013158557026885276, 'time_algorithm_update': 0.0024037215598793917, 'loss': 0.3062171404044209, 'time_step': 0.0025986273621403893, 'init_value': -11.385546684265137, 'ave_value': -15.44799241500462, 'soft_opc': nan} step=7224




2022-04-22 07:39.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.01 [info     ] FQE_20220422073840: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00013735128003497456, 'time_algorithm_update': 0.0025804500247156898, 'loss': 0.30684875194957956, 'time_step': 0.002778740123260853, 'init_value': -11.924577713012695, 'ave_value': -16.125656364309425, 'soft_opc': nan} step=7568




2022-04-22 07:39.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.02 [info     ] FQE_20220422073840: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00013098467228024504, 'time_algorithm_update': 0.0023843847041906314, 'loss': 0.3157080101055028, 'time_step': 0.0025773907816687295, 'init_value': -12.610039710998535, 'ave_value': -16.917920285391176, 'soft_opc': nan} step=7912




2022-04-22 07:39.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.03 [info     ] FQE_20220422073840: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001362174056297125, 'time_algorithm_update': 0.002663573553395826, 'loss': 0.3190487757336002, 'time_step': 0.0028666396473729333, 'init_value': -12.968233108520508, 'ave_value': -17.278519195106785, 'soft_opc': nan} step=8256




2022-04-22 07:39.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.04 [info     ] FQE_20220422073840: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001308917999267578, 'time_algorithm_update': 0.002434905878333158, 'loss': 0.32694971812130924, 'time_step': 0.002631373876749083, 'init_value': -13.41519546508789, 'ave_value': -17.825883309965164, 'soft_opc': nan} step=8600




2022-04-22 07:39.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.05 [info     ] FQE_20220422073840: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00013155992641005406, 'time_algorithm_update': 0.0022837776084278904, 'loss': 0.34124934716626654, 'time_step': 0.0024758022884989895, 'init_value': -14.473055839538574, 'ave_value': -18.708258769836124, 'soft_opc': nan} step=8944




2022-04-22 07:39.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.05 [info     ] FQE_20220422073840: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00012434637823770212, 'time_algorithm_update': 0.002319168212801911, 'loss': 0.35120574693563716, 'time_step': 0.002507059380065563, 'init_value': -15.226332664489746, 'ave_value': -19.38469087474577, 'soft_opc': nan} step=9288




2022-04-22 07:39.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.06 [info     ] FQE_20220422073840: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001281007777812869, 'time_algorithm_update': 0.0022153078123580577, 'loss': 0.3640850164364417, 'time_step': 0.002402753330940424, 'init_value': -15.613670349121094, 'ave_value': -19.675263009710356, 'soft_opc': nan} step=9632




2022-04-22 07:39.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.07 [info     ] FQE_20220422073840: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001270909642064294, 'time_algorithm_update': 0.0022735325402991717, 'loss': 0.3819418790382008, 'time_step': 0.002462514611177666, 'init_value': -16.74124526977539, 'ave_value': -20.351813833600094, 'soft_opc': nan} step=9976




2022-04-22 07:39.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.08 [info     ] FQE_20220422073840: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00012870098269262978, 'time_algorithm_update': 0.002378348694291226, 'loss': 0.3969813543534296, 'time_step': 0.00257088070692018, 'init_value': -17.873180389404297, 'ave_value': -20.88329971313124, 'soft_opc': nan} step=10320




2022-04-22 07:39.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.09 [info     ] FQE_20220422073840: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.0001320762689723525, 'time_algorithm_update': 0.0023866995822551637, 'loss': 0.42366302991827387, 'time_step': 0.0025801360607147217, 'init_value': -18.099807739257812, 'ave_value': -20.78926620002215, 'soft_opc': nan} step=10664




2022-04-22 07:39.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.10 [info     ] FQE_20220422073840: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00012075000031049861, 'time_algorithm_update': 0.002049063527306845, 'loss': 0.4387740900111926, 'time_step': 0.0022260345691858334, 'init_value': -19.003360748291016, 'ave_value': -21.11411098934173, 'soft_opc': nan} step=11008




2022-04-22 07:39.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.11 [info     ] FQE_20220422073840: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00012532916179923125, 'time_algorithm_update': 0.0021026342414146248, 'loss': 0.45880802959030453, 'time_step': 0.0022876449795656427, 'init_value': -20.001625061035156, 'ave_value': -21.674361067953697, 'soft_opc': nan} step=11352




2022-04-22 07:39.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.12 [info     ] FQE_20220422073840: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00013532194980355196, 'time_algorithm_update': 0.0024753150551818136, 'loss': 0.4704338909224282, 'time_step': 0.0026756143847177197, 'init_value': -20.62024688720703, 'ave_value': -21.81951986620505, 'soft_opc': nan} step=11696




2022-04-22 07:39.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.13 [info     ] FQE_20220422073840: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00012949109077453613, 'time_algorithm_update': 0.0023697413677393002, 'loss': 0.49285870892818756, 'time_step': 0.0025631314100221145, 'init_value': -21.60324478149414, 'ave_value': -22.24036229048176, 'soft_opc': nan} step=12040




2022-04-22 07:39.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.14 [info     ] FQE_20220422073840: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00013199171354604322, 'time_algorithm_update': 0.002362843169722446, 'loss': 0.5394562101966247, 'time_step': 0.002560363259426383, 'init_value': -22.517580032348633, 'ave_value': -22.887458575374303, 'soft_opc': nan} step=12384




2022-04-22 07:39.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.15 [info     ] FQE_20220422073840: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00013060209363005882, 'time_algorithm_update': 0.0023421430310537647, 'loss': 0.5620454710171839, 'time_step': 0.0025354062401971153, 'init_value': -23.832237243652344, 'ave_value': -23.65684436522424, 'soft_opc': nan} step=12728




2022-04-22 07:39.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.16 [info     ] FQE_20220422073840: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00012856167416239894, 'time_algorithm_update': 0.002334134523258653, 'loss': 0.5844272768536453, 'time_step': 0.0025257260300392327, 'init_value': -24.582250595092773, 'ave_value': -24.27557629892762, 'soft_opc': nan} step=13072




2022-04-22 07:39.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.17 [info     ] FQE_20220422073840: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00011990098066108172, 'time_algorithm_update': 0.0020949798961018406, 'loss': 0.6088893252294943, 'time_step': 0.002273743928864945, 'init_value': -25.129928588867188, 'ave_value': -24.76881422697449, 'soft_opc': nan} step=13416




2022-04-22 07:39.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.17 [info     ] FQE_20220422073840: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00012358260709185932, 'time_algorithm_update': 0.0020732422207677087, 'loss': 0.6274070259411061, 'time_step': 0.002255127180454343, 'init_value': -25.32865333557129, 'ave_value': -24.898354876960987, 'soft_opc': nan} step=13760




2022-04-22 07:39.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.18 [info     ] FQE_20220422073840: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001299804033235062, 'time_algorithm_update': 0.0023595697658006535, 'loss': 0.6306363765561823, 'time_step': 0.0025500350220258846, 'init_value': -26.00920867919922, 'ave_value': -25.455692349834504, 'soft_opc': nan} step=14104




2022-04-22 07:39.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.19 [info     ] FQE_20220422073840: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00012949524923812513, 'time_algorithm_update': 0.002443703801132912, 'loss': 0.6574982206911109, 'time_step': 0.002638720495756282, 'init_value': -26.31729507446289, 'ave_value': -25.4168726362976, 'soft_opc': nan} step=14448




2022-04-22 07:39.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.20 [info     ] FQE_20220422073840: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00013150170791980832, 'time_algorithm_update': 0.0024245776409326596, 'loss': 0.6756400858149539, 'time_step': 0.002619628989419272, 'init_value': -26.670948028564453, 'ave_value': -25.859309867837442, 'soft_opc': nan} step=14792




2022-04-22 07:39.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.21 [info     ] FQE_20220422073840: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00012749502825182537, 'time_algorithm_update': 0.0022235387979551804, 'loss': 0.701155744049005, 'time_step': 0.002411438975223275, 'init_value': -27.44106674194336, 'ave_value': -26.257652885932476, 'soft_opc': nan} step=15136




2022-04-22 07:39.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.22 [info     ] FQE_20220422073840: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00012618372606676678, 'time_algorithm_update': 0.002269878636958987, 'loss': 0.7246078322208378, 'time_step': 0.002453991146974785, 'init_value': -27.59814453125, 'ave_value': -26.261296503278974, 'soft_opc': nan} step=15480




2022-04-22 07:39.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.23 [info     ] FQE_20220422073840: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001380443572998047, 'time_algorithm_update': 0.002698495637538821, 'loss': 0.7268228440133985, 'time_step': 0.0029024807519690936, 'init_value': -27.790464401245117, 'ave_value': -26.3859139828863, 'soft_opc': nan} step=15824




2022-04-22 07:39.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.24 [info     ] FQE_20220422073840: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001336703466814618, 'time_algorithm_update': 0.0024343735949937688, 'loss': 0.7261231692648644, 'time_step': 0.0026358518489571505, 'init_value': -28.035737991333008, 'ave_value': -26.714670062093653, 'soft_opc': nan} step=16168




2022-04-22 07:39.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.25 [info     ] FQE_20220422073840: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00012557797653730526, 'time_algorithm_update': 0.0021810822708662166, 'loss': 0.7456312730670148, 'time_step': 0.0023672442103541175, 'init_value': -28.318737030029297, 'ave_value': -26.94086987880134, 'soft_opc': nan} step=16512




2022-04-22 07:39.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.26 [info     ] FQE_20220422073840: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00012520024942797283, 'time_algorithm_update': 0.002242703770482263, 'loss': 0.7624774463258164, 'time_step': 0.002427939065667086, 'init_value': -28.735870361328125, 'ave_value': -27.329293116792062, 'soft_opc': nan} step=16856




2022-04-22 07:39.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:39.27 [info     ] FQE_20220422073840: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00012366300405457963, 'time_algorithm_update': 0.0021036870257799016, 'loss': 0.7773132446915084, 'time_step': 0.0022861077341922494, 'init_value': -28.907304763793945, 'ave_value': -27.591811264013486, 'soft_opc': nan} step=17200




2022-04-22 07:39.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422073840/model_17200.pt
search iteration:  31
using hyper params:  [0.007460172379283987, 0.004034596908210848, 9.304126838021404e-05, 5]
2022-04-22 07:39.27 [debug    ] RoundIterator is selected.
2022-04-22 07:39.27 [info     ] Directory is created at d3rlpy_logs/CQL_20220422073927
2022-04-22 07:39.27 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:39.27 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 07:39.27 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422073927/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.007460172379283987, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:39.40 [info     ] CQL_20220422073927: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00038975649486387395, 'time_algorithm_update': 0.03482698980783452, 'temp_loss': 4.9093060066245195, 'temp': 0.9836138864128576, 'alpha_loss': -17.740333722505955, 'alpha': 1.0177293161436312, 'critic_loss': 110.37136496836051, 'actor_loss': 3.2015651038207245, 'time_step': 0.035311248950186495, 'td_error': 1.2872665048288858, 'init_value': -6.981492519378662, 'ave_value': -6.440820238385624} step=346
2022-04-22 07:39.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:39.52 [info     ] CQL_20220422073927: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003752253648173602, 'time_algorithm_update': 0.03501925716510398, 'temp_loss': 4.8041898349806065, 'temp': 0.952546316247455, 'alpha_loss': -18.369106992820782, 'alpha': 1.0541569321830837, 'critic_loss': 151.50061308579637, 'actor_loss': 8.595635124713699, 'time_step': 0.0354873975577382, 'td_error': 1.401041815158424, 'init_value': -11.44303035736084, 'ave_value': -10.781401850115694} step=692
2022-04-22 07:39.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:40.04 [info     ] CQL_20220422073927: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003649526937848571, 'time_algorithm_update': 0.03327070288575454, 'temp_loss': 4.659196367153543, 'temp': 0.9230596007983809, 'alpha_loss': -19.03850587944075, 'alpha': 1.0924064237947408, 'critic_loss': 288.777766497838, 'actor_loss': 12.57580775608217, 'time_step': 0.03372613887566363, 'td_error': 1.4984862057207362, 'init_value': -14.509749412536621, 'ave_value': -13.72252651574545} step=1038
2022-04-22 07:40.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:40.17 [info     ] CQL_20220422073927: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00037802643858628464, 'time_algorithm_update': 0.03426565875896829, 'temp_loss': 4.516931478687794, 'temp': 0.8948433164916286, 'alpha_loss': -19.72742154556892, 'alpha': 1.1325533390045166, 'critic_loss': 491.0653630074738, 'actor_loss': 14.361356958488509, 'time_step': 0.0347376241849337, 'td_error': 1.4884926587954717, 'init_value': -14.974628448486328, 'ave_value': -14.365328636594494} step=1384
2022-04-22 07:40.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:40.29 [info     ] CQL_20220422073927: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003761425183687596, 'time_algorithm_update': 0.03422420079997509, 'temp_loss': 4.382095928137013, 'temp': 0.8677734972080055, 'alpha_loss': -20.451953000415955, 'alpha': 1.1746293464837048, 'critic_loss': 764.0235634511606, 'actor_loss': 12.87292361672903, 'time_step': 0.0346964390980715, 'td_error': 1.390458344571357, 'init_value': -11.944476127624512, 'ave_value': -11.561713968198296} step=1730
2022-04-22 07:40.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:40.41 [info     ] CQL_20220422073927: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.000363510468102604, 'time_algorithm_update': 0.033396231645793584, 'temp_loss': 4.250612992082717, 'temp': 0.8417441863889639, 'alpha_loss': -21.19898432252035, 'alpha': 1.2186532044686333, 'critic_loss': 1119.106755074738, 'actor_loss': 8.386060415664849, 'time_step': 0.03384991463898234, 'td_error': 1.314535485961086, 'init_value': -7.5198073387146, 'ave_value': -7.388224059363122} step=2076
2022-04-22 07:40.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:40.53 [info     ] CQL_20220422073927: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003682126888650001, 'time_algorithm_update': 0.03321226001474899, 'temp_loss': 4.124783145209958, 'temp': 0.8166736090458886, 'alpha_loss': -21.99454340631562, 'alpha': 1.2646774570376886, 'critic_loss': 1520.0269577820177, 'actor_loss': 5.215073464233751, 'time_step': 0.03367717968935222, 'td_error': 1.3034412147197292, 'init_value': -6.217355251312256, 'ave_value': -6.1577969435551205} step=2422
2022-04-22 07:40.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:41.05 [info     ] CQL_20220422073927: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003796602260170644, 'time_algorithm_update': 0.033698449245078026, 'temp_loss': 4.002780868827952, 'temp': 0.7924891229654323, 'alpha_loss': -22.82727128508463, 'alpha': 1.3127519322957606, 'critic_loss': 1904.9545602081828, 'actor_loss': 4.718151937330389, 'time_step': 0.03416889320219183, 'td_error': 1.3090683866989512, 'init_value': -6.282577991485596, 'ave_value': -6.238502473048883} step=2768
2022-04-22 07:41.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:41.18 [info     ] CQL_20220422073927: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00038649374349957946, 'time_algorithm_update': 0.034814778779972494, 'temp_loss': 3.8840586160648765, 'temp': 0.7691381866532254, 'alpha_loss': -23.702195162028936, 'alpha': 1.3629259274185048, 'critic_loss': 2271.075304681855, 'actor_loss': 4.846261108541764, 'time_step': 0.035290577508121555, 'td_error': 1.3134969338289288, 'init_value': -6.333973407745361, 'ave_value': -6.299611099708018} step=3114
2022-04-22 07:41.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:41.30 [info     ] CQL_20220422073927: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00038334951235379785, 'time_algorithm_update': 0.03500353325309092, 'temp_loss': 3.771388502479289, 'temp': 0.7465603015670886, 'alpha_loss': -24.612019439653164, 'alpha': 1.4152393213586312, 'critic_loss': 2636.82983680681, 'actor_loss': 5.072200357569435, 'time_step': 0.035481016070856526, 'td_error': 1.3198145472493903, 'init_value': -6.534792423248291, 'ave_value': -6.513223385076039} step=3460
2022-04-22 07:41.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:41.43 [info     ] CQL_20220422073927: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003711770724698987, 'time_algorithm_update': 0.0345119906298687, 'temp_loss': 3.660534925543504, 'temp': 0.7247106308881948, 'alpha_loss': -25.55861885423605, 'alpha': 1.4697446681860555, 'critic_loss': 3018.244307854272, 'actor_loss': 5.431902720059963, 'time_step': 0.03497914289463462, 'td_error': 1.3293650494682043, 'init_value': -7.023918628692627, 'ave_value': -7.002958542184843} step=3806
2022-04-22 07:41.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:41.56 [info     ] CQL_20220422073927: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003761962659097131, 'time_algorithm_update': 0.035043369138860976, 'temp_loss': 3.5533966914766784, 'temp': 0.7035585932993476, 'alpha_loss': -26.547095612983483, 'alpha': 1.5264933925832627, 'critic_loss': 3416.4379974541635, 'actor_loss': 5.827573398634188, 'time_step': 0.03551271402766939, 'td_error': 1.339316956775134, 'init_value': -7.502641201019287, 'ave_value': -7.481618522998562} step=4152
2022-04-22 07:41.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:42.08 [info     ] CQL_20220422073927: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.00037220172110320515, 'time_algorithm_update': 0.035157763889070194, 'temp_loss': 3.4502917479917494, 'temp': 0.6830639735811708, 'alpha_loss': -27.571767476252738, 'alpha': 1.5855497866007633, 'critic_loss': 3809.2608289773752, 'actor_loss': 6.2552949679380205, 'time_step': 0.03562553700684123, 'td_error': 1.34772668764915, 'init_value': -7.849854469299316, 'ave_value': -7.835259560467} step=4498
2022-04-22 07:42.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:42.21 [info     ] CQL_20220422073927: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003916989861196176, 'time_algorithm_update': 0.03614191512841021, 'temp_loss': 3.34973408789993, 'temp': 0.6631964689734354, 'alpha_loss': -28.639478082601734, 'alpha': 1.6469849534117418, 'critic_loss': 4213.917549618407, 'actor_loss': 6.730379425721361, 'time_step': 0.03662857706147122, 'td_error': 1.3559856877083216, 'init_value': -8.178250312805176, 'ave_value': -8.166895887104467} step=4844
2022-04-22 07:42.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:42.34 [info     ] CQL_20220422073927: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00038135602984125215, 'time_algorithm_update': 0.03518536876391813, 'temp_loss': 3.251749931732354, 'temp': 0.6439310660251992, 'alpha_loss': -29.747545413199187, 'alpha': 1.7108680478410225, 'critic_loss': 4616.831411725524, 'actor_loss': 7.231721792606949, 'time_step': 0.035653798566388256, 'td_error': 1.3671308626365108, 'init_value': -8.646889686584473, 'ave_value': -8.63884811717902} step=5190
2022-04-22 07:42.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:42.47 [info     ] CQL_20220422073927: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003633650741136143, 'time_algorithm_update': 0.03436673445508659, 'temp_loss': 3.1573954613911623, 'temp': 0.6252467828678947, 'alpha_loss': -30.905194106129553, 'alpha': 1.7772905103044014, 'critic_loss': 4972.761025842215, 'actor_loss': 7.759304471098619, 'time_step': 0.03482591554608648, 'td_error': 1.3808344137474617, 'init_value': -9.246925354003906, 'ave_value': -9.236466346548992} step=5536
2022-04-22 07:42.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:42.59 [info     ] CQL_20220422073927: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0004016974068790502, 'time_algorithm_update': 0.03434069583870772, 'temp_loss': 3.064790988933144, 'temp': 0.60711794247517, 'alpha_loss': -32.09978214440318, 'alpha': 1.8463358048758756, 'critic_loss': 5312.580025909953, 'actor_loss': 8.347015022542434, 'time_step': 0.03483789435700874, 'td_error': 1.395352509903253, 'init_value': -9.825359344482422, 'ave_value': -9.815865989113487} step=5882
2022-04-22 07:42.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:43.12 [info     ] CQL_20220422073927: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00037636302110087664, 'time_algorithm_update': 0.03525559750595534, 'temp_loss': 2.9778284868063953, 'temp': 0.5895209033365194, 'alpha_loss': -33.35254242516667, 'alpha': 1.9181021055734226, 'critic_loss': 5701.758385454299, 'actor_loss': 8.965212455374656, 'time_step': 0.03572939723902355, 'td_error': 1.409113953276392, 'init_value': -10.338363647460938, 'ave_value': -10.330646674064393} step=6228
2022-04-22 07:43.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:43.24 [info     ] CQL_20220422073927: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003749421566207974, 'time_algorithm_update': 0.03539198051298285, 'temp_loss': 2.8911934946313758, 'temp': 0.5724376243662972, 'alpha_loss': -34.65504161195259, 'alpha': 1.992697425315835, 'critic_loss': 6096.158706929642, 'actor_loss': 9.569510247666022, 'time_step': 0.035861224108348695, 'td_error': 1.4262942342546057, 'init_value': -10.979427337646484, 'ave_value': -10.970591229203189} step=6574
2022-04-22 07:43.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:43.37 [info     ] CQL_20220422073927: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00039162456644752813, 'time_algorithm_update': 0.03588882415969937, 'temp_loss': 2.8067506916950204, 'temp': 0.5558601070001635, 'alpha_loss': -35.999819860292995, 'alpha': 2.0702106242924065, 'critic_loss': 6440.670817998103, 'actor_loss': 10.192137139381012, 'time_step': 0.036371768554511096, 'td_error': 1.4442540785238602, 'init_value': -11.610135078430176, 'ave_value': -11.602583202110642} step=6920
2022-04-22 07:43.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:43.50 [info     ] CQL_20220422073927: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003990706680827058, 'time_algorithm_update': 0.03521453093931165, 'temp_loss': 2.7267134802878936, 'temp': 0.5397652768330767, 'alpha_loss': -37.39926408756675, 'alpha': 2.150751614157175, 'critic_loss': 6838.410788475434, 'actor_loss': 10.86192787313737, 'time_step': 0.035701783406252116, 'td_error': 1.4625143099481939, 'init_value': -12.225471496582031, 'ave_value': -12.218699745116725} step=7266
2022-04-22 07:43.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:44.03 [info     ] CQL_20220422073927: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00037077603312586085, 'time_algorithm_update': 0.03507905268255686, 'temp_loss': 2.647134888378871, 'temp': 0.5241368435366305, 'alpha_loss': -38.85073730160046, 'alpha': 2.2344322776518806, 'critic_loss': 7146.644230660676, 'actor_loss': 11.496590732839065, 'time_step': 0.035540501506342366, 'td_error': 1.4788017513480554, 'init_value': -12.73410701751709, 'ave_value': -12.72995312230855} step=7612
2022-04-22 07:44.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:44.16 [info     ] CQL_20220422073927: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003868754888545571, 'time_algorithm_update': 0.035872801190855876, 'temp_loss': 2.5703866254387564, 'temp': 0.5089648631266775, 'alpha_loss': -40.363751328749466, 'alpha': 2.3213713995983145, 'critic_loss': 7426.377204321712, 'actor_loss': 12.153796044388258, 'time_step': 0.03635959198020097, 'td_error': 1.5018975514385187, 'init_value': -13.478987693786621, 'ave_value': -13.47310190865529} step=7958
2022-04-22 07:44.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:44.28 [info     ] CQL_20220422073927: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0003875521566137413, 'time_algorithm_update': 0.035239082540390806, 'temp_loss': 2.4964903710205433, 'temp': 0.4942349205816412, 'alpha_loss': -41.936290807117615, 'alpha': 2.41171875785541, 'critic_loss': 7698.536894869942, 'actor_loss': 12.831367688371955, 'time_step': 0.035718246691488806, 'td_error': 1.521376030129974, 'init_value': -14.051229476928711, 'ave_value': -14.048083176664855} step=8304
2022-04-22 07:44.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:44.41 [info     ] CQL_20220422073927: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003709924014317507, 'time_algorithm_update': 0.03529283421577057, 'temp_loss': 2.424542746102879, 'temp': 0.4799293327021461, 'alpha_loss': -43.56993411731169, 'alpha': 2.5055748692826727, 'critic_loss': 8033.700013265445, 'actor_loss': 13.572643359961537, 'time_step': 0.035761095195836416, 'td_error': 1.5452409030346286, 'init_value': -14.734720230102539, 'ave_value': -14.732035795078847} step=8650
2022-04-22 07:44.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:44.54 [info     ] CQL_20220422073927: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003696066795746026, 'time_algorithm_update': 0.034982905222501366, 'temp_loss': 2.3534448291524988, 'temp': 0.46604101962781364, 'alpha_loss': -45.27079292253263, 'alpha': 2.6031034936794657, 'critic_loss': 8333.352430398754, 'actor_loss': 14.211472916465274, 'time_step': 0.035441682517873066, 'td_error': 1.572125928079996, 'init_value': -15.504809379577637, 'ave_value': -15.499003788643734} step=8996
2022-04-22 07:44.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:45.06 [info     ] CQL_20220422073927: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0003732938986982224, 'time_algorithm_update': 0.03532146236110974, 'temp_loss': 2.285919623567879, 'temp': 0.4525559313724496, 'alpha_loss': -47.03236327419391, 'alpha': 2.704430612525499, 'critic_loss': 8573.784116182713, 'actor_loss': 14.94377382091015, 'time_step': 0.035781355262491744, 'td_error': 1.6048719162774574, 'init_value': -16.406892776489258, 'ave_value': -16.39594256860942} step=9342
2022-04-22 07:45.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:45.19 [info     ] CQL_20220422073927: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.0003876431139907396, 'time_algorithm_update': 0.03578148480784686, 'temp_loss': 2.2198887470829693, 'temp': 0.43945986497608913, 'alpha_loss': -48.85298379446041, 'alpha': 2.809694551319056, 'critic_loss': 8782.825579163657, 'actor_loss': 15.646616147432713, 'time_step': 0.03626280506222234, 'td_error': 1.6223453593554575, 'init_value': -16.812868118286133, 'ave_value': -16.810166160435177} step=9688
2022-04-22 07:45.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:45.32 [info     ] CQL_20220422073927: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.00037016207083112243, 'time_algorithm_update': 0.035280946362225306, 'temp_loss': 2.1554971388998747, 'temp': 0.4267446781169472, 'alpha_loss': -50.757928341110315, 'alpha': 2.919041857554044, 'critic_loss': 9023.28272613349, 'actor_loss': 16.371351696852315, 'time_step': 0.035739450096394974, 'td_error': 1.649891887549362, 'init_value': -17.49991226196289, 'ave_value': -17.497510523499894} step=10034
2022-04-22 07:45.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:45.45 [info     ] CQL_20220422073927: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.00036593944351108086, 'time_algorithm_update': 0.03539501173647842, 'temp_loss': 2.0929911736119, 'temp': 0.4143974283182552, 'alpha_loss': -52.720962017257776, 'alpha': 3.0326480079937532, 'critic_loss': 9118.141195131864, 'actor_loss': 17.07093941131768, 'time_step': 0.03585019690452972, 'td_error': 1.6776200737171563, 'init_value': -18.162574768066406, 'ave_value': -18.1614789813545} step=10380
2022-04-22 07:45.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:45.57 [info     ] CQL_20220422073927: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003709303850383428, 'time_algorithm_update': 0.034912562094671876, 'temp_loss': 2.0322206075480906, 'temp': 0.4024089976197722, 'alpha_loss': -54.789947245162345, 'alpha': 3.150696070208026, 'critic_loss': 8695.777558255058, 'actor_loss': 17.633703689354693, 'time_step': 0.03537776015397441, 'td_error': 1.7016812051510877, 'init_value': -18.71952247619629, 'ave_value': -18.720721520559096} step=10726
2022-04-22 07:45.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:46.10 [info     ] CQL_20220422073927: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.000374441201976269, 'time_algorithm_update': 0.0345274223757617, 'temp_loss': 1.9728927384911246, 'temp': 0.39076871411993325, 'alpha_loss': -56.91664301039856, 'alpha': 3.2733477719257333, 'critic_loss': 7196.710796378252, 'actor_loss': 18.00235620950688, 'time_step': 0.03499916385363981, 'td_error': 1.7196592024681903, 'init_value': -19.10895347595215, 'ave_value': -19.112970658940352} step=11072
2022-04-22 07:46.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:46.22 [info     ] CQL_20220422073927: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00036754360088723246, 'time_algorithm_update': 0.034779683013871916, 'temp_loss': 1.9166962194304935, 'temp': 0.37946378073595854, 'alpha_loss': -59.134958972820655, 'alpha': 3.4007667206615384, 'critic_loss': 5988.639899634212, 'actor_loss': 18.65865947745439, 'time_step': 0.03524281937262915, 'td_error': 1.7489149727277633, 'init_value': -19.778221130371094, 'ave_value': -19.780894326598457} step=11418
2022-04-22 07:46.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:46.35 [info     ] CQL_20220422073927: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.000367725515641229, 'time_algorithm_update': 0.034748694800228054, 'temp_loss': 1.8613537491401495, 'temp': 0.3684833662186055, 'alpha_loss': -61.44258333768459, 'alpha': 3.5331564983191517, 'critic_loss': 5395.658546050849, 'actor_loss': 19.577698040559802, 'time_step': 0.03521090022401314, 'td_error': 1.794504670975168, 'init_value': -20.801916122436523, 'ave_value': -20.79902885386366} step=11764
2022-04-22 07:46.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:46.48 [info     ] CQL_20220422073927: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003777873309361452, 'time_algorithm_update': 0.03568862766199718, 'temp_loss': 1.806770258900747, 'temp': 0.3578224748028496, 'alpha_loss': -63.826751797185466, 'alpha': 3.670702794383716, 'critic_loss': 5735.379754391709, 'actor_loss': 20.730634534979142, 'time_step': 0.036157193900532805, 'td_error': 1.8473702400560523, 'init_value': -21.918628692626953, 'ave_value': -21.911358658223993} step=12110
2022-04-22 07:46.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:47.00 [info     ] CQL_20220422073927: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00037387754186729474, 'time_algorithm_update': 0.03514824988525038, 'temp_loss': 1.7547940370664432, 'temp': 0.34747132073248055, 'alpha_loss': -66.31151080269345, 'alpha': 3.8135803104136032, 'critic_loss': 6012.877942388457, 'actor_loss': 21.647038415677287, 'time_step': 0.035607699713955034, 'td_error': 1.8903748835308551, 'init_value': -22.758153915405273, 'ave_value': -22.75328002159151} step=12456
2022-04-22 07:47.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:47.13 [info     ] CQL_20220422073927: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003712707861310485, 'time_algorithm_update': 0.03511621566177103, 'temp_loss': 1.703861658972812, 'temp': 0.33741933736153423, 'alpha_loss': -68.89536744046073, 'alpha': 3.962030123423979, 'critic_loss': 6276.311835316565, 'actor_loss': 22.540451386071354, 'time_step': 0.03558023127517259, 'td_error': 1.932595461328726, 'init_value': -23.566503524780273, 'ave_value': -23.563196260028114} step=12802
2022-04-22 07:47.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:47.26 [info     ] CQL_20220422073927: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0004014837948573118, 'time_algorithm_update': 0.035654476612289515, 'temp_loss': 1.6548024571010833, 'temp': 0.32765720534875903, 'alpha_loss': -71.58979426918691, 'alpha': 4.116267612214722, 'critic_loss': 6644.440501659592, 'actor_loss': 23.37793818788032, 'time_step': 0.036146708306549605, 'td_error': 1.9791035515011375, 'init_value': -24.444042205810547, 'ave_value': -24.438299857547012} step=13148
2022-04-22 07:47.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:47.38 [info     ] CQL_20220422073927: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.000369902291049847, 'time_algorithm_update': 0.0345924810178018, 'temp_loss': 1.6069429654606504, 'temp': 0.31817612637674186, 'alpha_loss': -74.36255326022992, 'alpha': 4.2765185846758715, 'critic_loss': 6971.374843354859, 'actor_loss': 24.155633590124935, 'time_step': 0.035053727254702176, 'td_error': 2.017290219811989, 'init_value': -25.12553596496582, 'ave_value': -25.122443575382007} step=13494
2022-04-22 07:47.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:47.51 [info     ] CQL_20220422073927: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00037651599487128286, 'time_algorithm_update': 0.034568562673006446, 'temp_loss': 1.5604992064437426, 'temp': 0.30896985625600537, 'alpha_loss': -77.26631876774606, 'alpha': 4.442989070980535, 'critic_loss': 7324.869676887644, 'actor_loss': 24.980950487831425, 'time_step': 0.035034972808264585, 'td_error': 2.0672417985790243, 'init_value': -26.01927947998047, 'ave_value': -26.013838334714364} step=13840
2022-04-22 07:47.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:48.04 [info     ] CQL_20220422073927: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00037992000579833984, 'time_algorithm_update': 0.03544333008672461, 'temp_loss': 1.5151437414174824, 'temp': 0.30003111055820664, 'alpha_loss': -80.2557465217017, 'alpha': 4.615928619583218, 'critic_loss': 7588.701653100163, 'actor_loss': 25.770740101103147, 'time_step': 0.03592327219902435, 'td_error': 2.103306738929286, 'init_value': -26.609094619750977, 'ave_value': -26.60866294488233} step=14186
2022-04-22 07:48.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:48.16 [info     ] CQL_20220422073927: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003686481817609313, 'time_algorithm_update': 0.034232182309806694, 'temp_loss': 1.4713821511048113, 'temp': 0.2913511446273396, 'alpha_loss': -83.39487236772659, 'alpha': 4.795600570006178, 'critic_loss': 7932.2677206579665, 'actor_loss': 26.516786680056182, 'time_step': 0.03469159699588842, 'td_error': 2.1554262002892166, 'init_value': -27.50152587890625, 'ave_value': -27.49760699193022} step=14532
2022-04-22 07:48.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:48.29 [info     ] CQL_20220422073927: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0004007912784642567, 'time_algorithm_update': 0.0356692564969807, 'temp_loss': 1.4286884059106684, 'temp': 0.2829217977951028, 'alpha_loss': -86.6272974400162, 'alpha': 4.982267306719212, 'critic_loss': 7815.3988989681175, 'actor_loss': 27.059177200229183, 'time_step': 0.036166450880855495, 'td_error': 2.172439745839885, 'init_value': -27.751510620117188, 'ave_value': -27.753766589573967} step=14878
2022-04-22 07:48.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:48.42 [info     ] CQL_20220422073927: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00036238659323984486, 'time_algorithm_update': 0.03477439094830111, 'temp_loss': 1.387760031774554, 'temp': 0.2747353335164186, 'alpha_loss': -89.99960666722644, 'alpha': 5.176179646067537, 'critic_loss': 6706.699743722905, 'actor_loss': 27.341512437500704, 'time_step': 0.03523009360870185, 'td_error': 2.1909160142912976, 'init_value': -28.047677993774414, 'ave_value': -28.051190356204383} step=15224
2022-04-22 07:48.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:48.54 [info     ] CQL_20220422073927: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0004012019648028247, 'time_algorithm_update': 0.03478909021168086, 'temp_loss': 1.3475898673079607, 'temp': 0.2667850472162225, 'alpha_loss': -93.49505650514813, 'alpha': 5.377657756640043, 'critic_loss': 6117.307629888457, 'actor_loss': 27.97748906350549, 'time_step': 0.035277552687363815, 'td_error': 2.237726130750178, 'init_value': -28.81007194519043, 'ave_value': -28.81041804042796} step=15570
2022-04-22 07:48.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:49.07 [info     ] CQL_20220422073927: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0003683918473348452, 'time_algorithm_update': 0.03462047728499925, 'temp_loss': 1.3083342151834785, 'temp': 0.2590667123050359, 'alpha_loss': -97.14433328264711, 'alpha': 5.586988049435478, 'critic_loss': 6173.455034377258, 'actor_loss': 28.809929886305262, 'time_step': 0.035081850310970594, 'td_error': 2.2963539950370717, 'init_value': -29.739675521850586, 'ave_value': -29.73683629567276} step=15916
2022-04-22 07:49.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:49.19 [info     ] CQL_20220422073927: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00037405187683987477, 'time_algorithm_update': 0.03497557557387159, 'temp_loss': 1.2704737841049372, 'temp': 0.25157186324369013, 'alpha_loss': -100.91932942825935, 'alpha': 5.804466830512692, 'critic_loss': 6477.798035020774, 'actor_loss': 29.657658891181725, 'time_step': 0.03543537338345037, 'td_error': 2.3452935716735555, 'init_value': -30.479421615600586, 'ave_value': -30.477369963246648} step=16262
2022-04-22 07:49.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:49.32 [info     ] CQL_20220422073927: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00038580329431963794, 'time_algorithm_update': 0.03514131783060945, 'temp_loss': 1.2336597039520396, 'temp': 0.24429344058553606, 'alpha_loss': -104.85670563802553, 'alpha': 6.0304174809097555, 'critic_loss': 6707.354157728956, 'actor_loss': 30.413889802260204, 'time_step': 0.03562054330902981, 'td_error': 2.394210420436223, 'init_value': -31.2049503326416, 'ave_value': -31.20409682795243} step=16608
2022-04-22 07:49.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:49.45 [info     ] CQL_20220422073927: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003709979140000536, 'time_algorithm_update': 0.035331369135421134, 'temp_loss': 1.198043450454756, 'temp': 0.23722602508832955, 'alpha_loss': -108.94682065048659, 'alpha': 6.265176074353256, 'critic_loss': 6845.872712416456, 'actor_loss': 31.092716272166697, 'time_step': 0.03579093610620223, 'td_error': 2.443593549508946, 'init_value': -31.929035186767578, 'ave_value': -31.92730545968227} step=16954
2022-04-22 07:49.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:49.57 [info     ] CQL_20220422073927: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003723471150921948, 'time_algorithm_update': 0.03561389928608272, 'temp_loss': 1.163653007477005, 'temp': 0.23036197745214307, 'alpha_loss': -113.17685075440158, 'alpha': 6.509049764258324, 'critic_loss': 7139.132527434068, 'actor_loss': 31.850069349211765, 'time_step': 0.036081099096750245, 'td_error': 2.4864990970704133, 'init_value': -32.5302734375, 'ave_value': -32.52962858725521} step=17300
2022-04-22 07:49.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422073927/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191004

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:49.58 [info     ] FQE_20220422074958: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00012784406363245952, 'time_algorithm_update': 0.002173726817211473, 'loss': 0.007227645432352122, 'time_step': 0.0023615432072834797, 'init_value': -0.12975212931632996, 'ave_value': -0.10540054748547976, 'soft_opc': nan} step=166




2022-04-22 07:49.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:49.59 [info     ] FQE_20220422074958: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00016080327780849962, 'time_algorithm_update': 0.0023495045052953512, 'loss': 0.0043811334846877905, 'time_step': 0.0025671232177550533, 'init_value': -0.1893916130065918, 'ave_value': -0.13210569349755238, 'soft_opc': nan} step=332




2022-04-22 07:49.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:49.59 [info     ] FQE_20220422074958: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001365635768476739, 'time_algorithm_update': 0.0024565889174679675, 'loss': 0.0036114878301699476, 'time_step': 0.002654240792056164, 'init_value': -0.23609335720539093, 'ave_value': -0.1637867907523639, 'soft_opc': nan} step=498




2022-04-22 07:49.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.00 [info     ] FQE_20220422074958: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.000146904623652079, 'time_algorithm_update': 0.002826401986271502, 'loss': 0.003352176339129906, 'time_step': 0.0030321012060326264, 'init_value': -0.295976459980011, 'ave_value': -0.2016614729488218, 'soft_opc': nan} step=664




2022-04-22 07:50.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.00 [info     ] FQE_20220422074958: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00012663760817194558, 'time_algorithm_update': 0.0020802911505641707, 'loss': 0.003080953857917563, 'time_step': 0.0022592573280794076, 'init_value': -0.35125017166137695, 'ave_value': -0.24328092646088686, 'soft_opc': nan} step=830




2022-04-22 07:50.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.00 [info     ] FQE_20220422074958: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00012302542307290686, 'time_algorithm_update': 0.0020329837339470185, 'loss': 0.002727603613460683, 'time_step': 0.002206780824316553, 'init_value': -0.35383257269859314, 'ave_value': -0.2397128246254749, 'soft_opc': nan} step=996




2022-04-22 07:50.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.01 [info     ] FQE_20220422074958: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001356515539697854, 'time_algorithm_update': 0.0022969561887074665, 'loss': 0.0025016653796108104, 'time_step': 0.0024911265775381802, 'init_value': -0.399408221244812, 'ave_value': -0.2820975725614541, 'soft_opc': nan} step=1162




2022-04-22 07:50.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.01 [info     ] FQE_20220422074958: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00012410261544836573, 'time_algorithm_update': 0.0020345794149191983, 'loss': 0.002227619298066136, 'time_step': 0.002211223165672946, 'init_value': -0.44742393493652344, 'ave_value': -0.34174951400171527, 'soft_opc': nan} step=1328




2022-04-22 07:50.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.02 [info     ] FQE_20220422074958: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00013078551694571254, 'time_algorithm_update': 0.0022778568497623304, 'loss': 0.0019050317557657655, 'time_step': 0.0024673378611185463, 'init_value': -0.45168620347976685, 'ave_value': -0.35149630371246254, 'soft_opc': nan} step=1494




2022-04-22 07:50.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.02 [info     ] FQE_20220422074958: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00012640062584934463, 'time_algorithm_update': 0.002216657960271261, 'loss': 0.0017156637559858072, 'time_step': 0.002401883343616164, 'init_value': -0.48188450932502747, 'ave_value': -0.3901006643735879, 'soft_opc': nan} step=1660




2022-04-22 07:50.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.03 [info     ] FQE_20220422074958: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00013034027743052287, 'time_algorithm_update': 0.002140376941267266, 'loss': 0.001568724729502237, 'time_step': 0.0023259053747337983, 'init_value': -0.5213969945907593, 'ave_value': -0.4392431356180627, 'soft_opc': nan} step=1826




2022-04-22 07:50.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.03 [info     ] FQE_20220422074958: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00012711731784314993, 'time_algorithm_update': 0.002146534172885389, 'loss': 0.0013844384378767628, 'time_step': 0.0023289947624666146, 'init_value': -0.5423526167869568, 'ave_value': -0.46853877104533553, 'soft_opc': nan} step=1992




2022-04-22 07:50.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.03 [info     ] FQE_20220422074958: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001252401305968503, 'time_algorithm_update': 0.0021140072719160333, 'loss': 0.001326221749866774, 'time_step': 0.0022936226373695464, 'init_value': -0.5662021636962891, 'ave_value': -0.5012369492099629, 'soft_opc': nan} step=2158




2022-04-22 07:50.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.04 [info     ] FQE_20220422074958: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.0001287173075848315, 'time_algorithm_update': 0.0023583547178521215, 'loss': 0.0012933160185376294, 'time_step': 0.0025453897843877955, 'init_value': -0.5883294343948364, 'ave_value': -0.5465415272828158, 'soft_opc': nan} step=2324




2022-04-22 07:50.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.04 [info     ] FQE_20220422074958: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00013612120984548545, 'time_algorithm_update': 0.0024558305740356445, 'loss': 0.001250785542126038, 'time_step': 0.002652227160442306, 'init_value': -0.5903271436691284, 'ave_value': -0.5436776671650911, 'soft_opc': nan} step=2490




2022-04-22 07:50.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.05 [info     ] FQE_20220422074958: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00013928959168583513, 'time_algorithm_update': 0.002579331398010254, 'loss': 0.001316636607032662, 'time_step': 0.0027823002941637152, 'init_value': -0.6145533323287964, 'ave_value': -0.5760398323370798, 'soft_opc': nan} step=2656




2022-04-22 07:50.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.05 [info     ] FQE_20220422074958: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00012490835534520895, 'time_algorithm_update': 0.0021555581724787332, 'loss': 0.001344200012593878, 'time_step': 0.002336599740637354, 'init_value': -0.6460963487625122, 'ave_value': -0.612827939886667, 'soft_opc': nan} step=2822




2022-04-22 07:50.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.06 [info     ] FQE_20220422074958: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00013438333947974514, 'time_algorithm_update': 0.0024599037974713796, 'loss': 0.0013439591489423695, 'time_step': 0.002655070948313518, 'init_value': -0.6900206208229065, 'ave_value': -0.6439865647001309, 'soft_opc': nan} step=2988




2022-04-22 07:50.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.06 [info     ] FQE_20220422074958: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00012679990515651474, 'time_algorithm_update': 0.0021074952849422595, 'loss': 0.0013644409019511238, 'time_step': 0.0022921203130699067, 'init_value': -0.7155593633651733, 'ave_value': -0.6738250298945754, 'soft_opc': nan} step=3154




2022-04-22 07:50.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.07 [info     ] FQE_20220422074958: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.0001358583749058735, 'time_algorithm_update': 0.0024714541722493, 'loss': 0.001373863292658836, 'time_step': 0.0026690758854509837, 'init_value': -0.7983067035675049, 'ave_value': -0.7606224998328331, 'soft_opc': nan} step=3320




2022-04-22 07:50.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.07 [info     ] FQE_20220422074958: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00012966667313173594, 'time_algorithm_update': 0.0023624753377523766, 'loss': 0.0014345351989165016, 'time_step': 0.0025502687477203735, 'init_value': -0.7934557795524597, 'ave_value': -0.7593687841114966, 'soft_opc': nan} step=3486




2022-04-22 07:50.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.08 [info     ] FQE_20220422074958: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00013935565948486328, 'time_algorithm_update': 0.0024367743227855267, 'loss': 0.0015530900728849537, 'time_step': 0.0026344075260392153, 'init_value': -0.8435238003730774, 'ave_value': -0.7978935271233052, 'soft_opc': nan} step=3652




2022-04-22 07:50.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.08 [info     ] FQE_20220422074958: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00012545844158494328, 'time_algorithm_update': 0.002242823681199407, 'loss': 0.0015309869442541848, 'time_step': 0.002424959676811494, 'init_value': -0.8875852823257446, 'ave_value': -0.8278856330962332, 'soft_opc': nan} step=3818




2022-04-22 07:50.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.09 [info     ] FQE_20220422074958: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00014224397130759367, 'time_algorithm_update': 0.002512946186295475, 'loss': 0.0017125008649512541, 'time_step': 0.002724512513861599, 'init_value': -0.9062706828117371, 'ave_value': -0.8491031761574852, 'soft_opc': nan} step=3984




2022-04-22 07:50.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.09 [info     ] FQE_20220422074958: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00012655717780791134, 'time_algorithm_update': 0.002238612577139613, 'loss': 0.0016714906446559984, 'time_step': 0.0024221058351447783, 'init_value': -0.9416618943214417, 'ave_value': -0.8820697657361224, 'soft_opc': nan} step=4150




2022-04-22 07:50.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.10 [info     ] FQE_20220422074958: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00013006020741290357, 'time_algorithm_update': 0.0024277431419096798, 'loss': 0.0018191784718466632, 'time_step': 0.0026154977729521602, 'init_value': -0.9848750233650208, 'ave_value': -0.9110030865884042, 'soft_opc': nan} step=4316




2022-04-22 07:50.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.10 [info     ] FQE_20220422074958: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00012831371950815958, 'time_algorithm_update': 0.0021680277514170452, 'loss': 0.0018286308200387508, 'time_step': 0.0023548531245036297, 'init_value': -0.9878418445587158, 'ave_value': -0.9205431656526015, 'soft_opc': nan} step=4482




2022-04-22 07:50.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.11 [info     ] FQE_20220422074958: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00012191806931093515, 'time_algorithm_update': 0.002082932426268796, 'loss': 0.0020019016751959212, 'time_step': 0.0022556408342108668, 'init_value': -1.0433032512664795, 'ave_value': -0.9587390297436499, 'soft_opc': nan} step=4648




2022-04-22 07:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.11 [info     ] FQE_20220422074958: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.0001285923532692783, 'time_algorithm_update': 0.002295715263090938, 'loss': 0.0020801725931302926, 'time_step': 0.00248269862439259, 'init_value': -1.09464430809021, 'ave_value': -1.0053304796350433, 'soft_opc': nan} step=4814




2022-04-22 07:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.11 [info     ] FQE_20220422074958: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001332932207957808, 'time_algorithm_update': 0.0023787754127778202, 'loss': 0.0021941526632521763, 'time_step': 0.0025767174111791403, 'init_value': -1.1339380741119385, 'ave_value': -1.043438908887339, 'soft_opc': nan} step=4980




2022-04-22 07:50.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.12 [info     ] FQE_20220422074958: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00011744125779852809, 'time_algorithm_update': 0.0019566653722740083, 'loss': 0.0022566610614485837, 'time_step': 0.002124013670955796, 'init_value': -1.150164246559143, 'ave_value': -1.054595897286325, 'soft_opc': nan} step=5146




2022-04-22 07:50.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.12 [info     ] FQE_20220422074958: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00013551367334572664, 'time_algorithm_update': 0.0026543240949331998, 'loss': 0.0021577329584888286, 'time_step': 0.002851096980543022, 'init_value': -1.2089037895202637, 'ave_value': -1.0960051153157209, 'soft_opc': nan} step=5312




2022-04-22 07:50.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.13 [info     ] FQE_20220422074958: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00012595107756465315, 'time_algorithm_update': 0.002120053911783609, 'loss': 0.002520312685224912, 'time_step': 0.0023010782448642225, 'init_value': -1.2295947074890137, 'ave_value': -1.1094189632180576, 'soft_opc': nan} step=5478




2022-04-22 07:50.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.13 [info     ] FQE_20220422074958: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001369757824633495, 'time_algorithm_update': 0.0024471340409244397, 'loss': 0.0025828797293052704, 'time_step': 0.002643042300120894, 'init_value': -1.2964918613433838, 'ave_value': -1.1694555142307066, 'soft_opc': nan} step=5644




2022-04-22 07:50.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.14 [info     ] FQE_20220422074958: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00013244008443441736, 'time_algorithm_update': 0.002423465970050858, 'loss': 0.0026852402714058667, 'time_step': 0.002616804766367717, 'init_value': -1.359933614730835, 'ave_value': -1.2227768718122363, 'soft_opc': nan} step=5810




2022-04-22 07:50.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.14 [info     ] FQE_20220422074958: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001327977123030697, 'time_algorithm_update': 0.0023690950439636967, 'loss': 0.0028490649352336853, 'time_step': 0.0025590916714036322, 'init_value': -1.3981993198394775, 'ave_value': -1.2487327062868858, 'soft_opc': nan} step=5976




2022-04-22 07:50.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.15 [info     ] FQE_20220422074958: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00013728888638048288, 'time_algorithm_update': 0.0024524783513632164, 'loss': 0.0031216609493705883, 'time_step': 0.002655938447239887, 'init_value': -1.4624629020690918, 'ave_value': -1.2960647759271098, 'soft_opc': nan} step=6142




2022-04-22 07:50.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.15 [info     ] FQE_20220422074958: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001438066183802593, 'time_algorithm_update': 0.0026809494179415414, 'loss': 0.0033577527847009183, 'time_step': 0.0028983297118221423, 'init_value': -1.5210931301116943, 'ave_value': -1.3503323745217408, 'soft_opc': nan} step=6308




2022-04-22 07:50.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.16 [info     ] FQE_20220422074958: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00013144188616649215, 'time_algorithm_update': 0.002326458333486534, 'loss': 0.003407827176992411, 'time_step': 0.0025189009057470114, 'init_value': -1.52012038230896, 'ave_value': -1.344626764684647, 'soft_opc': nan} step=6474




2022-04-22 07:50.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.16 [info     ] FQE_20220422074958: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00013026702834899166, 'time_algorithm_update': 0.002375298235789839, 'loss': 0.0034283467439418457, 'time_step': 0.0025644374180989094, 'init_value': -1.6034983396530151, 'ave_value': -1.420110249626744, 'soft_opc': nan} step=6640




2022-04-22 07:50.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.17 [info     ] FQE_20220422074958: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00012240783277764377, 'time_algorithm_update': 0.001979177256664598, 'loss': 0.0036385631446827606, 'time_step': 0.0021570777318563805, 'init_value': -1.645784854888916, 'ave_value': -1.4522614056059906, 'soft_opc': nan} step=6806




2022-04-22 07:50.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.17 [info     ] FQE_20220422074958: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.0001312925154904285, 'time_algorithm_update': 0.00231123688709305, 'loss': 0.003836056655861931, 'time_step': 0.0025018065808767296, 'init_value': -1.6423863172531128, 'ave_value': -1.4488040482138729, 'soft_opc': nan} step=6972




2022-04-22 07:50.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.18 [info     ] FQE_20220422074958: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.0001280724284160568, 'time_algorithm_update': 0.0023544308650924497, 'loss': 0.004176214665030017, 'time_step': 0.0025412734732570418, 'init_value': -1.6768910884857178, 'ave_value': -1.4743232163760038, 'soft_opc': nan} step=7138




2022-04-22 07:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.18 [info     ] FQE_20220422074958: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00013107564075883613, 'time_algorithm_update': 0.002245424741722015, 'loss': 0.004195467629771489, 'time_step': 0.0024364123861473725, 'init_value': -1.7401504516601562, 'ave_value': -1.5395882056371586, 'soft_opc': nan} step=7304




2022-04-22 07:50.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.19 [info     ] FQE_20220422074958: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00014209747314453125, 'time_algorithm_update': 0.002691524574555546, 'loss': 0.004116809625719977, 'time_step': 0.002901745129780597, 'init_value': -1.7767727375030518, 'ave_value': -1.5709593984189334, 'soft_opc': nan} step=7470




2022-04-22 07:50.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.19 [info     ] FQE_20220422074958: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001394289085663945, 'time_algorithm_update': 0.002675235989582108, 'loss': 0.00442163032077741, 'time_step': 0.0028779377420264556, 'init_value': -1.8229259252548218, 'ave_value': -1.6041374503760724, 'soft_opc': nan} step=7636




2022-04-22 07:50.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.20 [info     ] FQE_20220422074958: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.0001319373946592032, 'time_algorithm_update': 0.00223477920853948, 'loss': 0.004501273132704973, 'time_step': 0.0024257165839873165, 'init_value': -1.8420816659927368, 'ave_value': -1.620156221883791, 'soft_opc': nan} step=7802




2022-04-22 07:50.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.20 [info     ] FQE_20220422074958: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001397003610450101, 'time_algorithm_update': 0.0024178387170814605, 'loss': 0.0045663921117953706, 'time_step': 0.0026242661188883953, 'init_value': -1.912215232849121, 'ave_value': -1.6728586379203711, 'soft_opc': nan} step=7968




2022-04-22 07:50.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.21 [info     ] FQE_20220422074958: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00013331332838678933, 'time_algorithm_update': 0.0023207966103611223, 'loss': 0.004771401788527426, 'time_step': 0.0025148822600582995, 'init_value': -1.9747501611709595, 'ave_value': -1.7091851583740734, 'soft_opc': nan} step=8134




2022-04-22 07:50.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 07:50.21 [info     ] FQE_20220422074958: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.0001447286950536521, 'time_algorithm_update': 0.002719405185745423, 'loss': 0.0048906336121572495, 'time_step': 0.0029316307550453276, 'init_value': -1.9520809650421143, 'ave_value': -1.6930603971889426, 'soft_opc': nan} step=8300




2022-04-22 07:50.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422074958/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 07:50.21 [debug    ] RoundIterator is selected.
2022-04-22 07:50.21 [info     ] Directory is created at d3rlpy_logs/FQE_20220422075021
2022-04-22 07:50.21 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:50.21 [debug    ] Building models...
2022-04-22 07:50.21 [debug    ] Models have been built.
2022-04-22 07:50.21 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422075021/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 07:50.22 [info     ] FQE_20220422075021: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00012252081272213957, 'time_algorithm_update': 0.002037554286247076, 'loss': 0.021744292551586622, 'time_step': 0.0022174272426339083, 'init_value': -1.2181295156478882, 'ave_value': -1.221044057553953, 'soft_opc': nan} step=344




2022-04-22 07:50.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.23 [info     ] FQE_20220422075021: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00012629184612008028, 'time_algorithm_update': 0.0021622409654218095, 'loss': 0.019198221966710893, 'time_step': 0.002343826515730037, 'init_value': -1.9791069030761719, 'ave_value': -1.9616085566915906, 'soft_opc': nan} step=688




2022-04-22 07:50.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.24 [info     ] FQE_20220422075021: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00014334847760754963, 'time_algorithm_update': 0.002450581206831821, 'loss': 0.021951396844003265, 'time_step': 0.0026569477347440497, 'init_value': -2.760826587677002, 'ave_value': -2.7544162753197523, 'soft_opc': nan} step=1032




2022-04-22 07:50.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.25 [info     ] FQE_20220422075021: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00013155438179193542, 'time_algorithm_update': 0.002359607191972954, 'loss': 0.026714202490375313, 'time_step': 0.002551253451857456, 'init_value': -3.363459587097168, 'ave_value': -3.361086800332005, 'soft_opc': nan} step=1376




2022-04-22 07:50.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.26 [info     ] FQE_20220422075021: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001382446566293406, 'time_algorithm_update': 0.002482315135556598, 'loss': 0.03401902509138508, 'time_step': 0.0026850430078284686, 'init_value': -4.224176406860352, 'ave_value': -4.203833108737662, 'soft_opc': nan} step=1720




2022-04-22 07:50.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.27 [info     ] FQE_20220422075021: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00013139636017555414, 'time_algorithm_update': 0.0023588669854541157, 'loss': 0.039657408601691035, 'time_step': 0.0025503212629362595, 'init_value': -4.773853778839111, 'ave_value': -4.735880902342431, 'soft_opc': nan} step=2064




2022-04-22 07:50.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.28 [info     ] FQE_20220422075021: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001328379608864008, 'time_algorithm_update': 0.0024101311384245407, 'loss': 0.04842575774852966, 'time_step': 0.0026079069736391998, 'init_value': -5.614459037780762, 'ave_value': -5.599193660304085, 'soft_opc': nan} step=2408




2022-04-22 07:50.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.29 [info     ] FQE_20220422075021: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001371655353280001, 'time_algorithm_update': 0.002469609643137732, 'loss': 0.05833397846571495, 'time_step': 0.0026730818803920304, 'init_value': -6.226944446563721, 'ave_value': -6.158873296557582, 'soft_opc': nan} step=2752




2022-04-22 07:50.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.30 [info     ] FQE_20220422075021: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00013973962428957918, 'time_algorithm_update': 0.002536375162213348, 'loss': 0.06832677440793622, 'time_step': 0.0027415745480116023, 'init_value': -6.785214900970459, 'ave_value': -6.74282140797969, 'soft_opc': nan} step=3096




2022-04-22 07:50.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.31 [info     ] FQE_20220422075021: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00012910712596982024, 'time_algorithm_update': 0.0022932602915652963, 'loss': 0.08137948357305208, 'time_step': 0.002480834722518921, 'init_value': -7.663817405700684, 'ave_value': -7.631905417661979, 'soft_opc': nan} step=3440




2022-04-22 07:50.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.32 [info     ] FQE_20220422075021: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00012972465781278388, 'time_algorithm_update': 0.0022166142630022628, 'loss': 0.09424046194722313, 'time_step': 0.0024092841980069184, 'init_value': -8.432014465332031, 'ave_value': -8.32884160700036, 'soft_opc': nan} step=3784




2022-04-22 07:50.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.33 [info     ] FQE_20220422075021: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001350876896880394, 'time_algorithm_update': 0.0024340152740478516, 'loss': 0.11657328055196897, 'time_step': 0.0026287325592928156, 'init_value': -9.281807899475098, 'ave_value': -9.201168254786381, 'soft_opc': nan} step=4128




2022-04-22 07:50.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.34 [info     ] FQE_20220422075021: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00013045654740444449, 'time_algorithm_update': 0.00232784207477126, 'loss': 0.13896972348177156, 'time_step': 0.002515733935112177, 'init_value': -10.105094909667969, 'ave_value': -9.889794112355155, 'soft_opc': nan} step=4472




2022-04-22 07:50.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.35 [info     ] FQE_20220422075021: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00012939198072566541, 'time_algorithm_update': 0.0022440386372943257, 'loss': 0.16919016754146404, 'time_step': 0.0024353646954824756, 'init_value': -11.237897872924805, 'ave_value': -11.013639504595467, 'soft_opc': nan} step=4816




2022-04-22 07:50.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.36 [info     ] FQE_20220422075021: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00013483125110005223, 'time_algorithm_update': 0.0024194994638132494, 'loss': 0.19642837712985226, 'time_step': 0.002616416576296784, 'init_value': -11.874281883239746, 'ave_value': -11.572634723771213, 'soft_opc': nan} step=5160




2022-04-22 07:50.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.37 [info     ] FQE_20220422075021: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.0001291819783144219, 'time_algorithm_update': 0.0022692403127980787, 'loss': 0.22831617584885205, 'time_step': 0.0024560544379921847, 'init_value': -13.117725372314453, 'ave_value': -12.781027866140645, 'soft_opc': nan} step=5504




2022-04-22 07:50.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.37 [info     ] FQE_20220422075021: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00013320251952770145, 'time_algorithm_update': 0.0023955785950949024, 'loss': 0.2659626164755156, 'time_step': 0.0025913458923960842, 'init_value': -13.925182342529297, 'ave_value': -13.469803170940361, 'soft_opc': nan} step=5848




2022-04-22 07:50.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.39 [info     ] FQE_20220422075021: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00013739425082539403, 'time_algorithm_update': 0.0025166238463202187, 'loss': 0.3027469128868434, 'time_step': 0.002718951813010282, 'init_value': -14.559839248657227, 'ave_value': -14.100196292363778, 'soft_opc': nan} step=6192




2022-04-22 07:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.39 [info     ] FQE_20220422075021: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00013126051703164744, 'time_algorithm_update': 0.0022797480572101683, 'loss': 0.34525696873166706, 'time_step': 0.0024716355079828305, 'init_value': -15.589670181274414, 'ave_value': -15.200275498974296, 'soft_opc': nan} step=6536




2022-04-22 07:50.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.40 [info     ] FQE_20220422075021: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00012915702753288802, 'time_algorithm_update': 0.0022716917270837827, 'loss': 0.3708805330151822, 'time_step': 0.002463376799295115, 'init_value': -15.76695442199707, 'ave_value': -15.569161513424689, 'soft_opc': nan} step=6880




2022-04-22 07:50.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.41 [info     ] FQE_20220422075021: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00014551226482834926, 'time_algorithm_update': 0.0027440502200015756, 'loss': 0.40288600936358754, 'time_step': 0.002956350182378015, 'init_value': -16.842714309692383, 'ave_value': -16.65695656599091, 'soft_opc': nan} step=7224




2022-04-22 07:50.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.42 [info     ] FQE_20220422075021: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00012691700181295706, 'time_algorithm_update': 0.0021135474360266396, 'loss': 0.4418177133978366, 'time_step': 0.0022973951905272726, 'init_value': -17.16643524169922, 'ave_value': -17.08237770177877, 'soft_opc': nan} step=7568




2022-04-22 07:50.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.43 [info     ] FQE_20220422075021: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00012964772623638775, 'time_algorithm_update': 0.0023201406002044678, 'loss': 0.4798307885729903, 'time_step': 0.002511686363885569, 'init_value': -17.47358512878418, 'ave_value': -17.484622133254735, 'soft_opc': nan} step=7912




2022-04-22 07:50.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.44 [info     ] FQE_20220422075021: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001328150893366614, 'time_algorithm_update': 0.002300038587215335, 'loss': 0.5076107431127321, 'time_step': 0.0024952188480732054, 'init_value': -17.948148727416992, 'ave_value': -18.060349544372226, 'soft_opc': nan} step=8256




2022-04-22 07:50.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.45 [info     ] FQE_20220422075021: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00013590829316959826, 'time_algorithm_update': 0.002379927524300509, 'loss': 0.5403184433749249, 'time_step': 0.0025790645632632943, 'init_value': -18.355562210083008, 'ave_value': -18.5258640126321, 'soft_opc': nan} step=8600




2022-04-22 07:50.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.46 [info     ] FQE_20220422075021: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00013168675954951795, 'time_algorithm_update': 0.0022157978179842925, 'loss': 0.5735463947274311, 'time_step': 0.0024071273415587667, 'init_value': -18.690967559814453, 'ave_value': -18.979204948545174, 'soft_opc': nan} step=8944




2022-04-22 07:50.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.47 [info     ] FQE_20220422075021: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00012729403584502464, 'time_algorithm_update': 0.002122998930687128, 'loss': 0.5978139266588314, 'time_step': 0.002307948678038841, 'init_value': -18.94548225402832, 'ave_value': -19.462987093302747, 'soft_opc': nan} step=9288




2022-04-22 07:50.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.48 [info     ] FQE_20220422075021: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00013871525609216026, 'time_algorithm_update': 0.0024476335492244986, 'loss': 0.6323761217306952, 'time_step': 0.0026507557824600575, 'init_value': -19.585161209106445, 'ave_value': -20.181571407016232, 'soft_opc': nan} step=9632




2022-04-22 07:50.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.49 [info     ] FQE_20220422075021: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001317588395850603, 'time_algorithm_update': 0.0022383588691090427, 'loss': 0.6614541778784938, 'time_step': 0.0024329153604285662, 'init_value': -20.026107788085938, 'ave_value': -20.765452882451182, 'soft_opc': nan} step=9976




2022-04-22 07:50.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.50 [info     ] FQE_20220422075021: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001403703246005746, 'time_algorithm_update': 0.0024505430875822556, 'loss': 0.6878220046123187, 'time_step': 0.0026552635569905125, 'init_value': -20.74302101135254, 'ave_value': -21.531408163749504, 'soft_opc': nan} step=10320




2022-04-22 07:50.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.51 [info     ] FQE_20220422075021: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00012565975965455521, 'time_algorithm_update': 0.002080201409583868, 'loss': 0.7184578844476058, 'time_step': 0.002263408067614533, 'init_value': -20.935501098632812, 'ave_value': -21.751467188951132, 'soft_opc': nan} step=10664




2022-04-22 07:50.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.52 [info     ] FQE_20220422075021: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00013076358063276425, 'time_algorithm_update': 0.0021627067133437755, 'loss': 0.7330651278986574, 'time_step': 0.0023523887922597487, 'init_value': -21.653209686279297, 'ave_value': -22.69175339706435, 'soft_opc': nan} step=11008




2022-04-22 07:50.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.53 [info     ] FQE_20220422075021: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00014159637828205907, 'time_algorithm_update': 0.0025496205618215162, 'loss': 0.7610522341362179, 'time_step': 0.0027578556260397266, 'init_value': -21.710025787353516, 'ave_value': -22.81510188359566, 'soft_opc': nan} step=11352




2022-04-22 07:50.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.54 [info     ] FQE_20220422075021: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00013001228487768838, 'time_algorithm_update': 0.002203714708949244, 'loss': 0.7899358176279726, 'time_step': 0.0023936539195304695, 'init_value': -22.134510040283203, 'ave_value': -23.21112407304615, 'soft_opc': nan} step=11696




2022-04-22 07:50.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.54 [info     ] FQE_20220422075021: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00013377777365751044, 'time_algorithm_update': 0.0022560746170753655, 'loss': 0.7996092064652679, 'time_step': 0.002455979585647583, 'init_value': -22.361858367919922, 'ave_value': -23.53853023542125, 'soft_opc': nan} step=12040




2022-04-22 07:50.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.55 [info     ] FQE_20220422075021: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00013221141903899437, 'time_algorithm_update': 0.002298797285834024, 'loss': 0.8105041331849819, 'time_step': 0.002489957005478615, 'init_value': -22.58347511291504, 'ave_value': -23.91212423809695, 'soft_opc': nan} step=12384




2022-04-22 07:50.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.56 [info     ] FQE_20220422075021: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00013300499250722486, 'time_algorithm_update': 0.0022680697052977804, 'loss': 0.8124070539583214, 'time_step': 0.0024658989074618316, 'init_value': -22.896461486816406, 'ave_value': -24.466633474172788, 'soft_opc': nan} step=12728




2022-04-22 07:50.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.57 [info     ] FQE_20220422075021: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00013996279516885447, 'time_algorithm_update': 0.0024705550005269606, 'loss': 0.8299591094363741, 'time_step': 0.0026752595291581263, 'init_value': -22.960269927978516, 'ave_value': -24.592369971903427, 'soft_opc': nan} step=13072




2022-04-22 07:50.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.58 [info     ] FQE_20220422075021: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.0001223718011102011, 'time_algorithm_update': 0.0019453562969385192, 'loss': 0.8252560871133451, 'time_step': 0.0021235880463622335, 'init_value': -23.275310516357422, 'ave_value': -24.973708163068824, 'soft_opc': nan} step=13416




2022-04-22 07:50.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:50.59 [info     ] FQE_20220422075021: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00012809038162231445, 'time_algorithm_update': 0.0020991425181544105, 'loss': 0.827189545439513, 'time_step': 0.002285978821820991, 'init_value': -23.59193229675293, 'ave_value': -25.24469026559041, 'soft_opc': nan} step=13760




2022-04-22 07:50.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.00 [info     ] FQE_20220422075021: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001304406066273534, 'time_algorithm_update': 0.0022118777729744133, 'loss': 0.8426238414181699, 'time_step': 0.0024019237174544225, 'init_value': -23.793682098388672, 'ave_value': -25.732275206367444, 'soft_opc': nan} step=14104




2022-04-22 07:51.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.01 [info     ] FQE_20220422075021: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00013428164082904194, 'time_algorithm_update': 0.0022413799929064376, 'loss': 0.8623451710527026, 'time_step': 0.002436709265376246, 'init_value': -23.861623764038086, 'ave_value': -25.88366954495065, 'soft_opc': nan} step=14448




2022-04-22 07:51.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.02 [info     ] FQE_20220422075021: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00013427124467006949, 'time_algorithm_update': 0.002419517483822135, 'loss': 0.8682182872949471, 'time_step': 0.0026195686916972317, 'init_value': -24.374801635742188, 'ave_value': -26.38637332675542, 'soft_opc': nan} step=14792




2022-04-22 07:51.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.03 [info     ] FQE_20220422075021: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00013531016749004986, 'time_algorithm_update': 0.0022571107675862867, 'loss': 0.8610898353224403, 'time_step': 0.002452894698741824, 'init_value': -24.834871292114258, 'ave_value': -26.879683618490827, 'soft_opc': nan} step=15136




2022-04-22 07:51.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.04 [info     ] FQE_20220422075021: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00012812226317649665, 'time_algorithm_update': 0.0022536599358846973, 'loss': 0.8744014271556638, 'time_step': 0.002440944660541623, 'init_value': -24.86511993408203, 'ave_value': -26.914972749233314, 'soft_opc': nan} step=15480




2022-04-22 07:51.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.05 [info     ] FQE_20220422075021: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001429638197255689, 'time_algorithm_update': 0.002634250147398128, 'loss': 0.8663428199076808, 'time_step': 0.0028466041698012243, 'init_value': -25.470333099365234, 'ave_value': -27.57487273344535, 'soft_opc': nan} step=15824




2022-04-22 07:51.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.06 [info     ] FQE_20220422075021: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00013300499250722486, 'time_algorithm_update': 0.002340645984161732, 'loss': 0.8759169285618809, 'time_step': 0.002534178800361101, 'init_value': -25.862533569335938, 'ave_value': -27.77429812167528, 'soft_opc': nan} step=16168




2022-04-22 07:51.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.07 [info     ] FQE_20220422075021: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00013934942178947982, 'time_algorithm_update': 0.0024138120717780535, 'loss': 0.8993784914383405, 'time_step': 0.0026150726994802784, 'init_value': -25.952531814575195, 'ave_value': -28.04680815709268, 'soft_opc': nan} step=16512




2022-04-22 07:51.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.07 [info     ] FQE_20220422075021: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00013144764789315157, 'time_algorithm_update': 0.0023527665193690812, 'loss': 0.9023746735643768, 'time_step': 0.002545947945395181, 'init_value': -26.736555099487305, 'ave_value': -28.881328896673917, 'soft_opc': nan} step=16856




2022-04-22 07:51.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 07:51.08 [info     ] FQE_20220422075021: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.0001336065835730974, 'time_algorithm_update': 0.0023135979508244714, 'loss': 0.9167336790259321, 'time_step': 0.002510128326194231, 'init_value': -27.06298065185547, 'ave_value': -29.241983999112236, 'soft_opc': nan} step=17200




2022-04-22 07:51.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422075021/model_17200.pt
search iteration:  32
using hyper params:  [0.004151036785968068, 0.006749573545946535, 8.256112367276785e-05, 5]
2022-04-22 07:51.08 [debug    ] RoundIterator is selected.
2022-04-22 07:51.08 [info     ] Directory is created at d3rlpy_logs/CQL_20220422075108
2022-04-22 07:51.08 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 07:51.08 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 07:51.08 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422075108/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.004151036785968068, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:51.21 [info     ] CQL_20220422075108: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003850735680905381, 'time_algorithm_update': 0.035405374675816885, 'temp_loss': 4.879885103661201, 'temp': 0.9852630437454047, 'alpha_loss': -17.7251466133691, 'alpha': 1.017739756947997, 'critic_loss': 99.10392422758775, 'actor_loss': 3.8681363211029525, 'time_step': 0.035889839161338144, 'td_error': 1.3158004273947645, 'init_value': -7.38847017288208, 'ave_value': -6.798112478212137} step=346
2022-04-22 07:51.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:51.34 [info     ] CQL_20220422075108: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003814876424094845, 'time_algorithm_update': 0.035312833813573585, 'temp_loss': 4.831485362411234, 'temp': 0.9573364269871243, 'alpha_loss': -18.390648318163922, 'alpha': 1.054211131065567, 'critic_loss': 173.69607352107934, 'actor_loss': 8.254933208399425, 'time_step': 0.035784793726970694, 'td_error': 1.4245783258684461, 'init_value': -10.726914405822754, 'ave_value': -9.98519652973933} step=692
2022-04-22 07:51.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:51.46 [info     ] CQL_20220422075108: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00037113641727866463, 'time_algorithm_update': 0.03404960053504547, 'temp_loss': 4.698144125800601, 'temp': 0.9308404405682074, 'alpha_loss': -19.04788368974807, 'alpha': 1.0924851604968826, 'critic_loss': 364.3755873751778, 'actor_loss': 10.359493754502665, 'time_step': 0.034516805858281305, 'td_error': 1.3732354041046784, 'init_value': -10.957563400268555, 'ave_value': -10.312558991087167} step=1038
2022-04-22 07:51.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:51.59 [info     ] CQL_20220422075108: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003729927746546751, 'time_algorithm_update': 0.03454360589815702, 'temp_loss': 4.569939206790373, 'temp': 0.9054135283982823, 'alpha_loss': -19.721602649357965, 'alpha': 1.132625836512946, 'critic_loss': 668.0423659837314, 'actor_loss': 8.538991740673264, 'time_step': 0.035006915213744765, 'td_error': 1.306079743842703, 'init_value': -7.962957382202148, 'ave_value': -7.664456287861548} step=1384
2022-04-22 07:51.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:52.11 [info     ] CQL_20220422075108: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00036949918449269556, 'time_algorithm_update': 0.03389392009360253, 'temp_loss': 4.446970443505084, 'temp': 0.8809202651067966, 'alpha_loss': -20.4345538078705, 'alpha': 1.1746650341618268, 'critic_loss': 1076.9975637094135, 'actor_loss': 5.348128987185528, 'time_step': 0.034363590913011846, 'td_error': 1.2974686326503568, 'init_value': -6.304927825927734, 'ave_value': -6.189214728600948} step=1730
2022-04-22 07:52.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:52.23 [info     ] CQL_20220422075108: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003721507298464031, 'time_algorithm_update': 0.034116527248669225, 'temp_loss': 4.3282059300152556, 'temp': 0.8572778166029494, 'alpha_loss': -21.194594785657234, 'alpha': 1.2186771703593304, 'critic_loss': 1500.6361965995304, 'actor_loss': 4.8534180153312025, 'time_step': 0.034579199862617975, 'td_error': 1.3095039536967534, 'init_value': -6.619510650634766, 'ave_value': -6.534338695147186} step=2076
2022-04-22 07:52.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:52.36 [info     ] CQL_20220422075108: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00036377920580737163, 'time_algorithm_update': 0.035008713000082556, 'temp_loss': 4.213206404206381, 'temp': 0.8344182225665605, 'alpha_loss': -21.993902741140023, 'alpha': 1.2647080063130813, 'critic_loss': 1909.4470430054416, 'actor_loss': 5.19257534583869, 'time_step': 0.03547165021730985, 'td_error': 1.3218558009438974, 'init_value': -6.966246604919434, 'ave_value': -6.916457027278844} step=2422
2022-04-22 07:52.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:52.48 [info     ] CQL_20220422075108: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003792013047058458, 'time_algorithm_update': 0.03427590869065654, 'temp_loss': 4.101576570830593, 'temp': 0.8122842751141918, 'alpha_loss': -22.83149814605713, 'alpha': 1.3127976583607623, 'critic_loss': 2330.354554986678, 'actor_loss': 5.753052336632172, 'time_step': 0.03474515504230653, 'td_error': 1.3360170262511304, 'init_value': -7.525727272033691, 'ave_value': -7.490475395224342} step=2768
2022-04-22 07:52.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:53.01 [info     ] CQL_20220422075108: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003745617894078955, 'time_algorithm_update': 0.03439847720151692, 'temp_loss': 3.993855754075023, 'temp': 0.7908328921464137, 'alpha_loss': -23.702208122077018, 'alpha': 1.362979784866289, 'critic_loss': 2765.816639100885, 'actor_loss': 6.376879233156325, 'time_step': 0.034869757690870695, 'td_error': 1.3541213750429342, 'init_value': -8.389163970947266, 'ave_value': -8.35192810549675} step=3114
2022-04-22 07:53.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:53.13 [info     ] CQL_20220422075108: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003739154407743774, 'time_algorithm_update': 0.034482744387808566, 'temp_loss': 3.888138427210681, 'temp': 0.7700182588803286, 'alpha_loss': -24.60786543278336, 'alpha': 1.4152946596200755, 'critic_loss': 3220.5476060106575, 'actor_loss': 7.0807288147810565, 'time_step': 0.034951213467327845, 'td_error': 1.3702689594869786, 'init_value': -8.985621452331543, 'ave_value': -8.958779428444975} step=3460
2022-04-22 07:53.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:53.26 [info     ] CQL_20220422075108: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00038810892601233687, 'time_algorithm_update': 0.03571714969039652, 'temp_loss': 3.7873687640779967, 'temp': 0.7498078354865829, 'alpha_loss': -25.56224852214659, 'alpha': 1.4698027916726348, 'critic_loss': 3705.0776571814035, 'actor_loss': 7.845408265990329, 'time_step': 0.03620594016389351, 'td_error': 1.3886428285896304, 'init_value': -9.675325393676758, 'ave_value': -9.651688600253472} step=3806
2022-04-22 07:53.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:53.38 [info     ] CQL_20220422075108: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003682430079906662, 'time_algorithm_update': 0.034614713205767506, 'temp_loss': 3.6878739388691897, 'temp': 0.7301722217157397, 'alpha_loss': -26.54744894104886, 'alpha': 1.5265584936031718, 'critic_loss': 4195.981448840544, 'actor_loss': 8.679611277718076, 'time_step': 0.035081084753047526, 'td_error': 1.4142780558918593, 'init_value': -10.70136833190918, 'ave_value': -10.677283071714196} step=4152
2022-04-22 07:53.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:53.51 [info     ] CQL_20220422075108: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.000374317858260491, 'time_algorithm_update': 0.035309019805379, 'temp_loss': 3.5921171255883455, 'temp': 0.711083700202104, 'alpha_loss': -27.572638054114545, 'alpha': 1.5856205996750408, 'critic_loss': 4724.8022319815755, 'actor_loss': 9.598952930097635, 'time_step': 0.03577744685156497, 'td_error': 1.4374823659502638, 'init_value': -11.453551292419434, 'ave_value': -11.436573459232742} step=4498
2022-04-22 07:53.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:54.04 [info     ] CQL_20220422075108: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003714919779342034, 'time_algorithm_update': 0.03548519046320391, 'temp_loss': 3.4978489076471053, 'temp': 0.6925191677719182, 'alpha_loss': -28.644085768330303, 'alpha': 1.6470597393250879, 'critic_loss': 5272.237616566565, 'actor_loss': 10.551322961818276, 'time_step': 0.035952336526330494, 'td_error': 1.4660398474696192, 'init_value': -12.403843879699707, 'ave_value': -12.389496689873774} step=4844
2022-04-22 07:54.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:54.16 [info     ] CQL_20220422075108: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003919746145347639, 'time_algorithm_update': 0.03457839364950367, 'temp_loss': 3.4067433872663906, 'temp': 0.6744624384910385, 'alpha_loss': -29.74989567464487, 'alpha': 1.710952779116658, 'critic_loss': 5794.3315711931, 'actor_loss': 11.59425521861611, 'time_step': 0.03506092115633749, 'td_error': 1.4986875956926482, 'init_value': -13.451367378234863, 'ave_value': -13.436232928587623} step=5190
2022-04-22 07:54.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:54.29 [info     ] CQL_20220422075108: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003636083161899809, 'time_algorithm_update': 0.03562600764236009, 'temp_loss': 3.3171933682667727, 'temp': 0.6568928621063342, 'alpha_loss': -30.906808919300232, 'alpha': 1.7773788809087234, 'critic_loss': 6345.7229427271495, 'actor_loss': 12.69165141320642, 'time_step': 0.03607873489401933, 'td_error': 1.535912260203073, 'init_value': -14.550636291503906, 'ave_value': -14.536364225077369} step=5536
2022-04-22 07:54.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:54.40 [info     ] CQL_20220422075108: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003235967173052661, 'time_algorithm_update': 0.028318877165028126, 'temp_loss': 3.232215677382629, 'temp': 0.6397897880201395, 'alpha_loss': -32.10630410806292, 'alpha': 1.8464321556118872, 'critic_loss': 6964.489384822074, 'actor_loss': 13.826679364794252, 'time_step': 0.028724667653872098, 'td_error': 1.582777967092053, 'init_value': -15.872751235961914, 'ave_value': -15.856084651413434} step=5882
2022-04-22 07:54.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:54.52 [info     ] CQL_20220422075108: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00038724827628604247, 'time_algorithm_update': 0.035070703208791035, 'temp_loss': 3.1474953944972484, 'temp': 0.6231390988895659, 'alpha_loss': -33.35492183707353, 'alpha': 1.9181988663067018, 'critic_loss': 7612.841863202222, 'actor_loss': 15.031890025717674, 'time_step': 0.03555398387026924, 'td_error': 1.6133640805449723, 'init_value': -16.570453643798828, 'ave_value': -16.569974702497746} step=6228
2022-04-22 07:54.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:55.04 [info     ] CQL_20220422075108: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00037508617246771134, 'time_algorithm_update': 0.03339597806765165, 'temp_loss': 3.066228735653651, 'temp': 0.6069303182508216, 'alpha_loss': -34.65655404019218, 'alpha': 1.992791257497203, 'critic_loss': 8261.495838319635, 'actor_loss': 16.29593305367266, 'time_step': 0.033862054003456425, 'td_error': 1.6779544759160792, 'init_value': -18.249082565307617, 'ave_value': -18.237264663023428} step=6574
2022-04-22 07:55.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:55.17 [info     ] CQL_20220422075108: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003800275008802469, 'time_algorithm_update': 0.03401325548315324, 'temp_loss': 2.9854783150502024, 'temp': 0.5911471133976314, 'alpha_loss': -36.00453576324992, 'alpha': 2.0703115573508204, 'critic_loss': 8892.019607455744, 'actor_loss': 17.53737950738455, 'time_step': 0.0344930466888957, 'td_error': 1.7243278841914877, 'init_value': -19.28516960144043, 'ave_value': -19.278542970537064} step=6920
2022-04-22 07:55.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:55.29 [info     ] CQL_20220422075108: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00036955499924676266, 'time_algorithm_update': 0.034697163311732296, 'temp_loss': 2.9086328400352786, 'temp': 0.5757786063100562, 'alpha_loss': -37.40730180905734, 'alpha': 2.1508604191631253, 'critic_loss': 9580.783273685875, 'actor_loss': 18.82337173285512, 'time_step': 0.03515905865355034, 'td_error': 1.7788735800968312, 'init_value': -20.47968101501465, 'ave_value': -20.475463459309722} step=7266
2022-04-22 07:55.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:55.41 [info     ] CQL_20220422075108: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003797215533394345, 'time_algorithm_update': 0.033962811348755235, 'temp_loss': 2.8327460647318405, 'temp': 0.5608112684564094, 'alpha_loss': -38.84734776667777, 'alpha': 2.2345401854873392, 'critic_loss': 10208.299209153722, 'actor_loss': 20.065665906564348, 'time_step': 0.03442630395723905, 'td_error': 1.8449439251549016, 'init_value': -21.879436492919922, 'ave_value': -21.870862712832977} step=7612
2022-04-22 07:55.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:55.54 [info     ] CQL_20220422075108: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00037585724295908314, 'time_algorithm_update': 0.03471825233084618, 'temp_loss': 2.7585236536974165, 'temp': 0.5462387720973505, 'alpha_loss': -40.37235467282334, 'alpha': 2.3214883073905987, 'critic_loss': 10735.742489500542, 'actor_loss': 21.30607282495223, 'time_step': 0.03518151478960335, 'td_error': 1.8907013143907028, 'init_value': -22.7380428314209, 'ave_value': -22.738774843428473} step=7958
2022-04-22 07:55.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:56.07 [info     ] CQL_20220422075108: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0003899074014211666, 'time_algorithm_update': 0.03599024645854972, 'temp_loss': 2.686968446466964, 'temp': 0.5320450060629431, 'alpha_loss': -41.937486020126784, 'alpha': 2.4118343936225584, 'critic_loss': 11239.29212201951, 'actor_loss': 22.53189115028161, 'time_step': 0.03646379743697326, 'td_error': 1.9632177032320937, 'init_value': -24.14168357849121, 'ave_value': -24.13732334080891} step=8304
2022-04-22 07:56.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:56.19 [info     ] CQL_20220422075108: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00035956553641082234, 'time_algorithm_update': 0.03442306463429005, 'temp_loss': 2.6170306777678474, 'temp': 0.5182213581710882, 'alpha_loss': -43.579999769354146, 'alpha': 2.505703348644896, 'critic_loss': 11516.607043668713, 'actor_loss': 23.708091443673723, 'time_step': 0.03486714611163718, 'td_error': 2.0136671618726645, 'init_value': -25.028818130493164, 'ave_value': -25.03108879313643} step=8650
2022-04-22 07:56.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:56.32 [info     ] CQL_20220422075108: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003830532118075156, 'time_algorithm_update': 0.03510613593062913, 'temp_loss': 2.5495303821012465, 'temp': 0.5047553017993883, 'alpha_loss': -45.264262204914424, 'alpha': 2.6032269145712, 'critic_loss': 11135.061577063765, 'actor_loss': 24.714276390957693, 'time_step': 0.035572330386652425, 'td_error': 2.07493188271028, 'init_value': -26.110322952270508, 'ave_value': -26.111280941590138} step=8996
2022-04-22 07:56.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:56.45 [info     ] CQL_20220422075108: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00037819801727471324, 'time_algorithm_update': 0.03524039935514417, 'temp_loss': 2.483530771525609, 'temp': 0.4916386730925885, 'alpha_loss': -47.02970623832218, 'alpha': 2.7045492354155964, 'critic_loss': 10189.312866916547, 'actor_loss': 25.771242626829643, 'time_step': 0.035705835833025806, 'td_error': 2.122427803325819, 'init_value': -26.876758575439453, 'ave_value': -26.88583577564397} step=9342
2022-04-22 07:56.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:56.57 [info     ] CQL_20220422075108: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.000380078492137049, 'time_algorithm_update': 0.034407321428287924, 'temp_loss': 2.4178876490951273, 'temp': 0.478865068513534, 'alpha_loss': -48.85212480401717, 'alpha': 2.8098048247353877, 'critic_loss': 9376.497380780347, 'actor_loss': 26.884710091386918, 'time_step': 0.03487664151053897, 'td_error': 2.208131958705339, 'init_value': -28.33831214904785, 'ave_value': -28.338871824464395} step=9688
2022-04-22 07:56.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:57.09 [info     ] CQL_20220422075108: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003758799823033327, 'time_algorithm_update': 0.03395321947990814, 'temp_loss': 2.3564077553721523, 'temp': 0.46642395052951197, 'alpha_loss': -50.75865677188587, 'alpha': 2.919157716580209, 'critic_loss': 8082.783371059881, 'actor_loss': 27.828795162928586, 'time_step': 0.03442444070915266, 'td_error': 2.258344923585572, 'init_value': -29.101547241210938, 'ave_value': -29.106132158141047} step=10034
2022-04-22 07:57.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:57.22 [info     ] CQL_20220422075108: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.000377429013996455, 'time_algorithm_update': 0.03519216024806734, 'temp_loss': 2.2939586074366045, 'temp': 0.4543056118522765, 'alpha_loss': -52.73941220851303, 'alpha': 3.0327909206379355, 'critic_loss': 6592.692680579389, 'actor_loss': 28.81850857266112, 'time_step': 0.03566582699042524, 'td_error': 2.3305369306116765, 'init_value': -30.241727828979492, 'ave_value': -30.24272429781426} step=10380
2022-04-22 07:57.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:57.35 [info     ] CQL_20220422075108: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003879022047009771, 'time_algorithm_update': 0.03516075721365868, 'temp_loss': 2.234792734846214, 'temp': 0.4425036385224734, 'alpha_loss': -54.791940512684725, 'alpha': 3.1508569000773345, 'critic_loss': 6651.062165541456, 'actor_loss': 30.4930540095864, 'time_step': 0.03564892269972432, 'td_error': 2.4621500494738253, 'init_value': -32.246456146240234, 'ave_value': -32.23562761942546} step=10726
2022-04-22 07:57.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:57.47 [info     ] CQL_20220422075108: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00036024151509896866, 'time_algorithm_update': 0.03442053918893627, 'temp_loss': 2.1769279737693035, 'temp': 0.4310069868847125, 'alpha_loss': -56.92834670006195, 'alpha': 3.273511750849685, 'critic_loss': 7078.465515489524, 'actor_loss': 31.949162306813147, 'time_step': 0.03486641362912393, 'td_error': 2.5554577161850633, 'init_value': -33.527069091796875, 'ave_value': -33.5213892958864} step=11072
2022-04-22 07:57.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:58.00 [info     ] CQL_20220422075108: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00036943923531240124, 'time_algorithm_update': 0.034671982588795565, 'temp_loss': 2.1205947640314267, 'temp': 0.4198090923314839, 'alpha_loss': -59.15021944872906, 'alpha': 3.4009443934942256, 'critic_loss': 7460.4199486881325, 'actor_loss': 33.333407815481195, 'time_step': 0.035131264284166984, 'td_error': 2.6556732615470766, 'init_value': -34.86732864379883, 'ave_value': -34.86437861649218} step=11418
2022-04-22 07:58.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:58.12 [info     ] CQL_20220422075108: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003765497593521383, 'time_algorithm_update': 0.03606513125358978, 'temp_loss': 2.064869304612882, 'temp': 0.4089012602505656, 'alpha_loss': -61.44662519686484, 'alpha': 3.5333407304190487, 'critic_loss': 7867.972747979136, 'actor_loss': 34.77317708627337, 'time_step': 0.036530394774640915, 'td_error': 2.745302366995076, 'init_value': -35.999141693115234, 'ave_value': -36.001253840707385} step=11764
2022-04-22 07:58.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:58.25 [info     ] CQL_20220422075108: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00037016344897319815, 'time_algorithm_update': 0.034298794807037174, 'temp_loss': 2.011459507694134, 'temp': 0.39827919850459675, 'alpha_loss': -63.841228418956604, 'alpha': 3.6708771948180448, 'critic_loss': 8132.8386936077495, 'actor_loss': 36.16925425887797, 'time_step': 0.03475692299749121, 'td_error': 2.8522740387423084, 'init_value': -37.361846923828125, 'ave_value': -37.364089959670494} step=12110
2022-04-22 07:58.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:58.37 [info     ] CQL_20220422075108: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003724745932342, 'time_algorithm_update': 0.03496141034054619, 'temp_loss': 1.9592525941788117, 'temp': 0.3879325463937197, 'alpha_loss': -66.29502077047536, 'alpha': 3.8137472006626902, 'critic_loss': 8446.690113574783, 'actor_loss': 37.53103885760886, 'time_step': 0.035425569280723615, 'td_error': 2.9858097428420947, 'init_value': -39.03457260131836, 'ave_value': -39.03142589371936} step=12456
2022-04-22 07:58.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:58.50 [info     ] CQL_20220422075108: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00037001047520279193, 'time_algorithm_update': 0.03506409088311168, 'temp_loss': 1.908295131832189, 'temp': 0.3778544325876787, 'alpha_loss': -68.90128665990223, 'alpha': 3.9621747918211656, 'critic_loss': 8755.937672168533, 'actor_loss': 38.81575372728998, 'time_step': 0.035523551736952944, 'td_error': 3.0667419055219995, 'init_value': -39.943031311035156, 'ave_value': -39.94382271294121} step=12802
2022-04-22 07:58.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:59.03 [info     ] CQL_20220422075108: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0003790097429573191, 'time_algorithm_update': 0.03520728053385123, 'temp_loss': 1.8591115526381257, 'temp': 0.36803652354747574, 'alpha_loss': -71.57776264786031, 'alpha': 4.116401158316287, 'critic_loss': 8716.873502698249, 'actor_loss': 39.96009652462998, 'time_step': 0.03567997017347744, 'td_error': 3.182768667585851, 'init_value': -41.31157302856445, 'ave_value': -41.30810096491113} step=13148
2022-04-22 07:59.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:59.15 [info     ] CQL_20220422075108: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003959154118003184, 'time_algorithm_update': 0.03408396588584591, 'temp_loss': 1.8104642147273686, 'temp': 0.3584758045011862, 'alpha_loss': -74.35580805960419, 'alpha': 4.276621110177453, 'critic_loss': 8660.585329265265, 'actor_loss': 41.0744536736108, 'time_step': 0.034567829501422155, 'td_error': 3.272669702632687, 'init_value': -42.313438415527344, 'ave_value': -42.31188723318607} step=13494
2022-04-22 07:59.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:59.28 [info     ] CQL_20220422075108: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00038028245716425726, 'time_algorithm_update': 0.035753531263053764, 'temp_loss': 1.7632024074565469, 'temp': 0.34916229985352887, 'alpha_loss': -77.27250821328576, 'alpha': 4.443098554721457, 'critic_loss': 8064.9937229046245, 'actor_loss': 41.949782729837935, 'time_step': 0.036234850139287164, 'td_error': 3.332265080004422, 'init_value': -42.93476104736328, 'ave_value': -42.93807476020187} step=13840
2022-04-22 07:59.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:59.41 [info     ] CQL_20220422075108: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003768632866743672, 'time_algorithm_update': 0.03485750256246225, 'temp_loss': 1.7176341896801326, 'temp': 0.3400921584726069, 'alpha_loss': -80.26421704595488, 'alpha': 4.616052980367848, 'critic_loss': 6967.021722870755, 'actor_loss': 42.68330549780344, 'time_step': 0.035329645768755434, 'td_error': 3.3902970511224, 'init_value': -43.555633544921875, 'ave_value': -43.557453193501765} step=14186
2022-04-22 07:59.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 07:59.53 [info     ] CQL_20220422075108: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.000373044454982515, 'time_algorithm_update': 0.035022143683681596, 'temp_loss': 1.6727213797541711, 'temp': 0.3312571058211299, 'alpha_loss': -83.39206730837078, 'alpha': 4.795720086621412, 'critic_loss': 6018.780442783598, 'actor_loss': 43.55700646108286, 'time_step': 0.035488128662109375, 'td_error': 3.4858717420998873, 'init_value': -44.58913803100586, 'ave_value': -44.592499516009156} step=14532
2022-04-22 07:59.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:00.06 [info     ] CQL_20220422075108: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00037586895716672686, 'time_algorithm_update': 0.03483320798488022, 'temp_loss': 1.6295700655507215, 'temp': 0.3226517969473249, 'alpha_loss': -86.62396451916997, 'alpha': 4.982377234221883, 'critic_loss': 5844.756392815209, 'actor_loss': 44.6069031753981, 'time_step': 0.03530466901084591, 'td_error': 3.588725082426889, 'init_value': -45.665531158447266, 'ave_value': -45.66577704652558} step=14878
2022-04-22 08:00.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:00.18 [info     ] CQL_20220422075108: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003676697008871619, 'time_algorithm_update': 0.034658329335251294, 'temp_loss': 1.5869924429524151, 'temp': 0.3142689997922478, 'alpha_loss': -90.01664295086282, 'alpha': 5.176336685357066, 'critic_loss': 6030.3136938899925, 'actor_loss': 45.6952566907585, 'time_step': 0.03511350485630807, 'td_error': 3.6778209536059854, 'init_value': -46.57231903076172, 'ave_value': -46.57309887896448} step=15224
2022-04-22 08:00.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:00.31 [info     ] CQL_20220422075108: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.00036373097083472103, 'time_algorithm_update': 0.0347864159269829, 'temp_loss': 1.5462807003473271, 'temp': 0.3061042574444258, 'alpha_loss': -93.51654229136561, 'alpha': 5.377831629935027, 'critic_loss': 5853.055285856214, 'actor_loss': 46.54768172027059, 'time_step': 0.03523912388465308, 'td_error': 3.769087673809875, 'init_value': -47.485076904296875, 'ave_value': -47.48675721893383} step=15570
2022-04-22 08:00.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:00.44 [info     ] CQL_20220422075108: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.000384668394320273, 'time_algorithm_update': 0.035479097697087104, 'temp_loss': 1.5059815879502048, 'temp': 0.2981502921939585, 'alpha_loss': -97.15875678531008, 'alpha': 5.587196049662683, 'critic_loss': 5834.443143458725, 'actor_loss': 47.38424085054783, 'time_step': 0.03595907426293875, 'td_error': 3.8392210191766702, 'init_value': -48.189186096191406, 'ave_value': -48.18926883261596} step=15916
2022-04-22 08:00.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:00.56 [info     ] CQL_20220422075108: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00037699765552675105, 'time_algorithm_update': 0.035079920222993534, 'temp_loss': 1.46676874436395, 'temp': 0.2904038429260254, 'alpha_loss': -100.92841954313951, 'alpha': 5.804658921467776, 'critic_loss': 5961.17899176978, 'actor_loss': 48.28053479938838, 'time_step': 0.03555055849814002, 'td_error': 3.9478946487029414, 'init_value': -49.26860809326172, 'ave_value': -49.26408444997426} step=16262
2022-04-22 08:00.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:01.09 [info     ] CQL_20220422075108: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00036189735280296015, 'time_algorithm_update': 0.034552101454982866, 'temp_loss': 1.4286810790183226, 'temp': 0.2828593732132388, 'alpha_loss': -104.86380578189916, 'alpha': 6.03059779426266, 'critic_loss': 6226.955382947976, 'actor_loss': 49.10711958780454, 'time_step': 0.03499879175527936, 'td_error': 4.027588391232633, 'init_value': -50.02717971801758, 'ave_value': -50.02382462014514} step=16608
2022-04-22 08:01.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:01.21 [info     ] CQL_20220422075108: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00037368460197669234, 'time_algorithm_update': 0.03463060938554003, 'temp_loss': 1.3914358543522785, 'temp': 0.2755107788336759, 'alpha_loss': -108.93761664594528, 'alpha': 6.265339647414367, 'critic_loss': 6476.729883094744, 'actor_loss': 49.8408132343623, 'time_step': 0.0350942777071385, 'td_error': 4.092022686309174, 'init_value': -50.61455154418945, 'ave_value': -50.614217714406465} step=16954
2022-04-22 08:01.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:01.34 [info     ] CQL_20220422075108: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003742999424135065, 'time_algorithm_update': 0.03532399056274767, 'temp_loss': 1.3555745348075912, 'temp': 0.2683532107083095, 'alpha_loss': -113.19867373339703, 'alpha': 6.509219034558776, 'critic_loss': 6298.950961603595, 'actor_loss': 50.27711464766133, 'time_step': 0.035794448301282235, 'td_error': 4.106101385450899, 'init_value': -50.7179069519043, 'ave_value': -50.72410872953658} step=17300
2022-04-22 08:01.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422075108/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.519100

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:01.35 [info     ] FQE_20220422080134: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00013541888041668628, 'time_algorithm_update': 0.0025697056069431535, 'loss': 0.00762728823724775, 'time_step': 0.002772832491311682, 'init_value': -0.14991839230060577, 'ave_value': -0.10679319687858894, 'soft_opc': nan} step=166




2022-04-22 08:01.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.35 [info     ] FQE_20220422080134: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00012963363923222185, 'time_algorithm_update': 0.002517219049384795, 'loss': 0.004290885235889849, 'time_step': 0.0027067144233060172, 'init_value': -0.2029004693031311, 'ave_value': -0.1140213367000625, 'soft_opc': nan} step=332




2022-04-22 08:01.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.36 [info     ] FQE_20220422080134: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00012285450854933406, 'time_algorithm_update': 0.0022382477679884576, 'loss': 0.003512400480166796, 'time_step': 0.0024208534194762447, 'init_value': -0.19434471428394318, 'ave_value': -0.09234362935378879, 'soft_opc': nan} step=498




2022-04-22 08:01.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.36 [info     ] FQE_20220422080134: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012129473398966962, 'time_algorithm_update': 0.0021384221961699337, 'loss': 0.0031360738689124763, 'time_step': 0.0023152670228337668, 'init_value': -0.2475278675556183, 'ave_value': -0.12514924999989302, 'soft_opc': nan} step=664




2022-04-22 08:01.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.36 [info     ] FQE_20220422080134: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00012179024248238069, 'time_algorithm_update': 0.002302962613393025, 'loss': 0.00282621816086796, 'time_step': 0.0024809277201273353, 'init_value': -0.323140949010849, 'ave_value': -0.1788720851512374, 'soft_opc': nan} step=830




2022-04-22 08:01.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.37 [info     ] FQE_20220422080134: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001330907086291945, 'time_algorithm_update': 0.002476268504039351, 'loss': 0.002629303733161819, 'time_step': 0.0026745379689228103, 'init_value': -0.36638569831848145, 'ave_value': -0.20934553321257973, 'soft_opc': nan} step=996




2022-04-22 08:01.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.37 [info     ] FQE_20220422080134: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.0001282447791961302, 'time_algorithm_update': 0.00225496722991208, 'loss': 0.0024950341167242593, 'time_step': 0.0024434888219258873, 'init_value': -0.40424591302871704, 'ave_value': -0.22096403496809774, 'soft_opc': nan} step=1162




2022-04-22 08:01.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.38 [info     ] FQE_20220422080134: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00011753748698406909, 'time_algorithm_update': 0.0020892792437449993, 'loss': 0.002326839757769043, 'time_step': 0.0022608285926910767, 'init_value': -0.47379767894744873, 'ave_value': -0.2565207786673495, 'soft_opc': nan} step=1328




2022-04-22 08:01.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.38 [info     ] FQE_20220422080134: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00011781468448868717, 'time_algorithm_update': 0.0020058700837284685, 'loss': 0.0021146423946392825, 'time_step': 0.0021792808210993387, 'init_value': -0.5351666808128357, 'ave_value': -0.2960442666874712, 'soft_opc': nan} step=1494




2022-04-22 08:01.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.39 [info     ] FQE_20220422080134: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00012242075908614928, 'time_algorithm_update': 0.0022215196885258318, 'loss': 0.0020398641557780556, 'time_step': 0.002397690910890878, 'init_value': -0.6258013248443604, 'ave_value': -0.34453152489370187, 'soft_opc': nan} step=1660




2022-04-22 08:01.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.39 [info     ] FQE_20220422080134: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001227496618247894, 'time_algorithm_update': 0.0021930617022227093, 'loss': 0.0019273371358559165, 'time_step': 0.0023713830005691714, 'init_value': -0.7186071872711182, 'ave_value': -0.40143657251017856, 'soft_opc': nan} step=1826




2022-04-22 08:01.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.40 [info     ] FQE_20220422080134: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00013193164963320078, 'time_algorithm_update': 0.00246063197951719, 'loss': 0.0019400305092244982, 'time_step': 0.00265282320689006, 'init_value': -0.7892131209373474, 'ave_value': -0.44174081216826366, 'soft_opc': nan} step=1992




2022-04-22 08:01.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.40 [info     ] FQE_20220422080134: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00013134422072445053, 'time_algorithm_update': 0.0023650376193494684, 'loss': 0.0022115670905465327, 'time_step': 0.0025590816176081278, 'init_value': -0.8931799530982971, 'ave_value': -0.5044751390328144, 'soft_opc': nan} step=2158




2022-04-22 08:01.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.41 [info     ] FQE_20220422080134: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00012428502002394343, 'time_algorithm_update': 0.0021665828773774296, 'loss': 0.0023173417104267887, 'time_step': 0.0023502298148281604, 'init_value': -0.9820955395698547, 'ave_value': -0.5480879090373983, 'soft_opc': nan} step=2324




2022-04-22 08:01.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.41 [info     ] FQE_20220422080134: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.0001318009502916451, 'time_algorithm_update': 0.002616971372121788, 'loss': 0.002505512905646542, 'time_step': 0.0028140631066747458, 'init_value': -1.0796524286270142, 'ave_value': -0.6133230229973927, 'soft_opc': nan} step=2490




2022-04-22 08:01.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.42 [info     ] FQE_20220422080134: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.000126103320753718, 'time_algorithm_update': 0.0021909662039883166, 'loss': 0.002834999017243509, 'time_step': 0.002375025347054723, 'init_value': -1.1977965831756592, 'ave_value': -0.6842537893703929, 'soft_opc': nan} step=2656




2022-04-22 08:01.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.42 [info     ] FQE_20220422080134: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00011614431817847562, 'time_algorithm_update': 0.002137777317001159, 'loss': 0.00301687213513686, 'time_step': 0.0023077022598450444, 'init_value': -1.3001651763916016, 'ave_value': -0.7357087798953593, 'soft_opc': nan} step=2822




2022-04-22 08:01.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.42 [info     ] FQE_20220422080134: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00012686453669904227, 'time_algorithm_update': 0.0022955759462103785, 'loss': 0.0033134459398675665, 'time_step': 0.0024811761925019413, 'init_value': -1.3618392944335938, 'ave_value': -0.7632849360170129, 'soft_opc': nan} step=2988




2022-04-22 08:01.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.43 [info     ] FQE_20220422080134: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00012296510029988117, 'time_algorithm_update': 0.0023515827684517368, 'loss': 0.00380539190001696, 'time_step': 0.002533621098621782, 'init_value': -1.474870204925537, 'ave_value': -0.8285869263448157, 'soft_opc': nan} step=3154




2022-04-22 08:01.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.43 [info     ] FQE_20220422080134: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012498447693974138, 'time_algorithm_update': 0.002312460577631571, 'loss': 0.004221577807792451, 'time_step': 0.002498626708984375, 'init_value': -1.5752484798431396, 'ave_value': -0.8809207068236025, 'soft_opc': nan} step=3320




2022-04-22 08:01.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.44 [info     ] FQE_20220422080134: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00013391368360404508, 'time_algorithm_update': 0.0025764315961355187, 'loss': 0.004476425002668479, 'time_step': 0.002779815570417657, 'init_value': -1.6555687189102173, 'ave_value': -0.9334545768394663, 'soft_opc': nan} step=3486




2022-04-22 08:01.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.44 [info     ] FQE_20220422080134: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001271144453301487, 'time_algorithm_update': 0.002418976232229945, 'loss': 0.005313542097139296, 'time_step': 0.0026065240423363374, 'init_value': -1.7946714162826538, 'ave_value': -1.0159997915161085, 'soft_opc': nan} step=3652




2022-04-22 08:01.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.45 [info     ] FQE_20220422080134: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00013404725545860198, 'time_algorithm_update': 0.0023607101785131247, 'loss': 0.005471861912835255, 'time_step': 0.0025541983455060475, 'init_value': -1.8680825233459473, 'ave_value': -1.032207569902813, 'soft_opc': nan} step=3818




2022-04-22 08:01.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.45 [info     ] FQE_20220422080134: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001205406993268484, 'time_algorithm_update': 0.002191575176744576, 'loss': 0.006104712914507163, 'time_step': 0.002370547099285815, 'init_value': -1.9129071235656738, 'ave_value': -1.0547359650333723, 'soft_opc': nan} step=3984




2022-04-22 08:01.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.46 [info     ] FQE_20220422080134: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00011589010077786732, 'time_algorithm_update': 0.00216714732618217, 'loss': 0.00685228782086016, 'time_step': 0.00233656670673784, 'init_value': -2.050379514694214, 'ave_value': -1.1560409676384282, 'soft_opc': nan} step=4150




2022-04-22 08:01.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.46 [info     ] FQE_20220422080134: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00013106271445033062, 'time_algorithm_update': 0.0025949320161198996, 'loss': 0.007440278135486935, 'time_step': 0.002789320715938706, 'init_value': -2.2033863067626953, 'ave_value': -1.2634571820229024, 'soft_opc': nan} step=4316




2022-04-22 08:01.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.47 [info     ] FQE_20220422080134: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00012228575097509176, 'time_algorithm_update': 0.0022617549781339713, 'loss': 0.007960900833342135, 'time_step': 0.002442993313433176, 'init_value': -2.2072606086730957, 'ave_value': -1.2395070777417303, 'soft_opc': nan} step=4482




2022-04-22 08:01.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.47 [info     ] FQE_20220422080134: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001281672213450972, 'time_algorithm_update': 0.0023633931056562677, 'loss': 0.00865393866790481, 'time_step': 0.002547952065984887, 'init_value': -2.295628547668457, 'ave_value': -1.2849363130894866, 'soft_opc': nan} step=4648




2022-04-22 08:01.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.48 [info     ] FQE_20220422080134: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00011517340878406202, 'time_algorithm_update': 0.0020136905003743, 'loss': 0.009277790884069359, 'time_step': 0.0021867938788540393, 'init_value': -2.3744311332702637, 'ave_value': -1.3380971682769758, 'soft_opc': nan} step=4814




2022-04-22 08:01.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.48 [info     ] FQE_20220422080134: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00013295857303113822, 'time_algorithm_update': 0.002561783216085779, 'loss': 0.010186937165303514, 'time_step': 0.0027597783559776216, 'init_value': -2.532541275024414, 'ave_value': -1.4944966112305451, 'soft_opc': nan} step=4980




2022-04-22 08:01.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.49 [info     ] FQE_20220422080134: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00013140023472797438, 'time_algorithm_update': 0.002436744161399014, 'loss': 0.010910952216538837, 'time_step': 0.002628218696778079, 'init_value': -2.5904250144958496, 'ave_value': -1.5131474855112599, 'soft_opc': nan} step=5146




2022-04-22 08:01.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.49 [info     ] FQE_20220422080134: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00012940240193562335, 'time_algorithm_update': 0.0023621019110622176, 'loss': 0.01148389533431415, 'time_step': 0.002556224903428411, 'init_value': -2.643247604370117, 'ave_value': -1.5501157596573099, 'soft_opc': nan} step=5312




2022-04-22 08:01.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.49 [info     ] FQE_20220422080134: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001252659832138613, 'time_algorithm_update': 0.0022923989468310252, 'loss': 0.011757738700316074, 'time_step': 0.0024778153522905097, 'init_value': -2.6465566158294678, 'ave_value': -1.5398341274476266, 'soft_opc': nan} step=5478




2022-04-22 08:01.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.50 [info     ] FQE_20220422080134: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00012827494058264307, 'time_algorithm_update': 0.0024020987820912556, 'loss': 0.01266074598435851, 'time_step': 0.002592203128768737, 'init_value': -2.7279937267303467, 'ave_value': -1.5737235750700977, 'soft_opc': nan} step=5644




2022-04-22 08:01.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.50 [info     ] FQE_20220422080134: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00012300387922539768, 'time_algorithm_update': 0.002247958298189094, 'loss': 0.013341612331933869, 'time_step': 0.002430961792727551, 'init_value': -2.8093252182006836, 'ave_value': -1.6492667629777849, 'soft_opc': nan} step=5810




2022-04-22 08:01.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.51 [info     ] FQE_20220422080134: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00013160131063806005, 'time_algorithm_update': 0.002355459224746888, 'loss': 0.01368208123014746, 'time_step': 0.002550290291567883, 'init_value': -2.841460943222046, 'ave_value': -1.6543785247179839, 'soft_opc': nan} step=5976




2022-04-22 08:01.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.51 [info     ] FQE_20220422080134: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00012378807527473173, 'time_algorithm_update': 0.002160450062119817, 'loss': 0.014206965384143686, 'time_step': 0.0023412991719073558, 'init_value': -2.8661367893218994, 'ave_value': -1.6526373051308296, 'soft_opc': nan} step=6142




2022-04-22 08:01.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.52 [info     ] FQE_20220422080134: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00012416293822139143, 'time_algorithm_update': 0.0022708134478833302, 'loss': 0.014597769984091824, 'time_step': 0.0024543052696319946, 'init_value': -2.946854591369629, 'ave_value': -1.7042642959200585, 'soft_opc': nan} step=6308




2022-04-22 08:01.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.52 [info     ] FQE_20220422080134: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00014263176056275885, 'time_algorithm_update': 0.0028374410537352047, 'loss': 0.015150994687980736, 'time_step': 0.00304368174219706, 'init_value': -3.0601439476013184, 'ave_value': -1.8027869461758717, 'soft_opc': nan} step=6474




2022-04-22 08:01.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.53 [info     ] FQE_20220422080134: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001257643642195736, 'time_algorithm_update': 0.002416445548275867, 'loss': 0.016201247405737685, 'time_step': 0.002603088516786874, 'init_value': -3.129652976989746, 'ave_value': -1.8450514862263525, 'soft_opc': nan} step=6640




2022-04-22 08:01.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.53 [info     ] FQE_20220422080134: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00012955751763768942, 'time_algorithm_update': 0.0022410714482686608, 'loss': 0.01700917707969356, 'time_step': 0.0024284282362604715, 'init_value': -3.2360236644744873, 'ave_value': -1.8978045286627503, 'soft_opc': nan} step=6806




2022-04-22 08:01.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.54 [info     ] FQE_20220422080134: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00013151657150452397, 'time_algorithm_update': 0.002559927572686988, 'loss': 0.017592714270170083, 'time_step': 0.002756476402282715, 'init_value': -3.2714810371398926, 'ave_value': -1.9001103732634235, 'soft_opc': nan} step=6972




2022-04-22 08:01.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.54 [info     ] FQE_20220422080134: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00012193243187594127, 'time_algorithm_update': 0.0022480315472706257, 'loss': 0.018624261145393017, 'time_step': 0.0024328648325908616, 'init_value': -3.3495893478393555, 'ave_value': -1.9709149133648958, 'soft_opc': nan} step=7138




2022-04-22 08:01.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.55 [info     ] FQE_20220422080134: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00012027068310473338, 'time_algorithm_update': 0.0021515696881765343, 'loss': 0.019106918241104942, 'time_step': 0.0023299742894000316, 'init_value': -3.37313175201416, 'ave_value': -1.9925997755280487, 'soft_opc': nan} step=7304




2022-04-22 08:01.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.55 [info     ] FQE_20220422080134: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001246814268181123, 'time_algorithm_update': 0.00219637370971312, 'loss': 0.01970490726084078, 'time_step': 0.002379798027406256, 'init_value': -3.4342589378356934, 'ave_value': -2.035654653004698, 'soft_opc': nan} step=7470




2022-04-22 08:01.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.56 [info     ] FQE_20220422080134: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001366009195166898, 'time_algorithm_update': 0.0026234359626310416, 'loss': 0.020405895431548447, 'time_step': 0.002820182995623853, 'init_value': -3.5030429363250732, 'ave_value': -2.09241070835977, 'soft_opc': nan} step=7636




2022-04-22 08:01.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.56 [info     ] FQE_20220422080134: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00012281285711081632, 'time_algorithm_update': 0.002334953790687653, 'loss': 0.021161179974147803, 'time_step': 0.0025167738098696053, 'init_value': -3.591836929321289, 'ave_value': -2.166283991127401, 'soft_opc': nan} step=7802




2022-04-22 08:01.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.57 [info     ] FQE_20220422080134: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00012636328318032874, 'time_algorithm_update': 0.00230771662241005, 'loss': 0.021777694108292844, 'time_step': 0.0024932177670030707, 'init_value': -3.6898601055145264, 'ave_value': -2.2443859330974183, 'soft_opc': nan} step=7968




2022-04-22 08:01.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.57 [info     ] FQE_20220422080134: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00011633534029305699, 'time_algorithm_update': 0.002085981598819595, 'loss': 0.02203412009269863, 'time_step': 0.0022552286285951913, 'init_value': -3.72918701171875, 'ave_value': -2.2661146215490393, 'soft_opc': nan} step=8134




2022-04-22 08:01.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:01.57 [info     ] FQE_20220422080134: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00012619236865675593, 'time_algorithm_update': 0.0022845368787466763, 'loss': 0.022764111435284885, 'time_step': 0.0024726175400147953, 'init_value': -3.7891900539398193, 'ave_value': -2.3116251981473184, 'soft_opc': nan} step=8300




2022-04-22 08:01.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080134/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 08:01.58 [debug    ] RoundIterator is selected.
2022-04-22 08:01.58 [info     ] Directory is created at d3rlpy_logs/FQE_20220422080158
2022-04-22 08:01.58 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:01.58 [debug    ] Building models...
2022-04-22 08:01.58 [debug    ] Models have been built.
2022-04-22 08:01.58 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422080158/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:01.59 [info     ] FQE_20220422080158: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00013610096864922103, 'time_algorithm_update': 0.002548621144405631, 'loss': 0.02375230310907111, 'time_step': 0.0027506178201631057, 'init_value': -1.2587553262710571, 'ave_value': -1.2261061934603228, 'soft_opc': nan} step=344




2022-04-22 08:01.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.00 [info     ] FQE_20220422080158: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00012650184853132382, 'time_algorithm_update': 0.0021830090256624445, 'loss': 0.021983248945109026, 'time_step': 0.0023688389811404916, 'init_value': -2.046069622039795, 'ave_value': -1.9804668994637222, 'soft_opc': nan} step=688




2022-04-22 08:02.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.01 [info     ] FQE_20220422080158: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.0001389439715895542, 'time_algorithm_update': 0.00262028741282086, 'loss': 0.025410553586647608, 'time_step': 0.002824915702952895, 'init_value': -2.9181466102600098, 'ave_value': -2.860038098612347, 'soft_opc': nan} step=1032




2022-04-22 08:02.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.02 [info     ] FQE_20220422080158: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00012434845746949662, 'time_algorithm_update': 0.002186529858167781, 'loss': 0.02908518989732879, 'time_step': 0.002368551354075587, 'init_value': -3.545663833618164, 'ave_value': -3.520558281954344, 'soft_opc': nan} step=1376




2022-04-22 08:02.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.03 [info     ] FQE_20220422080158: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.0001306568467339804, 'time_algorithm_update': 0.0025344962297483933, 'loss': 0.03824349530420244, 'time_step': 0.002727026163145553, 'init_value': -4.434206008911133, 'ave_value': -4.455839094237701, 'soft_opc': nan} step=1720




2022-04-22 08:02.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.04 [info     ] FQE_20220422080158: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00012859702110290527, 'time_algorithm_update': 0.0023272730583368344, 'loss': 0.04715051071252674, 'time_step': 0.002516973850338958, 'init_value': -4.983936309814453, 'ave_value': -5.026564805357306, 'soft_opc': nan} step=2064




2022-04-22 08:02.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.05 [info     ] FQE_20220422080158: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00013498719348463902, 'time_algorithm_update': 0.0025601331577744593, 'loss': 0.0600185391390263, 'time_step': 0.0027592972267505736, 'init_value': -5.8071489334106445, 'ave_value': -5.90146259103004, 'soft_opc': nan} step=2408




2022-04-22 08:02.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.05 [info     ] FQE_20220422080158: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00013132220090821732, 'time_algorithm_update': 0.0022709300351697343, 'loss': 0.07464309820775376, 'time_step': 0.002466167128363321, 'init_value': -6.451777458190918, 'ave_value': -6.603975660441158, 'soft_opc': nan} step=2752




2022-04-22 08:02.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.06 [info     ] FQE_20220422080158: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00012299418449401855, 'time_algorithm_update': 0.0020937379016432653, 'loss': 0.08954821928772469, 'time_step': 0.0022752104803573253, 'init_value': -7.054325103759766, 'ave_value': -7.309457397548196, 'soft_opc': nan} step=3096




2022-04-22 08:02.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.07 [info     ] FQE_20220422080158: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00013242973837741587, 'time_algorithm_update': 0.0024461940277454466, 'loss': 0.10731140108302582, 'time_step': 0.002644692049470059, 'init_value': -7.75650691986084, 'ave_value': -8.127447144012176, 'soft_opc': nan} step=3440




2022-04-22 08:02.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.08 [info     ] FQE_20220422080158: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001391768455505371, 'time_algorithm_update': 0.002301453850990118, 'loss': 0.12244612446899504, 'time_step': 0.0025009256462718166, 'init_value': -8.288689613342285, 'ave_value': -8.764319579530431, 'soft_opc': nan} step=3784




2022-04-22 08:02.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.09 [info     ] FQE_20220422080158: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00012621214223462483, 'time_algorithm_update': 0.002258194740428481, 'loss': 0.14253836666602035, 'time_step': 0.002443001713863639, 'init_value': -8.8698091506958, 'ave_value': -9.568118528444488, 'soft_opc': nan} step=4128




2022-04-22 08:02.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.10 [info     ] FQE_20220422080158: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00012379191642583801, 'time_algorithm_update': 0.002145568991816321, 'loss': 0.15740707237273455, 'time_step': 0.0023276480131371076, 'init_value': -9.322216987609863, 'ave_value': -10.248917048364072, 'soft_opc': nan} step=4472




2022-04-22 08:02.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.11 [info     ] FQE_20220422080158: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00012515034786490507, 'time_algorithm_update': 0.0021476107974385105, 'loss': 0.17367678872569528, 'time_step': 0.0023288165414056114, 'init_value': -9.94072151184082, 'ave_value': -11.279896300368279, 'soft_opc': nan} step=4816




2022-04-22 08:02.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.12 [info     ] FQE_20220422080158: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00012980020323465037, 'time_algorithm_update': 0.0023461393145627753, 'loss': 0.19164990437715207, 'time_step': 0.002537662899771402, 'init_value': -10.204931259155273, 'ave_value': -11.825385076972145, 'soft_opc': nan} step=5160




2022-04-22 08:02.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.13 [info     ] FQE_20220422080158: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00013284419858178428, 'time_algorithm_update': 0.0022834885952084565, 'loss': 0.20553921840998324, 'time_step': 0.0024796530257823854, 'init_value': -10.686257362365723, 'ave_value': -12.62979810389623, 'soft_opc': nan} step=5504




2022-04-22 08:02.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.14 [info     ] FQE_20220422080158: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00012630570766537688, 'time_algorithm_update': 0.0022240884082261906, 'loss': 0.22574540160509737, 'time_step': 0.0024142445519913075, 'init_value': -10.721061706542969, 'ave_value': -12.96774844339218, 'soft_opc': nan} step=5848




2022-04-22 08:02.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.15 [info     ] FQE_20220422080158: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001331803410552269, 'time_algorithm_update': 0.0023216016070787297, 'loss': 0.2431696970069911, 'time_step': 0.002515525318855463, 'init_value': -11.004018783569336, 'ave_value': -13.523990563389653, 'soft_opc': nan} step=6192




2022-04-22 08:02.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.16 [info     ] FQE_20220422080158: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00013074833293293798, 'time_algorithm_update': 0.002336650393729986, 'loss': 0.2549586015006224, 'time_step': 0.0025286937868872353, 'init_value': -11.198709487915039, 'ave_value': -14.193393084832241, 'soft_opc': nan} step=6536




2022-04-22 08:02.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.17 [info     ] FQE_20220422080158: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001293912876484006, 'time_algorithm_update': 0.002316863730896351, 'loss': 0.26896056383956485, 'time_step': 0.0025057563948076827, 'init_value': -11.356443405151367, 'ave_value': -14.658104308516727, 'soft_opc': nan} step=6880




2022-04-22 08:02.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.17 [info     ] FQE_20220422080158: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00012653095777644666, 'time_algorithm_update': 0.0022454150887422785, 'loss': 0.2810027254041458, 'time_step': 0.0024286231329274733, 'init_value': -11.409292221069336, 'ave_value': -15.196514427270069, 'soft_opc': nan} step=7224




2022-04-22 08:02.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.18 [info     ] FQE_20220422080158: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00013516323510990588, 'time_algorithm_update': 0.0022509305976157966, 'loss': 0.29705623430633094, 'time_step': 0.0024431327054666918, 'init_value': -11.532892227172852, 'ave_value': -15.594504803604957, 'soft_opc': nan} step=7568




2022-04-22 08:02.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.19 [info     ] FQE_20220422080158: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001415852890458218, 'time_algorithm_update': 0.002478509448295416, 'loss': 0.3078793012916089, 'time_step': 0.0026836963587029035, 'init_value': -11.953954696655273, 'ave_value': -16.49089010336965, 'soft_opc': nan} step=7912




2022-04-22 08:02.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.20 [info     ] FQE_20220422080158: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00012514203093772711, 'time_algorithm_update': 0.002243461997009987, 'loss': 0.32661142874357485, 'time_step': 0.0024296779965245446, 'init_value': -12.015016555786133, 'ave_value': -16.81854693480004, 'soft_opc': nan} step=8256




2022-04-22 08:02.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.21 [info     ] FQE_20220422080158: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00012873840886493061, 'time_algorithm_update': 0.0023027249546938165, 'loss': 0.33545634828962734, 'time_step': 0.002493135457815126, 'init_value': -12.143156051635742, 'ave_value': -17.309570504228272, 'soft_opc': nan} step=8600




2022-04-22 08:02.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.22 [info     ] FQE_20220422080158: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00012970109318577967, 'time_algorithm_update': 0.0023479288400605667, 'loss': 0.3412447957657711, 'time_step': 0.002538123103075249, 'init_value': -12.287592887878418, 'ave_value': -17.778628651588015, 'soft_opc': nan} step=8944




2022-04-22 08:02.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.23 [info     ] FQE_20220422080158: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.0001279462215512298, 'time_algorithm_update': 0.0023111908934837166, 'loss': 0.3401531695312452, 'time_step': 0.002498982257621233, 'init_value': -12.242494583129883, 'ave_value': -18.14259682357173, 'soft_opc': nan} step=9288




2022-04-22 08:02.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.24 [info     ] FQE_20220422080158: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00012855890185333962, 'time_algorithm_update': 0.0021531956140385116, 'loss': 0.340470049963441, 'time_step': 0.0023404401402140774, 'init_value': -12.282720565795898, 'ave_value': -18.489142593303654, 'soft_opc': nan} step=9632




2022-04-22 08:02.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.25 [info     ] FQE_20220422080158: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00013156893641449685, 'time_algorithm_update': 0.0023958412713782733, 'loss': 0.34210108979713433, 'time_step': 0.002591004205304523, 'init_value': -12.225245475769043, 'ave_value': -18.64718333461536, 'soft_opc': nan} step=9976




2022-04-22 08:02.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.26 [info     ] FQE_20220422080158: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00012777017992596294, 'time_algorithm_update': 0.0023425935312759044, 'loss': 0.3333849795758291, 'time_step': 0.002529098544009896, 'init_value': -12.049692153930664, 'ave_value': -18.631589994929428, 'soft_opc': nan} step=10320




2022-04-22 08:02.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.27 [info     ] FQE_20220422080158: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00012683799100476643, 'time_algorithm_update': 0.002186968576076419, 'loss': 0.3262370949351164, 'time_step': 0.002375141132709592, 'init_value': -11.9436616897583, 'ave_value': -18.832297812263597, 'soft_opc': nan} step=10664




2022-04-22 08:02.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.28 [info     ] FQE_20220422080158: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00013186834579290344, 'time_algorithm_update': 0.002453405496686004, 'loss': 0.31436089282790414, 'time_step': 0.0026456145353095477, 'init_value': -12.198310852050781, 'ave_value': -19.373892610035167, 'soft_opc': nan} step=11008




2022-04-22 08:02.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.29 [info     ] FQE_20220422080158: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00013175329496694166, 'time_algorithm_update': 0.0023781823557476665, 'loss': 0.3061345103900706, 'time_step': 0.0025720804236656013, 'init_value': -12.085514068603516, 'ave_value': -19.44777999884105, 'soft_opc': nan} step=11352




2022-04-22 08:02.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.30 [info     ] FQE_20220422080158: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00013128408165865167, 'time_algorithm_update': 0.0024312505888384444, 'loss': 0.3031861717314568, 'time_step': 0.0026212944540866586, 'init_value': -12.43459415435791, 'ave_value': -20.11995075277917, 'soft_opc': nan} step=11696




2022-04-22 08:02.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.31 [info     ] FQE_20220422080158: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00013055496437605038, 'time_algorithm_update': 0.002321073482202929, 'loss': 0.296411607414484, 'time_step': 0.002513314402380655, 'init_value': -12.725930213928223, 'ave_value': -20.51824205947352, 'soft_opc': nan} step=12040




2022-04-22 08:02.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.32 [info     ] FQE_20220422080158: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00013140121171640795, 'time_algorithm_update': 0.0023442291936209034, 'loss': 0.29374634005605826, 'time_step': 0.002538127261538838, 'init_value': -12.942255973815918, 'ave_value': -20.821366363898054, 'soft_opc': nan} step=12384




2022-04-22 08:02.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.32 [info     ] FQE_20220422080158: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00012108129124308741, 'time_algorithm_update': 0.0020020389279653858, 'loss': 0.28615337188983725, 'time_step': 0.0021799809710924016, 'init_value': -13.100215911865234, 'ave_value': -21.052312413958816, 'soft_opc': nan} step=12728




2022-04-22 08:02.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.33 [info     ] FQE_20220422080158: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001264789769815844, 'time_algorithm_update': 0.0021831760572832686, 'loss': 0.2791761675129406, 'time_step': 0.002368393332459206, 'init_value': -13.252580642700195, 'ave_value': -21.161778695357814, 'soft_opc': nan} step=13072




2022-04-22 08:02.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.34 [info     ] FQE_20220422080158: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00013236181680546252, 'time_algorithm_update': 0.0024048921673796896, 'loss': 0.2718980198422837, 'time_step': 0.0025973403176595996, 'init_value': -12.929618835449219, 'ave_value': -20.877114519181553, 'soft_opc': nan} step=13416




2022-04-22 08:02.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.35 [info     ] FQE_20220422080158: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.000132004882014075, 'time_algorithm_update': 0.0023183420647022337, 'loss': 0.26925980917524633, 'time_step': 0.002513440542442854, 'init_value': -12.85963249206543, 'ave_value': -20.833107436052313, 'soft_opc': nan} step=13760




2022-04-22 08:02.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.36 [info     ] FQE_20220422080158: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00013521175051844396, 'time_algorithm_update': 0.002347583687582681, 'loss': 0.26692989818903423, 'time_step': 0.0025471927121628163, 'init_value': -12.988863945007324, 'ave_value': -20.82338464602425, 'soft_opc': nan} step=14104




2022-04-22 08:02.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.37 [info     ] FQE_20220422080158: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00013225300367488416, 'time_algorithm_update': 0.0023596827373948207, 'loss': 0.2619270833187498, 'time_step': 0.0025573449079380477, 'init_value': -13.221151351928711, 'ave_value': -20.964073223869004, 'soft_opc': nan} step=14448




2022-04-22 08:02.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.38 [info     ] FQE_20220422080158: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00013036783351454625, 'time_algorithm_update': 0.00230595400167066, 'loss': 0.26459166236481696, 'time_step': 0.0025007177230923676, 'init_value': -13.52177619934082, 'ave_value': -21.258198896855923, 'soft_opc': nan} step=14792




2022-04-22 08:02.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.39 [info     ] FQE_20220422080158: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00013319628183231798, 'time_algorithm_update': 0.002255229062812273, 'loss': 0.27333091537814674, 'time_step': 0.002449647631756095, 'init_value': -14.112728118896484, 'ave_value': -21.70978955975524, 'soft_opc': nan} step=15136




2022-04-22 08:02.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.40 [info     ] FQE_20220422080158: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00012839880100516386, 'time_algorithm_update': 0.0023845087650210357, 'loss': 0.27902446596255137, 'time_step': 0.0025744729263837947, 'init_value': -14.06936264038086, 'ave_value': -21.621431067881282, 'soft_opc': nan} step=15480




2022-04-22 08:02.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.41 [info     ] FQE_20220422080158: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001326792461927547, 'time_algorithm_update': 0.0023117765437724977, 'loss': 0.2871053430365485, 'time_step': 0.0025071411631828132, 'init_value': -14.664502143859863, 'ave_value': -22.160980910438674, 'soft_opc': nan} step=15824




2022-04-22 08:02.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.42 [info     ] FQE_20220422080158: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001297253508900487, 'time_algorithm_update': 0.0024080331935439, 'loss': 0.29601606846310546, 'time_step': 0.002599635789560717, 'init_value': -14.885622024536133, 'ave_value': -22.428259178459108, 'soft_opc': nan} step=16168




2022-04-22 08:02.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.43 [info     ] FQE_20220422080158: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00013081971989121547, 'time_algorithm_update': 0.0023328703503276028, 'loss': 0.3024856594504875, 'time_step': 0.002526908419853033, 'init_value': -15.184078216552734, 'ave_value': -22.578897243429413, 'soft_opc': nan} step=16512




2022-04-22 08:02.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.44 [info     ] FQE_20220422080158: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00013336331345314203, 'time_algorithm_update': 0.0023616538491359976, 'loss': 0.30704678876899444, 'time_step': 0.0025595364182494406, 'init_value': -15.289159774780273, 'ave_value': -22.68722907563856, 'soft_opc': nan} step=16856




2022-04-22 08:02.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:02.45 [info     ] FQE_20220422080158: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00013170062109481458, 'time_algorithm_update': 0.0024003927097764124, 'loss': 0.30898768996392095, 'time_step': 0.002598343893539074, 'init_value': -15.283400535583496, 'ave_value': -22.665025367607942, 'soft_opc': nan} step=17200




2022-04-22 08:02.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422080158/model_17200.pt
search iteration:  33
using hyper params:  [0.0031866426579564757, 0.001543315999472548, 8.973127253826645e-05, 3]
2022-04-22 08:02.45 [debug    ] RoundIterator is selected.
2022-04-22 08:02.45 [info     ] Directory is created at d3rlpy_logs/CQL_20220422080245
2022-04-22 08:02.45 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:02.45 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 08:02.45 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422080245/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.0031866426579564757, 'actor_optim_factory': {'opt

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:02.57 [info     ] CQL_20220422080245: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00034527282494341017, 'time_algorithm_update': 0.03435670020263319, 'temp_loss': 4.826394029435395, 'temp': 0.983823431881866, 'alpha_loss': -17.688778607142453, 'alpha': 1.0177311986857067, 'critic_loss': 77.99773506208652, 'actor_loss': -0.2706133769448265, 'time_step': 0.034789624241735206, 'td_error': 1.2554214492237552, 'init_value': -3.0572948455810547, 'ave_value': -2.718928201853056} step=346
2022-04-22 08:02.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:03.10 [info     ] CQL_20220422080245: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003489986320451505, 'time_algorithm_update': 0.0348414913078264, 'temp_loss': 4.811449347203866, 'temp': 0.95333645367898, 'alpha_loss': -18.362216999076004, 'alpha': 1.0542289697365954, 'critic_loss': 75.229206724663, 'actor_loss': 3.3629155458742486, 'time_step': 0.03528640173763209, 'td_error': 1.2607207240774077, 'init_value': -5.38028621673584, 'ave_value': -4.85787120330328} step=692
2022-04-22 08:03.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:03.22 [info     ] CQL_20220422080245: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00035349895499345194, 'time_algorithm_update': 0.03479354092151443, 'temp_loss': 4.667935149518051, 'temp': 0.9246515988269982, 'alpha_loss': -19.014535639327384, 'alpha': 1.092500544696874, 'critic_loss': 124.75452692109036, 'actor_loss': 6.316784286774652, 'time_step': 0.03523865256006318, 'td_error': 1.3207044420747036, 'init_value': -8.354750633239746, 'ave_value': -7.771152110572334} step=1038
2022-04-22 08:03.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:03.35 [info     ] CQL_20220422080245: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003455491424295944, 'time_algorithm_update': 0.0345060563500906, 'temp_loss': 4.530243825361219, 'temp': 0.8972598074833092, 'alpha_loss': -19.707081965628387, 'alpha': 1.132657110346535, 'critic_loss': 195.4097172731609, 'actor_loss': 8.629061281336526, 'time_step': 0.0349479679427395, 'td_error': 1.348417716782855, 'init_value': -10.066010475158691, 'ave_value': -9.421239840334318} step=1384
2022-04-22 08:03.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:03.47 [info     ] CQL_20220422080245: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.000350962484503068, 'time_algorithm_update': 0.035304103283523826, 'temp_loss': 4.396631695631611, 'temp': 0.8709803855832602, 'alpha_loss': -20.44209193080836, 'alpha': 1.1747478353494853, 'critic_loss': 286.9324227922914, 'actor_loss': 10.307785050717392, 'time_step': 0.03575284977179731, 'td_error': 1.3867887800562395, 'init_value': -11.559976577758789, 'ave_value': -10.952789115204864} step=1730
2022-04-22 08:03.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:04.00 [info     ] CQL_20220422080245: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00033728511347246997, 'time_algorithm_update': 0.03455849327793011, 'temp_loss': 4.26958587817374, 'temp': 0.8456972399888011, 'alpha_loss': -21.203451955938615, 'alpha': 1.2188142283114394, 'critic_loss': 398.40824449131253, 'actor_loss': 11.013079370377381, 'time_step': 0.0349925942503648, 'td_error': 1.401978398383896, 'init_value': -12.039297103881836, 'ave_value': -11.47148592407639} step=2076
2022-04-22 08:04.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:04.12 [info     ] CQL_20220422080245: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003472924921553948, 'time_algorithm_update': 0.03461402206751653, 'temp_loss': 4.146244947620899, 'temp': 0.8213246561199254, 'alpha_loss': -22.0014931397631, 'alpha': 1.264880311971455, 'critic_loss': 528.0209833045915, 'actor_loss': 10.594207909754935, 'time_step': 0.035058066335027616, 'td_error': 1.3694722503665375, 'init_value': -10.955029487609863, 'ave_value': -10.525244671793105} step=2422
2022-04-22 08:04.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:04.25 [info     ] CQL_20220422080245: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003484142998050403, 'time_algorithm_update': 0.03541626199821516, 'temp_loss': 4.0280545543384, 'temp': 0.7977945430774909, 'alpha_loss': -22.837323751063707, 'alpha': 1.3129925724398883, 'critic_loss': 678.0484253988101, 'actor_loss': 8.970835374269871, 'time_step': 0.03586051505425073, 'td_error': 1.3336139493051613, 'init_value': -8.965859413146973, 'ave_value': -8.68758390636001} step=2768
2022-04-22 08:04.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:04.38 [info     ] CQL_20220422080245: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00034889320417635705, 'time_algorithm_update': 0.03461915426860655, 'temp_loss': 3.9135337206669627, 'temp': 0.7750429387037465, 'alpha_loss': -23.706109587167727, 'alpha': 1.3631894650486853, 'critic_loss': 851.4767579536217, 'actor_loss': 6.371612154679491, 'time_step': 0.03506898880004883, 'td_error': 1.2943143249351008, 'init_value': -6.352205276489258, 'ave_value': -6.203076343866478} step=3114
2022-04-22 08:04.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:04.50 [info     ] CQL_20220422080245: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003496711653781075, 'time_algorithm_update': 0.03517975834752783, 'temp_loss': 3.8027342826644808, 'temp': 0.753024651480548, 'alpha_loss': -24.61458936040801, 'alpha': 1.4155176912429015, 'critic_loss': 1042.0992934386854, 'actor_loss': 4.0081723142910555, 'time_step': 0.03562394663088583, 'td_error': 1.2791523018691777, 'init_value': -4.665914058685303, 'ave_value': -4.586643689229286} step=3460
2022-04-22 08:04.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:05.03 [info     ] CQL_20220422080245: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003429175801359849, 'time_algorithm_update': 0.03486190021382591, 'temp_loss': 3.6952569877481185, 'temp': 0.7316967894920724, 'alpha_loss': -25.562583333495034, 'alpha': 1.4700327477703206, 'critic_loss': 1238.2763848277186, 'actor_loss': 2.9873777221393034, 'time_step': 0.035299880656203785, 'td_error': 1.2773549163152393, 'init_value': -4.176904201507568, 'ave_value': -4.128758111391208} step=3806
2022-04-22 08:05.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:05.15 [info     ] CQL_20220422080245: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003416228156558351, 'time_algorithm_update': 0.034448926848483226, 'temp_loss': 3.5908097786710442, 'temp': 0.7110234883134765, 'alpha_loss': -26.54870140759242, 'alpha': 1.5267963233710713, 'critic_loss': 1434.9316021693235, 'actor_loss': 2.6781857055046654, 'time_step': 0.03487455982693358, 'td_error': 1.2778772885698573, 'init_value': -4.023139953613281, 'ave_value': -3.9863338230796876} step=4152
2022-04-22 08:05.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:05.28 [info     ] CQL_20220422080245: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.000346634429314233, 'time_algorithm_update': 0.03537442022665387, 'temp_loss': 3.4894277546447134, 'temp': 0.6909746519058426, 'alpha_loss': -27.580749070713285, 'alpha': 1.58586640957463, 'critic_loss': 1637.8510611649883, 'actor_loss': 2.626551936127547, 'time_step': 0.03581408269143518, 'td_error': 1.2786642729058222, 'init_value': -3.9699695110321045, 'ave_value': -3.9445838869437613} step=4498
2022-04-22 08:05.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:05.40 [info     ] CQL_20220422080245: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003461644828664085, 'time_algorithm_update': 0.03408373573611927, 'temp_loss': 3.392059760286629, 'temp': 0.6715197146283409, 'alpha_loss': -28.644277875823093, 'alpha': 1.647320505511554, 'critic_loss': 1848.8672541800263, 'actor_loss': 2.6456491802469153, 'time_step': 0.034516606716751365, 'td_error': 1.279853497048486, 'init_value': -3.9921340942382812, 'ave_value': -3.9697389857245597} step=4844
2022-04-22 08:05.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:05.53 [info     ] CQL_20220422080245: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0003422567610106716, 'time_algorithm_update': 0.034334473527235794, 'temp_loss': 3.296554317364114, 'temp': 0.6526337665629525, 'alpha_loss': -29.756564950667364, 'alpha': 1.7112208422208797, 'critic_loss': 2068.793916735346, 'actor_loss': 2.7064170651353163, 'time_step': 0.03476466746688578, 'td_error': 1.2802764358706746, 'init_value': -3.932255506515503, 'ave_value': -3.915155066693208} step=5190
2022-04-22 08:05.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:06.05 [info     ] CQL_20220422080245: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003605012948802441, 'time_algorithm_update': 0.03415667666176151, 'temp_loss': 3.2035272183445835, 'temp': 0.6342982598122834, 'alpha_loss': -30.910372425366, 'alpha': 1.7776607595427187, 'critic_loss': 2297.091711496342, 'actor_loss': 2.7699601326374648, 'time_step': 0.03460553546861417, 'td_error': 1.2820681671192853, 'init_value': -4.027817726135254, 'ave_value': -4.012698580360684} step=5536
2022-04-22 08:06.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:06.17 [info     ] CQL_20220422080245: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003441716894249006, 'time_algorithm_update': 0.0336116311178042, 'temp_loss': 3.1140796683427228, 'temp': 0.6164897623434232, 'alpha_loss': -32.11087891683413, 'alpha': 1.8467222245442385, 'critic_loss': 2536.3867053434337, 'actor_loss': 2.9010609305662918, 'time_step': 0.034041482589148374, 'td_error': 1.2838900866754808, 'init_value': -4.1232476234436035, 'ave_value': -4.110356179567918} step=5882
2022-04-22 08:06.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:06.29 [info     ] CQL_20220422080245: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0003505125211153416, 'time_algorithm_update': 0.033818042347196896, 'temp_loss': 3.026893524076208, 'temp': 0.5991907950081578, 'alpha_loss': -33.361424275216336, 'alpha': 1.9185061161917758, 'critic_loss': 2773.5951993203576, 'actor_loss': 3.028458377529431, 'time_step': 0.03425524758465717, 'td_error': 1.2855113296212994, 'init_value': -4.222989082336426, 'ave_value': -4.211122815605587} step=6228
2022-04-22 08:06.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:06.42 [info     ] CQL_20220422080245: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00034303127685723277, 'time_algorithm_update': 0.034427455394943324, 'temp_loss': 2.9417575845828634, 'temp': 0.5823857222678345, 'alpha_loss': -34.660723702755966, 'alpha': 1.9931132283513946, 'critic_loss': 3016.3265959458545, 'actor_loss': 3.16446597727737, 'time_step': 0.034856314603992966, 'td_error': 1.2871706840050967, 'init_value': -4.303289890289307, 'ave_value': -4.294231411358258} step=6574
2022-04-22 08:06.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:06.54 [info     ] CQL_20220422080245: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00034012132986432556, 'time_algorithm_update': 0.03416166967050189, 'temp_loss': 2.858703086141906, 'temp': 0.5660586164176808, 'alpha_loss': -36.00664310786076, 'alpha': 2.0706396530129316, 'critic_loss': 3265.770826747652, 'actor_loss': 3.3222968240693813, 'time_step': 0.03459238730414065, 'td_error': 1.2893951021733323, 'init_value': -4.4503631591796875, 'ave_value': -4.4418371092035285} step=6920
2022-04-22 08:06.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:07.06 [info     ] CQL_20220422080245: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003400951451648866, 'time_algorithm_update': 0.033964509219792535, 'temp_loss': 2.7793167174896065, 'temp': 0.5501930059036079, 'alpha_loss': -37.40925731548684, 'alpha': 2.1511973886820623, 'critic_loss': 3518.6705032966042, 'actor_loss': 3.4782190536488, 'time_step': 0.03438909756654949, 'td_error': 1.291915639025403, 'init_value': -4.6155877113342285, 'ave_value': -4.607435882119784} step=7266
2022-04-22 08:07.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:07.19 [info     ] CQL_20220422080245: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003403273621046474, 'time_algorithm_update': 0.03465575496585383, 'temp_loss': 2.700340334390629, 'temp': 0.5347754752015792, 'alpha_loss': -38.86417790230988, 'alpha': 2.234899165313368, 'critic_loss': 3806.947418036488, 'actor_loss': 3.6602118352934117, 'time_step': 0.03508362053446687, 'td_error': 1.2944465975399975, 'init_value': -4.777637481689453, 'ave_value': -4.769553804013219} step=7612
2022-04-22 08:07.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:07.31 [info     ] CQL_20220422080245: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00033796247030269206, 'time_algorithm_update': 0.03405849092957602, 'temp_loss': 2.6258294940683884, 'temp': 0.5197917596797723, 'alpha_loss': -40.378267828439704, 'alpha': 2.3218699287127897, 'critic_loss': 4085.1278817061057, 'actor_loss': 3.843748438565028, 'time_step': 0.03448355542442013, 'td_error': 1.29696929602063, 'init_value': -4.926385402679443, 'ave_value': -4.919389190239723} step=7958
2022-04-22 08:07.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:07.43 [info     ] CQL_20220422080245: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00034375686866010546, 'time_algorithm_update': 0.034569749942404686, 'temp_loss': 2.5518675188108677, 'temp': 0.5052282600905854, 'alpha_loss': -41.94679421220901, 'alpha': 2.412228855094469, 'critic_loss': 4359.345312217756, 'actor_loss': 4.029585039684538, 'time_step': 0.03501061690336018, 'td_error': 1.2986417979190576, 'init_value': -5.011935234069824, 'ave_value': -5.007376253734894} step=8304
2022-04-22 08:07.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:07.56 [info     ] CQL_20220422080245: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00035553378176826964, 'time_algorithm_update': 0.03486059718049331, 'temp_loss': 2.479911588519984, 'temp': 0.4910753144521934, 'alpha_loss': -43.580276191579124, 'alpha': 2.506108556179642, 'critic_loss': 4636.407292889722, 'actor_loss': 4.211313087127112, 'time_step': 0.03531508638679637, 'td_error': 1.3023325061977256, 'init_value': -5.2560601234436035, 'ave_value': -5.250100090493518} step=8650
2022-04-22 08:07.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:08.09 [info     ] CQL_20220422080245: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00034518117849537404, 'time_algorithm_update': 0.03466291992650556, 'temp_loss': 2.4111013632978318, 'temp': 0.47731941662771854, 'alpha_loss': -45.27444566054151, 'alpha': 2.6036446955851735, 'critic_loss': 4876.2627094246745, 'actor_loss': 4.38067597736513, 'time_step': 0.03510132276942964, 'td_error': 1.3039428546880918, 'init_value': -5.333477973937988, 'ave_value': -5.329346632290474} step=8996
2022-04-22 08:08.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:08.21 [info     ] CQL_20220422080245: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00033818366210584697, 'time_algorithm_update': 0.034206586077034126, 'temp_loss': 2.342920036674235, 'temp': 0.46394917592836943, 'alpha_loss': -47.03137961150594, 'alpha': 2.7049792798268313, 'critic_loss': 5067.325140275018, 'actor_loss': 4.55426709913794, 'time_step': 0.0346380189664102, 'td_error': 1.305398548588353, 'init_value': -5.399868965148926, 'ave_value': -5.397646212227371} step=9342
2022-04-22 08:08.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:08.33 [info     ] CQL_20220422080245: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00035976536701180344, 'time_algorithm_update': 0.03493083350231193, 'temp_loss': 2.2782405794011376, 'temp': 0.4509533974476632, 'alpha_loss': -48.862932822607846, 'alpha': 2.8102643407149124, 'critic_loss': 5269.59374435513, 'actor_loss': 4.744891549810509, 'time_step': 0.03537623386162554, 'td_error': 1.310230017532331, 'init_value': -5.7146711349487305, 'ave_value': -5.71023214131297} step=9688
2022-04-22 08:08.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:08.46 [info     ] CQL_20220422080245: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003518520752129527, 'time_algorithm_update': 0.03535402579114616, 'temp_loss': 2.2138959010901478, 'temp': 0.4383217769895675, 'alpha_loss': -50.76867638295786, 'alpha': 2.9196568527662685, 'critic_loss': 5380.983107726698, 'actor_loss': 4.899996436400221, 'time_step': 0.03580574975537427, 'td_error': 1.3142136272807756, 'init_value': -5.952569484710693, 'ave_value': -5.947531735857497} step=10034
2022-04-22 08:08.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:08.59 [info     ] CQL_20220422080245: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003551492801291405, 'time_algorithm_update': 0.035375734285123084, 'temp_loss': 2.151705043853363, 'temp': 0.426045647332434, 'alpha_loss': -52.73997628757719, 'alpha': 3.033308929790651, 'critic_loss': 5607.468064148302, 'actor_loss': 5.154349349137676, 'time_step': 0.035822148267933396, 'td_error': 1.3162254557718394, 'init_value': -6.043872833251953, 'ave_value': -6.040902518593004} step=10380
2022-04-22 08:08.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:09.11 [info     ] CQL_20220422080245: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00034467264406942905, 'time_algorithm_update': 0.03415041576231146, 'temp_loss': 2.0914748714149343, 'temp': 0.4141126678169118, 'alpha_loss': -54.80631184440128, 'alpha': 3.151378668801633, 'critic_loss': 5835.349829524927, 'actor_loss': 5.3693845451222675, 'time_step': 0.034591039481190584, 'td_error': 1.3209614618466412, 'init_value': -6.323646068572998, 'ave_value': -6.3193611470598245} step=10726
2022-04-22 08:09.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:09.24 [info     ] CQL_20220422080245: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003559182834073987, 'time_algorithm_update': 0.0352507926136083, 'temp_loss': 2.032578680901169, 'temp': 0.4025157030262699, 'alpha_loss': -56.9291448538014, 'alpha': 3.274065582049375, 'critic_loss': 6139.6595254357835, 'actor_loss': 5.583563581367448, 'time_step': 0.03569687239696525, 'td_error': 1.3233390470410005, 'init_value': -6.435410976409912, 'ave_value': -6.432692959434108} step=11072
2022-04-22 08:09.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:09.36 [info     ] CQL_20220422080245: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00034672745390434484, 'time_algorithm_update': 0.03449167681567242, 'temp_loss': 1.9759897513885718, 'temp': 0.3912431979282743, 'alpha_loss': -59.15052581522506, 'alpha': 3.401508276862216, 'critic_loss': 6390.10888107388, 'actor_loss': 5.782434067974201, 'time_step': 0.03492980747553655, 'td_error': 1.3273318992222523, 'init_value': -6.653659820556641, 'ave_value': -6.650399636837927} step=11418
2022-04-22 08:09.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:09.49 [info     ] CQL_20220422080245: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.0003492797730285997, 'time_algorithm_update': 0.034980093812666875, 'temp_loss': 1.9207785849626353, 'temp': 0.38028526435353166, 'alpha_loss': -61.44374156136044, 'alpha': 3.5339190470689985, 'critic_loss': 6558.3655645998915, 'actor_loss': 5.964767647616436, 'time_step': 0.03542708592607796, 'td_error': 1.326789294179266, 'init_value': -6.583569049835205, 'ave_value': -6.584535622449778} step=11764
2022-04-22 08:09.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:10.01 [info     ] CQL_20220422080245: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0003411955916123583, 'time_algorithm_update': 0.034594836951680266, 'temp_loss': 1.8669001067304887, 'temp': 0.3696330708230851, 'alpha_loss': -63.85213027248493, 'alpha': 3.67148523937071, 'critic_loss': 6160.604194420611, 'actor_loss': 5.960074540507587, 'time_step': 0.03502297194706911, 'td_error': 1.33024972641678, 'init_value': -6.775099277496338, 'ave_value': -6.77529958812701} step=12110
2022-04-22 08:10.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:10.14 [info     ] CQL_20220422080245: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003465793036312037, 'time_algorithm_update': 0.03431782074746369, 'temp_loss': 1.8146281507662956, 'temp': 0.35928035376733436, 'alpha_loss': -66.33386239288859, 'alpha': 3.8144093255776204, 'critic_loss': 5918.358401259935, 'actor_loss': 6.167710523384844, 'time_step': 0.03475458566853077, 'td_error': 1.3329369329679317, 'init_value': -6.909717082977295, 'ave_value': -6.910575763961047} step=12456
2022-04-22 08:10.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:10.26 [info     ] CQL_20220422080245: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003551403222056483, 'time_algorithm_update': 0.034690962361462546, 'temp_loss': 1.7636557676199545, 'temp': 0.3492179904886753, 'alpha_loss': -68.9200626218939, 'alpha': 3.9628991743043667, 'critic_loss': 5539.240693020683, 'actor_loss': 6.3186727603735955, 'time_step': 0.0351347020595749, 'td_error': 1.3375897696179697, 'init_value': -7.158078670501709, 'ave_value': -7.1576382352594745} step=12802
2022-04-22 08:10.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:10.39 [info     ] CQL_20220422080245: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00034527144680133445, 'time_algorithm_update': 0.03529132583926868, 'temp_loss': 1.7140069000982825, 'temp': 0.33943839052509023, 'alpha_loss': -71.59831233382914, 'alpha': 4.117154474203297, 'critic_loss': 5394.154713184158, 'actor_loss': 6.594504947607228, 'time_step': 0.03572976589202881, 'td_error': 1.3420998262920922, 'init_value': -7.385812282562256, 'ave_value': -7.384942949095175} step=13148
2022-04-22 08:10.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:10.52 [info     ] CQL_20220422080245: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00034084416538304675, 'time_algorithm_update': 0.03466351183852708, 'temp_loss': 1.6663835086574443, 'temp': 0.3299313750914756, 'alpha_loss': -74.39233804162527, 'alpha': 4.277438369100493, 'critic_loss': 5184.651487140986, 'actor_loss': 6.816180502059143, 'time_step': 0.03509414540549923, 'td_error': 1.3460487588683516, 'init_value': -7.5738525390625, 'ave_value': -7.57340596155173} step=13494
2022-04-22 08:10.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:11.04 [info     ] CQL_20220422080245: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00033615917139659726, 'time_algorithm_update': 0.03329904231032884, 'temp_loss': 1.6195597714082355, 'temp': 0.3206915731374928, 'alpha_loss': -77.26912636288328, 'alpha': 4.443931804227002, 'critic_loss': 4985.976000835441, 'actor_loss': 7.068914873751602, 'time_step': 0.03372560415653824, 'td_error': 1.3508599168051008, 'init_value': -7.8042426109313965, 'ave_value': -7.803719965164669} step=13840
2022-04-22 08:11.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:11.16 [info     ] CQL_20220422080245: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.00033792526046664726, 'time_algorithm_update': 0.034403271757798386, 'temp_loss': 1.5747934672873833, 'temp': 0.31170886711922685, 'alpha_loss': -80.27714249715639, 'alpha': 4.616893605689782, 'critic_loss': 4808.639065604679, 'actor_loss': 7.330134915478657, 'time_step': 0.03482801101111263, 'td_error': 1.355583226706833, 'init_value': -8.01961898803711, 'ave_value': -8.019684816797291} step=14186
2022-04-22 08:11.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:11.28 [info     ] CQL_20220422080245: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003390629167501637, 'time_algorithm_update': 0.03446584492060491, 'temp_loss': 1.5303674619321879, 'temp': 0.30297692412930416, 'alpha_loss': -83.42370528292794, 'alpha': 4.796611663234027, 'critic_loss': 4625.809639462157, 'actor_loss': 7.598434001724155, 'time_step': 0.03489845129795846, 'td_error': 1.3627180134031578, 'init_value': -8.357105255126953, 'ave_value': -8.35565951018607} step=14532
2022-04-22 08:11.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:11.41 [info     ] CQL_20220422080245: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.0003486658107338613, 'time_algorithm_update': 0.034573847158795835, 'temp_loss': 1.4871753747063565, 'temp': 0.2944917230936833, 'alpha_loss': -86.6713111524637, 'alpha': 4.983344237928446, 'critic_loss': 4636.601802406973, 'actor_loss': 7.968391333012223, 'time_step': 0.035018269726306714, 'td_error': 1.3715864558054658, 'init_value': -8.764442443847656, 'ave_value': -8.761202797362905} step=14878
2022-04-22 08:11.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:11.53 [info     ] CQL_20220422080245: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0003460418282216684, 'time_algorithm_update': 0.03438710890753421, 'temp_loss': 1.4457472914216147, 'temp': 0.28624427757855786, 'alpha_loss': -90.03941642893533, 'alpha': 5.177337698853774, 'critic_loss': 4870.910247979136, 'actor_loss': 8.360686575057189, 'time_step': 0.03482954005974566, 'td_error': 1.3782856981339044, 'init_value': -9.043306350708008, 'ave_value': -9.04088981225062} step=15224
2022-04-22 08:11.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:12.06 [info     ] CQL_20220422080245: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003426598675678231, 'time_algorithm_update': 0.03482465523515822, 'temp_loss': 1.4052000879552322, 'temp': 0.2782268673181534, 'alpha_loss': -93.52900320946137, 'alpha': 5.37886000093008, 'critic_loss': 5006.370012757406, 'actor_loss': 8.658515932932065, 'time_step': 0.03526189079174417, 'td_error': 1.3858450163457066, 'init_value': -9.362184524536133, 'ave_value': -9.3591926145124} step=15570
2022-04-22 08:12.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:12.18 [info     ] CQL_20220422080245: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00033727615554897774, 'time_algorithm_update': 0.035142423789625225, 'temp_loss': 1.3658184219647005, 'temp': 0.27043433737203565, 'alpha_loss': -97.1818571145824, 'alpha': 5.588239140593248, 'critic_loss': 5200.056077549223, 'actor_loss': 8.969292582804067, 'time_step': 0.0355690125096051, 'td_error': 1.3926732857205577, 'init_value': -9.63386344909668, 'ave_value': -9.632229946709854} step=15916
2022-04-22 08:12.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:12.31 [info     ] CQL_20220422080245: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00034707612384950496, 'time_algorithm_update': 0.035606515200840946, 'temp_loss': 1.3273712331159955, 'temp': 0.26286086069710685, 'alpha_loss': -100.9513118060338, 'alpha': 5.805757510179729, 'critic_loss': 5350.7568782740245, 'actor_loss': 9.240541284483982, 'time_step': 0.03604431166125171, 'td_error': 1.3963959055487631, 'init_value': -9.76485538482666, 'ave_value': -9.765083457328178} step=16262
2022-04-22 08:12.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:12.44 [info     ] CQL_20220422080245: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003515661107322384, 'time_algorithm_update': 0.03648402236100566, 'temp_loss': 1.290281480791941, 'temp': 0.25549893537697765, 'alpha_loss': -104.89607926738056, 'alpha': 6.031754661846712, 'critic_loss': 5254.613605830022, 'actor_loss': 9.425715077130091, 'time_step': 0.036924402148737386, 'td_error': 1.403407558307667, 'init_value': -10.051406860351562, 'ave_value': -10.050336347049083} step=16608
2022-04-22 08:12.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:12.57 [info     ] CQL_20220422080245: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00034569591456065976, 'time_algorithm_update': 0.034750637291483796, 'temp_loss': 1.2542397372984473, 'temp': 0.24834232708449996, 'alpha_loss': -108.96319549207743, 'alpha': 6.266508674345953, 'critic_loss': 4982.9496491713335, 'actor_loss': 9.616479005427719, 'time_step': 0.03519041552019946, 'td_error': 1.4074405535512708, 'init_value': -10.19363784790039, 'ave_value': -10.194331309036826} step=16954
2022-04-22 08:12.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:13.10 [info     ] CQL_20220422080245: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003471415855981022, 'time_algorithm_update': 0.0347250458822085, 'temp_loss': 1.2191243657486976, 'temp': 0.24138705802790691, 'alpha_loss': -113.2252369257756, 'alpha': 6.510445370150439, 'critic_loss': 4816.361555331016, 'actor_loss': 9.86406851917333, 'time_step': 0.03516802484589505, 'td_error': 1.4129245606568617, 'init_value': -10.404281616210938, 'ave_value': -10.404775777683827} step=17300
2022-04-22 08:13.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422080245/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51910

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:13.10 [info     ] FQE_20220422081310: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00012182471263839538, 'time_algorithm_update': 0.0021531797317137203, 'loss': 0.007265087985714157, 'time_step': 0.002331611621810729, 'init_value': -0.5710181593894958, 'ave_value': -0.5252894979406585, 'soft_opc': nan} step=166




2022-04-22 08:13.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.11 [info     ] FQE_20220422081310: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.0001233931047370635, 'time_algorithm_update': 0.002146967922348574, 'loss': 0.004881821927636951, 'time_step': 0.0023286687322409756, 'init_value': -0.6409751772880554, 'ave_value': -0.5500968712608556, 'soft_opc': nan} step=332




2022-04-22 08:13.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.11 [info     ] FQE_20220422081310: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00012239490646913828, 'time_algorithm_update': 0.0023624092699533485, 'loss': 0.0043057432722199305, 'time_step': 0.0025431808218898543, 'init_value': -0.6717336773872375, 'ave_value': -0.5507292439271738, 'soft_opc': nan} step=498




2022-04-22 08:13.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.11 [info     ] FQE_20220422081310: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.0001243640141314771, 'time_algorithm_update': 0.0021835077239806392, 'loss': 0.00401558440896761, 'time_step': 0.0023674476577574947, 'init_value': -0.7461661100387573, 'ave_value': -0.5898122299496118, 'soft_opc': nan} step=664




2022-04-22 08:13.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.12 [info     ] FQE_20220422081310: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.0001246225403015872, 'time_algorithm_update': 0.0023904622319232986, 'loss': 0.003785421313011054, 'time_step': 0.0025759992829288343, 'init_value': -0.7932473421096802, 'ave_value': -0.602619372617017, 'soft_opc': nan} step=830




2022-04-22 08:13.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.12 [info     ] FQE_20220422081310: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00013284510876758988, 'time_algorithm_update': 0.0027575061981936536, 'loss': 0.0036684220841895028, 'time_step': 0.002955129347651838, 'init_value': -0.8103803992271423, 'ave_value': -0.6031903257106875, 'soft_opc': nan} step=996




2022-04-22 08:13.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.13 [info     ] FQE_20220422081310: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00011967463665697948, 'time_algorithm_update': 0.002385837485991329, 'loss': 0.003607898284344519, 'time_step': 0.0025652575205607586, 'init_value': -0.8858991265296936, 'ave_value': -0.648765799644831, 'soft_opc': nan} step=1162




2022-04-22 08:13.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.13 [info     ] FQE_20220422081310: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00011572062251079513, 'time_algorithm_update': 0.002039620675236346, 'loss': 0.0033677254581868828, 'time_step': 0.0022094852953072055, 'init_value': -0.9017535448074341, 'ave_value': -0.6589762340861934, 'soft_opc': nan} step=1328




2022-04-22 08:13.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.14 [info     ] FQE_20220422081310: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00011689117155879377, 'time_algorithm_update': 0.0021128927368715585, 'loss': 0.0031419176950557045, 'time_step': 0.002281780702522002, 'init_value': -0.8781564235687256, 'ave_value': -0.6313226144962214, 'soft_opc': nan} step=1494




2022-04-22 08:13.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.14 [info     ] FQE_20220422081310: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00012385988809976233, 'time_algorithm_update': 0.002234068261571677, 'loss': 0.003103182991932943, 'time_step': 0.0024228900311941124, 'init_value': -0.9052074551582336, 'ave_value': -0.6498373610277971, 'soft_opc': nan} step=1660




2022-04-22 08:13.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.15 [info     ] FQE_20220422081310: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00012123728372964514, 'time_algorithm_update': 0.0021592378616333008, 'loss': 0.0029244834991278552, 'time_step': 0.0023380201983164593, 'init_value': -0.9193418025970459, 'ave_value': -0.6530980844654747, 'soft_opc': nan} step=1826




2022-04-22 08:13.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.15 [info     ] FQE_20220422081310: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00013310507119420063, 'time_algorithm_update': 0.002571026962923716, 'loss': 0.002938780468307907, 'time_step': 0.0027705517159887106, 'init_value': -0.9232383966445923, 'ave_value': -0.6582250862684403, 'soft_opc': nan} step=1992




2022-04-22 08:13.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.16 [info     ] FQE_20220422081310: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00012616077101374246, 'time_algorithm_update': 0.0023247448794813043, 'loss': 0.002941013386063502, 'time_step': 0.0025108650506260885, 'init_value': -0.9318655133247375, 'ave_value': -0.6588613126225568, 'soft_opc': nan} step=2158




2022-04-22 08:13.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.16 [info     ] FQE_20220422081310: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00012898588755044592, 'time_algorithm_update': 0.0024968299521021097, 'loss': 0.0031912213525207467, 'time_step': 0.0026896416422832444, 'init_value': -0.9472677707672119, 'ave_value': -0.6847794891175655, 'soft_opc': nan} step=2324




2022-04-22 08:13.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.17 [info     ] FQE_20220422081310: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00012280423957181265, 'time_algorithm_update': 0.0022433479148221305, 'loss': 0.0032816832865509822, 'time_step': 0.0024224447916789227, 'init_value': -0.9818465113639832, 'ave_value': -0.6979794608567514, 'soft_opc': nan} step=2490




2022-04-22 08:13.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.17 [info     ] FQE_20220422081310: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00012443582695650766, 'time_algorithm_update': 0.0024101102208516686, 'loss': 0.0035497151065901123, 'time_step': 0.002593138131750635, 'init_value': -0.9834224581718445, 'ave_value': -0.7038769515441009, 'soft_opc': nan} step=2656




2022-04-22 08:13.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.17 [info     ] FQE_20220422081310: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00013283074620258376, 'time_algorithm_update': 0.002303307314953172, 'loss': 0.0039037844574314555, 'time_step': 0.0024959006941462137, 'init_value': -1.079681396484375, 'ave_value': -0.7889049125222808, 'soft_opc': nan} step=2822




2022-04-22 08:13.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.18 [info     ] FQE_20220422081310: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00012330405683402555, 'time_algorithm_update': 0.002286842070430158, 'loss': 0.00425811233041045, 'time_step': 0.002469862800046622, 'init_value': -1.0780489444732666, 'ave_value': -0.7874727187283874, 'soft_opc': nan} step=2988




2022-04-22 08:13.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.18 [info     ] FQE_20220422081310: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00011986422251506024, 'time_algorithm_update': 0.002132668552628483, 'loss': 0.004840772890020162, 'time_step': 0.00231377331607313, 'init_value': -1.082076072692871, 'ave_value': -0.7845747774999656, 'soft_opc': nan} step=3154




2022-04-22 08:13.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.19 [info     ] FQE_20220422081310: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012941676450062948, 'time_algorithm_update': 0.002432589071342744, 'loss': 0.005043903392205206, 'time_step': 0.002624686942043075, 'init_value': -1.1182520389556885, 'ave_value': -0.8103202847445065, 'soft_opc': nan} step=3320




2022-04-22 08:13.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.19 [info     ] FQE_20220422081310: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.000128582299473774, 'time_algorithm_update': 0.002654095730149602, 'loss': 0.005697731180775754, 'time_step': 0.002845150878630489, 'init_value': -1.1556127071380615, 'ave_value': -0.8467314989629059, 'soft_opc': nan} step=3486




2022-04-22 08:13.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.20 [info     ] FQE_20220422081310: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001296781631837408, 'time_algorithm_update': 0.0023765420339193688, 'loss': 0.006544755724067968, 'time_step': 0.002569559108780091, 'init_value': -1.1893147230148315, 'ave_value': -0.8888404275433303, 'soft_opc': nan} step=3652




2022-04-22 08:13.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.20 [info     ] FQE_20220422081310: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00012409256165286144, 'time_algorithm_update': 0.0023463849561760224, 'loss': 0.00700175147840237, 'time_step': 0.002534332045589585, 'init_value': -1.209869146347046, 'ave_value': -0.8854142885210479, 'soft_opc': nan} step=3818




2022-04-22 08:13.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.21 [info     ] FQE_20220422081310: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00012657297662941805, 'time_algorithm_update': 0.002285220536840967, 'loss': 0.007514203686466306, 'time_step': 0.002474635480398155, 'init_value': -1.2495732307434082, 'ave_value': -0.9081580738662868, 'soft_opc': nan} step=3984




2022-04-22 08:13.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.21 [info     ] FQE_20220422081310: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00012216510542904037, 'time_algorithm_update': 0.0022883501397558004, 'loss': 0.008364127204102656, 'time_step': 0.0024685945855565817, 'init_value': -1.2740237712860107, 'ave_value': -0.9321310466612681, 'soft_opc': nan} step=4150




2022-04-22 08:13.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.22 [info     ] FQE_20220422081310: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00012817009385809842, 'time_algorithm_update': 0.0026480979230030476, 'loss': 0.008993737221448627, 'time_step': 0.0028420327657676606, 'init_value': -1.316736102104187, 'ave_value': -0.9439010642141707, 'soft_opc': nan} step=4316




2022-04-22 08:13.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.22 [info     ] FQE_20220422081310: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.0001214627760002412, 'time_algorithm_update': 0.00224238418671022, 'loss': 0.010052669694484493, 'time_step': 0.002422572618507477, 'init_value': -1.3553144931793213, 'ave_value': -0.986427205594609, 'soft_opc': nan} step=4482




2022-04-22 08:13.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.23 [info     ] FQE_20220422081310: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00012582755950560053, 'time_algorithm_update': 0.0022829110363879837, 'loss': 0.011016252887202427, 'time_step': 0.0024676983615002, 'init_value': -1.3869174718856812, 'ave_value': -1.0154301763646438, 'soft_opc': nan} step=4648




2022-04-22 08:13.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.23 [info     ] FQE_20220422081310: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00011699889079633965, 'time_algorithm_update': 0.002068006848714438, 'loss': 0.011904518431717396, 'time_step': 0.002241739307541445, 'init_value': -1.4835312366485596, 'ave_value': -1.0897697054369895, 'soft_opc': nan} step=4814




2022-04-22 08:13.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.24 [info     ] FQE_20220422081310: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00012316617620996683, 'time_algorithm_update': 0.0023038085684718855, 'loss': 0.0127604412050295, 'time_step': 0.0024849320032510414, 'init_value': -1.468827486038208, 'ave_value': -1.0786562606809778, 'soft_opc': nan} step=4980




2022-04-22 08:13.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.24 [info     ] FQE_20220422081310: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.0001195065946464079, 'time_algorithm_update': 0.0022357429366513908, 'loss': 0.014189438204665342, 'time_step': 0.002411950065428952, 'init_value': -1.4851592779159546, 'ave_value': -1.0966164737981785, 'soft_opc': nan} step=5146




2022-04-22 08:13.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.24 [info     ] FQE_20220422081310: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00012868714619831867, 'time_algorithm_update': 0.00231952408710158, 'loss': 0.014195434705295364, 'time_step': 0.0025093770888914547, 'init_value': -1.4940714836120605, 'ave_value': -1.0877187939813813, 'soft_opc': nan} step=5312




2022-04-22 08:13.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.25 [info     ] FQE_20220422081310: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.0001226879027952631, 'time_algorithm_update': 0.002161415226488228, 'loss': 0.01478831277892319, 'time_step': 0.0023420661328786827, 'init_value': -1.4947571754455566, 'ave_value': -1.0977143534668932, 'soft_opc': nan} step=5478




2022-04-22 08:13.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.25 [info     ] FQE_20220422081310: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00011883155409112035, 'time_algorithm_update': 0.0021807012787784436, 'loss': 0.016334251882171774, 'time_step': 0.0023555999778839477, 'init_value': -1.5547665357589722, 'ave_value': -1.147517176925599, 'soft_opc': nan} step=5644




2022-04-22 08:13.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.26 [info     ] FQE_20220422081310: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00012782251978495033, 'time_algorithm_update': 0.002302444124796304, 'loss': 0.01667511155436107, 'time_step': 0.002483609211013978, 'init_value': -1.5831854343414307, 'ave_value': -1.1646417581652466, 'soft_opc': nan} step=5810




2022-04-22 08:13.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.26 [info     ] FQE_20220422081310: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00012793885656149992, 'time_algorithm_update': 0.0024110179349600552, 'loss': 0.01753276286061842, 'time_step': 0.0025979740073881954, 'init_value': -1.5222229957580566, 'ave_value': -1.1273368166373656, 'soft_opc': nan} step=5976




2022-04-22 08:13.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.27 [info     ] FQE_20220422081310: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00013213703431278826, 'time_algorithm_update': 0.0025067458669823335, 'loss': 0.01817065290826182, 'time_step': 0.002704024314880371, 'init_value': -1.5421065092086792, 'ave_value': -1.1726411342721532, 'soft_opc': nan} step=6142




2022-04-22 08:13.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.27 [info     ] FQE_20220422081310: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00012643653226185995, 'time_algorithm_update': 0.0023444819163127117, 'loss': 0.018934429548498444, 'time_step': 0.0025350099586578735, 'init_value': -1.432173728942871, 'ave_value': -1.0501667078450188, 'soft_opc': nan} step=6308




2022-04-22 08:13.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.28 [info     ] FQE_20220422081310: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00012457945260656886, 'time_algorithm_update': 0.0023241416517510473, 'loss': 0.019395534874498282, 'time_step': 0.0025061742368950903, 'init_value': -1.4616811275482178, 'ave_value': -1.0872840846981013, 'soft_opc': nan} step=6474




2022-04-22 08:13.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.28 [info     ] FQE_20220422081310: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00013791078544524778, 'time_algorithm_update': 0.0027266453547650075, 'loss': 0.020292725823244574, 'time_step': 0.002933730562049222, 'init_value': -1.5626931190490723, 'ave_value': -1.1670324352712513, 'soft_opc': nan} step=6640




2022-04-22 08:13.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.29 [info     ] FQE_20220422081310: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001326052539319877, 'time_algorithm_update': 0.002482254821133901, 'loss': 0.021432330596950804, 'time_step': 0.0026778011436922006, 'init_value': -1.5236070156097412, 'ave_value': -1.1410193472328756, 'soft_opc': nan} step=6806




2022-04-22 08:13.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.29 [info     ] FQE_20220422081310: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00011749583554555134, 'time_algorithm_update': 0.0020542432026690745, 'loss': 0.023578344549736882, 'time_step': 0.002226559512586479, 'init_value': -1.5293869972229004, 'ave_value': -1.1483302284032106, 'soft_opc': nan} step=6972




2022-04-22 08:13.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.30 [info     ] FQE_20220422081310: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00011426856718867658, 'time_algorithm_update': 0.001993195120110569, 'loss': 0.01930819115957739, 'time_step': 0.0021606109228478857, 'init_value': -1.526921272277832, 'ave_value': -1.134461957018252, 'soft_opc': nan} step=7138




2022-04-22 08:13.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.30 [info     ] FQE_20220422081310: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00012085667575698301, 'time_algorithm_update': 0.002207378307020808, 'loss': 0.02360043396853494, 'time_step': 0.0023835854357983693, 'init_value': -1.5432466268539429, 'ave_value': -1.163965808422313, 'soft_opc': nan} step=7304




2022-04-22 08:13.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.31 [info     ] FQE_20220422081310: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00012857799070427217, 'time_algorithm_update': 0.002545628202966897, 'loss': 0.025204658857546747, 'time_step': 0.002736681915191283, 'init_value': -1.4958016872406006, 'ave_value': -1.121328451289787, 'soft_opc': nan} step=7470




2022-04-22 08:13.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.31 [info     ] FQE_20220422081310: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00013147922883550804, 'time_algorithm_update': 0.0026194819484848574, 'loss': 0.024713443976501565, 'time_step': 0.0028107654617493412, 'init_value': -1.4373420476913452, 'ave_value': -1.0358172273313677, 'soft_opc': nan} step=7636




2022-04-22 08:13.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.32 [info     ] FQE_20220422081310: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00012956182640719126, 'time_algorithm_update': 0.0023878582988876894, 'loss': 0.026013630102542845, 'time_step': 0.0025802462934011437, 'init_value': -1.4344048500061035, 'ave_value': -1.0319725885878208, 'soft_opc': nan} step=7802




2022-04-22 08:13.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.32 [info     ] FQE_20220422081310: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00011664269918418792, 'time_algorithm_update': 0.0020747701805758187, 'loss': 0.026863867172926485, 'time_step': 0.0022486419562833854, 'init_value': -1.406787633895874, 'ave_value': -1.007234404653076, 'soft_opc': nan} step=7968




2022-04-22 08:13.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.32 [info     ] FQE_20220422081310: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00011962149516645684, 'time_algorithm_update': 0.0023013224084693267, 'loss': 0.027876141466023626, 'time_step': 0.002475379461265472, 'init_value': -1.5009702444076538, 'ave_value': -1.1181535837920131, 'soft_opc': nan} step=8134




2022-04-22 08:13.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:13.33 [info     ] FQE_20220422081310: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00012129186147666839, 'time_algorithm_update': 0.0022098285606108516, 'loss': 0.028723206697873413, 'time_step': 0.0023879904344857456, 'init_value': -1.5067658424377441, 'ave_value': -1.1120886508006234, 'soft_opc': nan} step=8300




2022-04-22 08:13.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081310/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 08:13.34 [debug    ] RoundIterator is selected.
2022-04-22 08:13.34 [info     ] Directory is created at d3rlpy_logs/FQE_20220422081334
2022-04-22 08:13.34 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:13.34 [debug    ] Building models...
2022-04-22 08:13.34 [debug    ] Models have been built.
2022-04-22 08:13.34 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422081334/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:13.35 [info     ] FQE_20220422081334: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00013431629469228345, 'time_algorithm_update': 0.0023828848849895387, 'loss': 0.024328453422978867, 'time_step': 0.0025799780390983406, 'init_value': -1.1157976388931274, 'ave_value': -1.1201993381453528, 'soft_opc': nan} step=344




2022-04-22 08:13.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.36 [info     ] FQE_20220422081334: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00014140716818876044, 'time_algorithm_update': 0.002308340959770735, 'loss': 0.022677374288959558, 'time_step': 0.0025095801020777503, 'init_value': -2.0580637454986572, 'ave_value': -2.0524499284388784, 'soft_opc': nan} step=688




2022-04-22 08:13.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.36 [info     ] FQE_20220422081334: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00013412916383077932, 'time_algorithm_update': 0.002313876567884933, 'loss': 0.02650996100504038, 'time_step': 0.0025089306886806047, 'init_value': -3.1602587699890137, 'ave_value': -3.1260239392519, 'soft_opc': nan} step=1032




2022-04-22 08:13.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.37 [info     ] FQE_20220422081334: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00013774910638498705, 'time_algorithm_update': 0.0023318633090618044, 'loss': 0.02960791677334021, 'time_step': 0.0025283604167228523, 'init_value': -4.005589485168457, 'ave_value': -3.970802077731571, 'soft_opc': nan} step=1376




2022-04-22 08:13.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.38 [info     ] FQE_20220422081334: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00014415175415748773, 'time_algorithm_update': 0.002584674330644829, 'loss': 0.03450197647433988, 'time_step': 0.002793314151985701, 'init_value': -5.06180477142334, 'ave_value': -5.082580353521012, 'soft_opc': nan} step=1720




2022-04-22 08:13.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.39 [info     ] FQE_20220422081334: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00013183438500692678, 'time_algorithm_update': 0.002095227324685385, 'loss': 0.042215687953758727, 'time_step': 0.0022827074971309928, 'init_value': -5.652057647705078, 'ave_value': -5.75504018312639, 'soft_opc': nan} step=2064




2022-04-22 08:13.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.40 [info     ] FQE_20220422081334: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.00013997111209603243, 'time_algorithm_update': 0.0024058860401774563, 'loss': 0.053090138279637974, 'time_step': 0.0026079423205797063, 'init_value': -6.382331848144531, 'ave_value': -6.6554959426055085, 'soft_opc': nan} step=2408




2022-04-22 08:13.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.41 [info     ] FQE_20220422081334: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.00013128893319950548, 'time_algorithm_update': 0.002300238193467606, 'loss': 0.06367620710976594, 'time_step': 0.0024903271087380343, 'init_value': -6.772102355957031, 'ave_value': -7.334557221990985, 'soft_opc': nan} step=2752




2022-04-22 08:13.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.42 [info     ] FQE_20220422081334: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.00014269490574681483, 'time_algorithm_update': 0.002600191637527111, 'loss': 0.0750093610583653, 'time_step': 0.0028082652147426164, 'init_value': -7.1654839515686035, 'ave_value': -7.930338693457144, 'soft_opc': nan} step=3096




2022-04-22 08:13.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.43 [info     ] FQE_20220422081334: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00012716789578282557, 'time_algorithm_update': 0.0020960569381713867, 'loss': 0.08838959932760444, 'time_step': 0.0022792490415794904, 'init_value': -7.720037460327148, 'ave_value': -8.859729090534351, 'soft_opc': nan} step=3440




2022-04-22 08:13.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.44 [info     ] FQE_20220422081334: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.0001402698283971742, 'time_algorithm_update': 0.0024775100308795307, 'loss': 0.10387430103972208, 'time_step': 0.0026828064474948617, 'init_value': -8.339407920837402, 'ave_value': -9.749405484894911, 'soft_opc': nan} step=3784




2022-04-22 08:13.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.45 [info     ] FQE_20220422081334: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00013115586236465808, 'time_algorithm_update': 0.0022214242192201837, 'loss': 0.12420473328174286, 'time_step': 0.0024100881676341213, 'init_value': -9.113048553466797, 'ave_value': -10.816970400659887, 'soft_opc': nan} step=4128




2022-04-22 08:13.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.46 [info     ] FQE_20220422081334: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00013931892638982727, 'time_algorithm_update': 0.00230544320372648, 'loss': 0.14425646449807425, 'time_step': 0.00250111554944238, 'init_value': -9.759673118591309, 'ave_value': -11.548785061261675, 'soft_opc': nan} step=4472




2022-04-22 08:13.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.47 [info     ] FQE_20220422081334: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00013768187789029853, 'time_algorithm_update': 0.0023348580959231353, 'loss': 0.168788384310468, 'time_step': 0.0025357347588206448, 'init_value': -10.313682556152344, 'ave_value': -12.295524719464886, 'soft_opc': nan} step=4816




2022-04-22 08:13.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.48 [info     ] FQE_20220422081334: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00012659957242566486, 'time_algorithm_update': 0.002169322828913844, 'loss': 0.18680576239387656, 'time_step': 0.002351187689359798, 'init_value': -10.905560493469238, 'ave_value': -12.936083274569597, 'soft_opc': nan} step=5160




2022-04-22 08:13.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.49 [info     ] FQE_20220422081334: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00013224260751591173, 'time_algorithm_update': 0.002234254465546719, 'loss': 0.20812278312392707, 'time_step': 0.002426737962767135, 'init_value': -11.9899320602417, 'ave_value': -14.084570341174667, 'soft_opc': nan} step=5504




2022-04-22 08:13.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.50 [info     ] FQE_20220422081334: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00014093379641688146, 'time_algorithm_update': 0.0023740696352581646, 'loss': 0.23927929739164577, 'time_step': 0.002578821293143339, 'init_value': -12.480255126953125, 'ave_value': -14.750173779382362, 'soft_opc': nan} step=5848




2022-04-22 08:13.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.51 [info     ] FQE_20220422081334: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.0001337445059487986, 'time_algorithm_update': 0.0023045158663461377, 'loss': 0.25944240957087034, 'time_step': 0.0024993676085804783, 'init_value': -13.164491653442383, 'ave_value': -15.478037329353727, 'soft_opc': nan} step=6192




2022-04-22 08:13.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.52 [info     ] FQE_20220422081334: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.0001469462416892828, 'time_algorithm_update': 0.002419187579044076, 'loss': 0.28840949483280787, 'time_step': 0.0026298449483028678, 'init_value': -13.85614013671875, 'ave_value': -16.3380580336124, 'soft_opc': nan} step=6536




2022-04-22 08:13.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.53 [info     ] FQE_20220422081334: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00013324964878170989, 'time_algorithm_update': 0.0023459577283193897, 'loss': 0.3088976413249796, 'time_step': 0.0025407664997633113, 'init_value': -14.492546081542969, 'ave_value': -17.074583963418867, 'soft_opc': nan} step=6880




2022-04-22 08:13.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.53 [info     ] FQE_20220422081334: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00013472798258759255, 'time_algorithm_update': 0.002273800761200661, 'loss': 0.3302157104805892, 'time_step': 0.002468345470206682, 'init_value': -14.989479064941406, 'ave_value': -18.021150097143543, 'soft_opc': nan} step=7224




2022-04-22 08:13.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.54 [info     ] FQE_20220422081334: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00014303174129752226, 'time_algorithm_update': 0.0025627100190450977, 'loss': 0.36038518963424965, 'time_step': 0.0027660457200782244, 'init_value': -15.820638656616211, 'ave_value': -18.804098168901493, 'soft_opc': nan} step=7568




2022-04-22 08:13.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.55 [info     ] FQE_20220422081334: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.0001296539639317712, 'time_algorithm_update': 0.0021650271360264266, 'loss': 0.3789360196665363, 'time_step': 0.0023552033790322237, 'init_value': -16.214391708374023, 'ave_value': -19.424281538928952, 'soft_opc': nan} step=7912




2022-04-22 08:13.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.56 [info     ] FQE_20220422081334: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00013551531836043958, 'time_algorithm_update': 0.002415506645690563, 'loss': 0.4074909707888701, 'time_step': 0.002613521592561589, 'init_value': -16.709548950195312, 'ave_value': -20.149198933979413, 'soft_opc': nan} step=8256




2022-04-22 08:13.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.57 [info     ] FQE_20220422081334: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.0001331831133642862, 'time_algorithm_update': 0.0022604014984397, 'loss': 0.43123074133100725, 'time_step': 0.0024554015592087148, 'init_value': -16.937925338745117, 'ave_value': -20.643506898536337, 'soft_opc': nan} step=8600




2022-04-22 08:13.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.58 [info     ] FQE_20220422081334: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001326307307842166, 'time_algorithm_update': 0.002273616402648216, 'loss': 0.44412101810083315, 'time_step': 0.00246402898500132, 'init_value': -17.117645263671875, 'ave_value': -21.17748991906106, 'soft_opc': nan} step=8944




2022-04-22 08:13.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:13.59 [info     ] FQE_20220422081334: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00014203301695890204, 'time_algorithm_update': 0.00244736255601395, 'loss': 0.46946904662628336, 'time_step': 0.002655528312505678, 'init_value': -17.61783218383789, 'ave_value': -21.660049944793855, 'soft_opc': nan} step=9288




2022-04-22 08:13.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.00 [info     ] FQE_20220422081334: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.00014556216639141704, 'time_algorithm_update': 0.0025515452373859496, 'loss': 0.5025280140784331, 'time_step': 0.0027589867281359298, 'init_value': -17.91149139404297, 'ave_value': -22.28566478931152, 'soft_opc': nan} step=9632




2022-04-22 08:14.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.01 [info     ] FQE_20220422081334: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00014595722043237022, 'time_algorithm_update': 0.0025311542111773823, 'loss': 0.5299143621124067, 'time_step': 0.0027452949867692103, 'init_value': -18.506690979003906, 'ave_value': -23.214570237749868, 'soft_opc': nan} step=9976




2022-04-22 08:14.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.02 [info     ] FQE_20220422081334: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00013493105422618778, 'time_algorithm_update': 0.0023023451483526894, 'loss': 0.5696221583165488, 'time_step': 0.00249930038008579, 'init_value': -18.388757705688477, 'ave_value': -23.374002849840902, 'soft_opc': nan} step=10320




2022-04-22 08:14.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.03 [info     ] FQE_20220422081334: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00013492135114448014, 'time_algorithm_update': 0.002227187156677246, 'loss': 0.5928998623573936, 'time_step': 0.0024217796880145405, 'init_value': -18.91973114013672, 'ave_value': -23.971806705387326, 'soft_opc': nan} step=10664




2022-04-22 08:14.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.04 [info     ] FQE_20220422081334: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.0001280037469642107, 'time_algorithm_update': 0.0021272315535434457, 'loss': 0.6183507459801297, 'time_step': 0.002311396044354106, 'init_value': -18.836956024169922, 'ave_value': -24.218362577514604, 'soft_opc': nan} step=11008




2022-04-22 08:14.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.05 [info     ] FQE_20220422081334: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.000135244325149891, 'time_algorithm_update': 0.002409057561741319, 'loss': 0.6484911236966159, 'time_step': 0.002605813880299413, 'init_value': -19.11598014831543, 'ave_value': -24.803655836576812, 'soft_opc': nan} step=11352




2022-04-22 08:14.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.06 [info     ] FQE_20220422081334: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.00013236458911452184, 'time_algorithm_update': 0.002200911904490271, 'loss': 0.6780783293855398, 'time_step': 0.002391281516052956, 'init_value': -19.072006225585938, 'ave_value': -25.18449649002638, 'soft_opc': nan} step=11696




2022-04-22 08:14.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.07 [info     ] FQE_20220422081334: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00014115280883256778, 'time_algorithm_update': 0.002440214850181757, 'loss': 0.7170717857702267, 'time_step': 0.002644859081090883, 'init_value': -19.267751693725586, 'ave_value': -25.67229047790841, 'soft_opc': nan} step=12040




2022-04-22 08:14.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.08 [info     ] FQE_20220422081334: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00013122239778208178, 'time_algorithm_update': 0.002240732658741086, 'loss': 0.7372677535912412, 'time_step': 0.0024317794067915096, 'init_value': -19.224491119384766, 'ave_value': -26.108330868983323, 'soft_opc': nan} step=12384




2022-04-22 08:14.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.09 [info     ] FQE_20220422081334: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00013540650522986123, 'time_algorithm_update': 0.002396860788034838, 'loss': 0.753377721141374, 'time_step': 0.0025920659996742424, 'init_value': -18.805356979370117, 'ave_value': -26.019801992529096, 'soft_opc': nan} step=12728




2022-04-22 08:14.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.10 [info     ] FQE_20220422081334: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.0001334797504336335, 'time_algorithm_update': 0.0022144567134768462, 'loss': 0.7648923945732328, 'time_step': 0.002407241006230199, 'init_value': -18.685993194580078, 'ave_value': -26.29583959839609, 'soft_opc': nan} step=13072




2022-04-22 08:14.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.11 [info     ] FQE_20220422081334: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00014118191807769065, 'time_algorithm_update': 0.0024969931258711706, 'loss': 0.7675129704415625, 'time_step': 0.00270270400269087, 'init_value': -18.633865356445312, 'ave_value': -26.691316421443663, 'soft_opc': nan} step=13416




2022-04-22 08:14.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.11 [info     ] FQE_20220422081334: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00013320113337317177, 'time_algorithm_update': 0.0022411020689232404, 'loss': 0.7680468666800406, 'time_step': 0.002435148455375849, 'init_value': -18.780000686645508, 'ave_value': -26.887923128487593, 'soft_opc': nan} step=13760




2022-04-22 08:14.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.12 [info     ] FQE_20220422081334: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00013698602831640908, 'time_algorithm_update': 0.0024064391158347908, 'loss': 0.7643302698608835, 'time_step': 0.0026072430056194927, 'init_value': -18.808143615722656, 'ave_value': -27.167105834613352, 'soft_opc': nan} step=14104




2022-04-22 08:14.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.13 [info     ] FQE_20220422081334: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00013640938803207044, 'time_algorithm_update': 0.0023457158443539643, 'loss': 0.7715919642373487, 'time_step': 0.002544766941735911, 'init_value': -18.876354217529297, 'ave_value': -27.540214992453436, 'soft_opc': nan} step=14448




2022-04-22 08:14.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.14 [info     ] FQE_20220422081334: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00013243597607279933, 'time_algorithm_update': 0.0021530542262764864, 'loss': 0.7561395980179483, 'time_step': 0.002344229886698168, 'init_value': -19.131610870361328, 'ave_value': -27.973603711945113, 'soft_opc': nan} step=14792




2022-04-22 08:14.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.15 [info     ] FQE_20220422081334: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.0001316188379775646, 'time_algorithm_update': 0.002241596926090329, 'loss': 0.7494036059198511, 'time_step': 0.0024312956388606584, 'init_value': -18.7736873626709, 'ave_value': -28.004849817174243, 'soft_opc': nan} step=15136




2022-04-22 08:14.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.16 [info     ] FQE_20220422081334: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.0001346004563708638, 'time_algorithm_update': 0.002230158378911573, 'loss': 0.7385505197872949, 'time_step': 0.0024266679619633874, 'init_value': -18.968017578125, 'ave_value': -28.233322435067045, 'soft_opc': nan} step=15480




2022-04-22 08:14.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.17 [info     ] FQE_20220422081334: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00013273399929667628, 'time_algorithm_update': 0.002179362746172173, 'loss': 0.727944822066875, 'time_step': 0.002373153387114059, 'init_value': -18.877681732177734, 'ave_value': -28.43680882705627, 'soft_opc': nan} step=15824




2022-04-22 08:14.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.18 [info     ] FQE_20220422081334: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00012938504995301713, 'time_algorithm_update': 0.0021320013112800067, 'loss': 0.7283543610382218, 'time_step': 0.0023183219654615535, 'init_value': -18.872936248779297, 'ave_value': -28.51670426831052, 'soft_opc': nan} step=16168




2022-04-22 08:14.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.19 [info     ] FQE_20220422081334: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001397819020027338, 'time_algorithm_update': 0.002451279135637505, 'loss': 0.7232172444170384, 'time_step': 0.0026575978412184606, 'init_value': -19.123828887939453, 'ave_value': -28.88300014982621, 'soft_opc': nan} step=16512




2022-04-22 08:14.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.20 [info     ] FQE_20220422081334: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00013962249423182287, 'time_algorithm_update': 0.002472585023835648, 'loss': 0.7231861129186528, 'time_step': 0.002678322237591411, 'init_value': -19.275083541870117, 'ave_value': -29.049341211172642, 'soft_opc': nan} step=16856




2022-04-22 08:14.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:14.21 [info     ] FQE_20220422081334: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00014226311861082565, 'time_algorithm_update': 0.002432658228763314, 'loss': 0.7253799862661507, 'time_step': 0.002640305563460949, 'init_value': -19.071125030517578, 'ave_value': -28.986490760729353, 'soft_opc': nan} step=17200




2022-04-22 08:14.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422081334/model_17200.pt
search iteration:  34
using hyper params:  [0.004618231513938129, 0.00030324753262887534, 2.11290782550058e-05, 3]
2022-04-22 08:14.21 [debug    ] RoundIterator is selected.
2022-04-22 08:14.21 [info     ] Directory is created at d3rlpy_logs/CQL_20220422081421
2022-04-22 08:14.21 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:14.21 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 08:14.21 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422081421/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.004618231513938129, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:14.34 [info     ] CQL_20220422081421: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00034129412877077314, 'time_algorithm_update': 0.035346236532134125, 'temp_loss': 4.942956158191483, 'temp': 0.9962116455756171, 'alpha_loss': -17.684037903140734, 'alpha': 1.0177324679545585, 'critic_loss': 107.4423016454443, 'actor_loss': -1.8576361153683911, 'time_step': 0.03577426541058314, 'td_error': 1.2441105739640412, 'init_value': -0.11568863689899445, 'ave_value': -0.13157422930024734} step=346
2022-04-22 08:14.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:14.46 [info     ] CQL_20220422081421: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003465606987131813, 'time_algorithm_update': 0.035519429024933394, 'temp_loss': 4.990683956642371, 'temp': 0.9888556530709901, 'alpha_loss': -18.34126103682325, 'alpha': 1.0542218974559983, 'critic_loss': 98.67956534148641, 'actor_loss': -1.3785718477874822, 'time_step': 0.03596154320446742, 'td_error': 1.2714131427074438, 'init_value': -0.7615826725959778, 'ave_value': -0.5594083029201635} step=692
2022-04-22 08:14.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:14.59 [info     ] CQL_20220422081421: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.0003447305260366098, 'time_algorithm_update': 0.034465142068146284, 'temp_loss': 4.953616394472949, 'temp': 0.9816549289433253, 'alpha_loss': -19.04976058144101, 'alpha': 1.092509150505066, 'critic_loss': 94.04548715580405, 'actor_loss': -0.357346890429627, 'time_step': 0.03489901909249366, 'td_error': 1.231037422074962, 'init_value': -1.8416274785995483, 'ave_value': -1.4048920679307995} step=1038
2022-04-22 08:14.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:15.11 [info     ] CQL_20220422081421: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0003382153593735888, 'time_algorithm_update': 0.034626465312318305, 'temp_loss': 4.917222767206975, 'temp': 0.9745452929438884, 'alpha_loss': -19.767928117961553, 'alpha': 1.1327677717098612, 'critic_loss': 97.77224241929247, 'actor_loss': 0.9384263166027262, 'time_step': 0.03505280252136936, 'td_error': 1.2199395957877655, 'init_value': -2.752650737762451, 'ave_value': -2.198175280010054} step=1384
2022-04-22 08:15.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:15.24 [info     ] CQL_20220422081421: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00034368658341424313, 'time_algorithm_update': 0.035241528053504195, 'temp_loss': 4.884503793165174, 'temp': 0.9675106643941361, 'alpha_loss': -20.470720324213104, 'alpha': 1.174892016228913, 'critic_loss': 107.00789750380324, 'actor_loss': 2.2423946261405945, 'time_step': 0.03568424517019635, 'td_error': 1.2393769642502561, 'init_value': -4.145367622375488, 'ave_value': -3.4806066357636194} step=1730
2022-04-22 08:15.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:15.37 [info     ] CQL_20220422081421: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003492749495313347, 'time_algorithm_update': 0.034860289165739376, 'temp_loss': 4.8497223316589535, 'temp': 0.9605435019283626, 'alpha_loss': -21.231054515507868, 'alpha': 1.2189542592605414, 'critic_loss': 120.49511357125519, 'actor_loss': 3.6799483988326407, 'time_step': 0.03531092715401181, 'td_error': 1.2548748851507283, 'init_value': -5.369210720062256, 'ave_value': -4.623722690128359} step=2076
2022-04-22 08:15.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:15.49 [info     ] CQL_20220422081421: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003614460112731581, 'time_algorithm_update': 0.03574879389966844, 'temp_loss': 4.815730524889996, 'temp': 0.9536390366581823, 'alpha_loss': -22.021192892438414, 'alpha': 1.2650110290918737, 'critic_loss': 136.88583671702125, 'actor_loss': 5.105491252303812, 'time_step': 0.03619800688903456, 'td_error': 1.2788038698179298, 'init_value': -6.713397026062012, 'ave_value': -5.956443399821146} step=2422
2022-04-22 08:15.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:16.02 [info     ] CQL_20220422081421: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003536195424250785, 'time_algorithm_update': 0.0353104992408973, 'temp_loss': 4.7812758691049035, 'temp': 0.9467970687529944, 'alpha_loss': -22.842020073378016, 'alpha': 1.3130980795518512, 'critic_loss': 156.58113697911963, 'actor_loss': 6.490185925037186, 'time_step': 0.035757728394745406, 'td_error': 1.3232682112682557, 'init_value': -8.439977645874023, 'ave_value': -7.63089161439211} step=2768
2022-04-22 08:16.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:16.14 [info     ] CQL_20220422081421: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003468590664725772, 'time_algorithm_update': 0.03394338919248195, 'temp_loss': 4.746469583125473, 'temp': 0.9400121704002337, 'alpha_loss': -23.71663019698479, 'alpha': 1.3632683447330673, 'critic_loss': 179.4671273644949, 'actor_loss': 7.818392254713642, 'time_step': 0.03438309851409383, 'td_error': 1.335803580066649, 'init_value': -9.199067115783691, 'ave_value': -8.35642763756643} step=3114
2022-04-22 08:16.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:16.27 [info     ] CQL_20220422081421: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0003512773899673727, 'time_algorithm_update': 0.034646105904110595, 'temp_loss': 4.712504103004588, 'temp': 0.9332830244406111, 'alpha_loss': -24.62151056080195, 'alpha': 1.415581090257347, 'critic_loss': 203.96549017580946, 'actor_loss': 9.056382292267903, 'time_step': 0.03509355073719356, 'td_error': 1.36369882646183, 'init_value': -10.242324829101562, 'ave_value': -9.438176297395362} step=3460
2022-04-22 08:16.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:16.39 [info     ] CQL_20220422081421: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00035083982985832787, 'time_algorithm_update': 0.03479504929801632, 'temp_loss': 4.678967627486742, 'temp': 0.9266077865754938, 'alpha_loss': -25.575887112259174, 'alpha': 1.4700889394462453, 'critic_loss': 231.60634683597985, 'actor_loss': 10.166681763753726, 'time_step': 0.0352396337972211, 'td_error': 1.3865670631875977, 'init_value': -11.220698356628418, 'ave_value': -10.412144023199792} step=3806
2022-04-22 08:16.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:16.52 [info     ] CQL_20220422081421: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003385233741275148, 'time_algorithm_update': 0.03432347044090315, 'temp_loss': 4.646028994135774, 'temp': 0.9199833156745558, 'alpha_loss': -26.55557214042355, 'alpha': 1.5268436553161269, 'critic_loss': 261.20711318881524, 'actor_loss': 11.120366416225544, 'time_step': 0.03474495590077659, 'td_error': 1.4147530727400317, 'init_value': -12.164417266845703, 'ave_value': -11.36795560984659} step=4152
2022-04-22 08:16.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:17.04 [info     ] CQL_20220422081421: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003539888845013745, 'time_algorithm_update': 0.03426785689557908, 'temp_loss': 4.61369448727955, 'temp': 0.9134092038077426, 'alpha_loss': -27.583247752547955, 'alpha': 1.5859043391453738, 'critic_loss': 292.52868511222, 'actor_loss': 11.913475935169727, 'time_step': 0.034702724115007876, 'td_error': 1.4400910823521273, 'init_value': -12.989896774291992, 'ave_value': -12.20975883503331} step=4498
2022-04-22 08:17.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:17.16 [info     ] CQL_20220422081421: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003414650183881638, 'time_algorithm_update': 0.0340579065973359, 'temp_loss': 4.580816820177729, 'temp': 0.9068841034966397, 'alpha_loss': -28.650419124978125, 'alpha': 1.6473461971806653, 'critic_loss': 324.5599299083555, 'actor_loss': 12.56364255144417, 'time_step': 0.03448980736594669, 'td_error': 1.4527145669783599, 'init_value': -13.547715187072754, 'ave_value': -12.766651287255213} step=4844
2022-04-22 08:17.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:17.29 [info     ] CQL_20220422081421: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00035297319379156035, 'time_algorithm_update': 0.0358011881051036, 'temp_loss': 4.547295475281732, 'temp': 0.900408320688788, 'alpha_loss': -29.76353414348095, 'alpha': 1.7112434826834353, 'critic_loss': 359.19964731911017, 'actor_loss': 13.03907931057704, 'time_step': 0.03624156100212494, 'td_error': 1.4640917809244478, 'init_value': -13.939990997314453, 'ave_value': -13.16743165722047} step=5190
2022-04-22 08:17.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:17.42 [info     ] CQL_20220422081421: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00035017625444886313, 'time_algorithm_update': 0.03470573673358542, 'temp_loss': 4.515576640994563, 'temp': 0.8939803926586416, 'alpha_loss': -30.917860339831755, 'alpha': 1.7776825131708487, 'critic_loss': 393.515836859025, 'actor_loss': 13.340341907016114, 'time_step': 0.035142556091264494, 'td_error': 1.4630426667766387, 'init_value': -13.933725357055664, 'ave_value': -13.177648525771625} step=5536
2022-04-22 08:17.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:17.54 [info     ] CQL_20220422081421: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003497476522633106, 'time_algorithm_update': 0.0348407360859689, 'temp_loss': 4.482343854242667, 'temp': 0.8876002008859822, 'alpha_loss': -32.11670903663415, 'alpha': 1.8467443978166305, 'critic_loss': 428.1241283968005, 'actor_loss': 13.445431750633814, 'time_step': 0.03527637299774699, 'td_error': 1.4662102799572343, 'init_value': -14.120302200317383, 'ave_value': -13.418244709477937} step=5882
2022-04-22 08:17.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:18.07 [info     ] CQL_20220422081421: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00034321663696641866, 'time_algorithm_update': 0.03527391439228389, 'temp_loss': 4.450556542832039, 'temp': 0.8812660821255921, 'alpha_loss': -33.36393688179854, 'alpha': 1.9185239345352085, 'critic_loss': 461.9829442900729, 'actor_loss': 13.31537045770987, 'time_step': 0.035702504174557725, 'td_error': 1.4647588783741654, 'init_value': -14.087965965270996, 'ave_value': -13.445095598465914} step=6228
2022-04-22 08:18.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:18.20 [info     ] CQL_20220422081421: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003550121549926052, 'time_algorithm_update': 0.03515698937322363, 'temp_loss': 4.418257341219511, 'temp': 0.874978321652881, 'alpha_loss': -34.66501491745083, 'alpha': 1.9931226675910068, 'critic_loss': 496.21980982433166, 'actor_loss': 13.005789726455777, 'time_step': 0.035597129364234176, 'td_error': 1.4548135604486248, 'init_value': -13.764280319213867, 'ave_value': -13.182752689652784} step=6574
2022-04-22 08:18.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:18.32 [info     ] CQL_20220422081421: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.00034298166374250645, 'time_algorithm_update': 0.0346489848429068, 'temp_loss': 4.386767589977022, 'temp': 0.8687357549377949, 'alpha_loss': -36.01216090207844, 'alpha': 2.0706452778998137, 'critic_loss': 528.111986546158, 'actor_loss': 12.488170957289679, 'time_step': 0.03507675049621935, 'td_error': 1.4448248127049963, 'init_value': -13.366151809692383, 'ave_value': -12.876758663933556} step=6920
2022-04-22 08:18.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:18.45 [info     ] CQL_20220422081421: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00034232704625653394, 'time_algorithm_update': 0.03480158238052633, 'temp_loss': 4.355985225280586, 'temp': 0.862538063801782, 'alpha_loss': -37.41287648195476, 'alpha': 2.1512008002727705, 'critic_loss': 558.7688618037052, 'actor_loss': 11.734169830476617, 'time_step': 0.03523141593602351, 'td_error': 1.4126568792846943, 'init_value': -12.381528854370117, 'ave_value': -11.921317339327846} step=7266
2022-04-22 08:18.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:18.57 [info     ] CQL_20220422081421: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.00033965000527442536, 'time_algorithm_update': 0.034647210484984294, 'temp_loss': 4.325462191091107, 'temp': 0.8563830049740786, 'alpha_loss': -38.86539246443379, 'alpha': 2.234900770848886, 'critic_loss': 587.027345690424, 'actor_loss': 10.780930830564113, 'time_step': 0.035066871284749467, 'td_error': 1.3859559079295896, 'init_value': -11.346532821655273, 'ave_value': -10.941976889731931} step=7612
2022-04-22 08:18.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:19.10 [info     ] CQL_20220422081421: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003445231156542122, 'time_algorithm_update': 0.03400055659299641, 'temp_loss': 4.294475605033036, 'temp': 0.8502732219034537, 'alpha_loss': -40.37912319161299, 'alpha': 2.3218671600253598, 'critic_loss': 614.4995150703916, 'actor_loss': 9.662901958289174, 'time_step': 0.03443382654575943, 'td_error': 1.3617326385209547, 'init_value': -10.24504280090332, 'ave_value': -9.900330073825552} step=7958
2022-04-22 08:19.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:19.22 [info     ] CQL_20220422081421: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00034378512057265797, 'time_algorithm_update': 0.03547448850091482, 'temp_loss': 4.26351607741648, 'temp': 0.8442072678852632, 'alpha_loss': -41.952407539235374, 'alpha': 2.4122241604534875, 'critic_loss': 639.8888851452425, 'actor_loss': 8.391118993648904, 'time_step': 0.03591098881870336, 'td_error': 1.342642500320835, 'init_value': -9.137370109558105, 'ave_value': -8.895050487975025} step=8304
2022-04-22 08:19.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:19.35 [info     ] CQL_20220422081421: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00034951336811043626, 'time_algorithm_update': 0.036176965415822285, 'temp_loss': 4.232264528384787, 'temp': 0.8381849501518844, 'alpha_loss': -43.57642043670478, 'alpha': 2.506097091415714, 'critic_loss': 665.3154146933142, 'actor_loss': 7.038563637375143, 'time_step': 0.03662356062431556, 'td_error': 1.3168848104658957, 'init_value': -7.690649509429932, 'ave_value': -7.485979118749619} step=8650
2022-04-22 08:19.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:19.48 [info     ] CQL_20220422081421: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00034771282548849296, 'time_algorithm_update': 0.034159336475967673, 'temp_loss': 4.202627049705197, 'temp': 0.8322058423406127, 'alpha_loss': -45.27774108489814, 'alpha': 2.6036284810545816, 'critic_loss': 691.6160207759439, 'actor_loss': 5.714448322450495, 'time_step': 0.03460313887954447, 'td_error': 1.302671541593044, 'init_value': -6.593032360076904, 'ave_value': -6.444905773369042} step=8996
2022-04-22 08:19.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:20.01 [info     ] CQL_20220422081421: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00035917827848754177, 'time_algorithm_update': 0.03649628162384033, 'temp_loss': 4.173597298605594, 'temp': 0.826268635905547, 'alpha_loss': -47.03660779743525, 'alpha': 2.70495871036728, 'critic_loss': 721.2613296067783, 'actor_loss': 4.557589767985261, 'time_step': 0.03695167971484234, 'td_error': 1.2902658155063154, 'init_value': -5.598311901092529, 'ave_value': -5.484284848565454} step=9342
2022-04-22 08:20.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:20.13 [info     ] CQL_20220422081421: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00035091976209872033, 'time_algorithm_update': 0.03496192990010873, 'temp_loss': 4.143934063828749, 'temp': 0.8203732690025616, 'alpha_loss': -48.86682341691387, 'alpha': 2.810249746879401, 'critic_loss': 754.4324972340137, 'actor_loss': 3.677496096302319, 'time_step': 0.03540738676324745, 'td_error': 1.283711803576333, 'init_value': -4.88613748550415, 'ave_value': -4.806736348352482} step=9688
2022-04-22 08:20.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:20.26 [info     ] CQL_20220422081421: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003432504014472741, 'time_algorithm_update': 0.034674639646717575, 'temp_loss': 4.114375355615781, 'temp': 0.81451999279805, 'alpha_loss': -50.76598150881728, 'alpha': 2.9196442717072593, 'critic_loss': 791.453213377495, 'actor_loss': 3.064509423481936, 'time_step': 0.03510852769620157, 'td_error': 1.279468137417819, 'init_value': -4.41450309753418, 'ave_value': -4.353985347132821} step=10034
2022-04-22 08:20.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:20.39 [info     ] CQL_20220422081421: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003497042407879251, 'time_algorithm_update': 0.03527529115621754, 'temp_loss': 4.084721817446582, 'temp': 0.8087085252896898, 'alpha_loss': -52.75166815829415, 'alpha': 3.0332978205873786, 'critic_loss': 832.0460213898234, 'actor_loss': 2.677471654952606, 'time_step': 0.03572120211717021, 'td_error': 1.2774122622783495, 'init_value': -4.120029926300049, 'ave_value': -4.070273465169668} step=10380
2022-04-22 08:20.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:20.52 [info     ] CQL_20220422081421: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00034307193204846686, 'time_algorithm_update': 0.03607471416451338, 'temp_loss': 4.055640132440997, 'temp': 0.8029400372091745, 'alpha_loss': -54.79768266843234, 'alpha': 3.151384253033324, 'critic_loss': 875.3177578435468, 'actor_loss': 2.426079872026609, 'time_step': 0.036510876837493364, 'td_error': 1.2758737867245693, 'init_value': -3.8943135738372803, 'ave_value': -3.8553340821426705} step=10726
2022-04-22 08:20.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:21.04 [info     ] CQL_20220422081421: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0003512739446121833, 'time_algorithm_update': 0.03540439550587208, 'temp_loss': 4.027125253842745, 'temp': 0.7972118584062323, 'alpha_loss': -56.936613193137106, 'alpha': 3.2740709581816128, 'critic_loss': 922.8687460133106, 'actor_loss': 2.247345827218425, 'time_step': 0.035846022512182336, 'td_error': 1.2749823735005592, 'init_value': -3.757850170135498, 'ave_value': -3.7205435879482596} step=11072
2022-04-22 08:21.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:21.17 [info     ] CQL_20220422081421: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.0003529194462506068, 'time_algorithm_update': 0.03464432121012252, 'temp_loss': 3.9970909233038134, 'temp': 0.7915247632244419, 'alpha_loss': -59.150703330949554, 'alpha': 3.401535151321764, 'critic_loss': 972.4359998758129, 'actor_loss': 2.146478364922408, 'time_step': 0.035085742184192456, 'td_error': 1.2746567630878192, 'init_value': -3.663184642791748, 'ave_value': -3.6330806693289164} step=11418
2022-04-22 08:21.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:21.29 [info     ] CQL_20220422081421: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00033794799981089684, 'time_algorithm_update': 0.034358917633233045, 'temp_loss': 3.9694037602816015, 'temp': 0.7858789358180382, 'alpha_loss': -61.45351460627738, 'alpha': 3.5339533745208915, 'critic_loss': 1025.8297078568123, 'actor_loss': 2.0784626668588273, 'time_step': 0.03477899049747886, 'td_error': 1.274320707838469, 'init_value': -3.580930709838867, 'ave_value': -3.552807861213268} step=11764
2022-04-22 08:21.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:21.40 [info     ] CQL_20220422081421: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00031028455392473694, 'time_algorithm_update': 0.029292127300549104, 'temp_loss': 3.9406615971140777, 'temp': 0.7802724049270497, 'alpha_loss': -63.842638324450895, 'alpha': 3.6715273202499215, 'critic_loss': 1081.6041944206106, 'actor_loss': 2.0091332111055453, 'time_step': 0.029676096976836982, 'td_error': 1.2741427370209693, 'init_value': -3.513597249984741, 'ave_value': -3.4900480831330736} step=12110
2022-04-22 08:21.40 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:21.52 [info     ] CQL_20220422081421: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00034268053969895907, 'time_algorithm_update': 0.03497857510009942, 'temp_loss': 3.911754555784898, 'temp': 0.7747069752285246, 'alpha_loss': -66.32298210452747, 'alpha': 3.8144509847453563, 'critic_loss': 1140.2120809389676, 'actor_loss': 1.9536104419327884, 'time_step': 0.03540418189385034, 'td_error': 1.274296880694818, 'init_value': -3.4847428798675537, 'ave_value': -3.4640351633259105} step=12456
2022-04-22 08:21.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:22.05 [info     ] CQL_20220422081421: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.00033959970308866113, 'time_algorithm_update': 0.03451957799106664, 'temp_loss': 3.884620401211557, 'temp': 0.7691810005317533, 'alpha_loss': -68.90830537211689, 'alpha': 3.9629421916311185, 'critic_loss': 1201.4317761018785, 'actor_loss': 1.9323090270075496, 'time_step': 0.03494719549410605, 'td_error': 1.2744540625209348, 'init_value': -3.451810359954834, 'ave_value': -3.43429911488207} step=12802
2022-04-22 08:22.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:22.18 [info     ] CQL_20220422081421: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00034799878996920726, 'time_algorithm_update': 0.035129737991818115, 'temp_loss': 3.856700257758874, 'temp': 0.7636940629151515, 'alpha_loss': -71.59802219633423, 'alpha': 4.117227060946426, 'critic_loss': 1264.3890070391528, 'actor_loss': 1.9119641629257642, 'time_step': 0.03556386583802328, 'td_error': 1.2744977110203262, 'init_value': -3.423837661743164, 'ave_value': -3.4098430943296894} step=13148
2022-04-22 08:22.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:22.30 [info     ] CQL_20220422081421: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.000347897496526641, 'time_algorithm_update': 0.03482924858269664, 'temp_loss': 3.8297210938668664, 'temp': 0.7582461027740743, 'alpha_loss': -74.38242064459476, 'alpha': 4.2775121024578295, 'critic_loss': 1329.600009455157, 'actor_loss': 1.923978575736801, 'time_step': 0.035263530091743246, 'td_error': 1.2749777096825547, 'init_value': -3.4384918212890625, 'ave_value': -3.4254640970822474} step=13494
2022-04-22 08:22.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:22.43 [info     ] CQL_20220422081421: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003475350451607236, 'time_algorithm_update': 0.035845443692510526, 'temp_loss': 3.8019966603703583, 'temp': 0.7528369345761448, 'alpha_loss': -77.27325027113017, 'alpha': 4.4440323576072736, 'critic_loss': 1396.290456077267, 'actor_loss': 1.9171149792698767, 'time_step': 0.03628389201412311, 'td_error': 1.275088382391445, 'init_value': -3.42901611328125, 'ave_value': -3.416999025059967} step=13840
2022-04-22 08:22.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:22.55 [info     ] CQL_20220422081421: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003472415008985927, 'time_algorithm_update': 0.03425628188028501, 'temp_loss': 3.7751884915236102, 'temp': 0.7474661917355708, 'alpha_loss': -80.29048633024182, 'alpha': 4.6170364818132, 'critic_loss': 1466.1877102714054, 'actor_loss': 1.9386858257944184, 'time_step': 0.03468538708769517, 'td_error': 1.2752457837873186, 'init_value': -3.4171295166015625, 'ave_value': -3.40697366832952} step=14186
2022-04-22 08:22.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:23.08 [info     ] CQL_20220422081421: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00034118249926263887, 'time_algorithm_update': 0.03431798888079693, 'temp_loss': 3.7479265618186464, 'temp': 0.7421342427330899, 'alpha_loss': -83.4116308840713, 'alpha': 4.796784855726826, 'critic_loss': 1532.444201871839, 'actor_loss': 1.9482673568532647, 'time_step': 0.03473555008110973, 'td_error': 1.2755002920115242, 'init_value': -3.4172251224517822, 'ave_value': -3.4081894883164416} step=14532
2022-04-22 08:23.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:23.20 [info     ] CQL_20220422081421: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00034890078395777356, 'time_algorithm_update': 0.03490465362637029, 'temp_loss': 3.72177818813765, 'temp': 0.7368398810053147, 'alpha_loss': -86.66436106069929, 'alpha': 4.983518091929441, 'critic_loss': 1603.7205990477105, 'actor_loss': 1.9586568660818773, 'time_step': 0.035339057100990605, 'td_error': 1.2760935490336085, 'init_value': -3.453409194946289, 'ave_value': -3.4453046341087084} step=14878
2022-04-22 08:23.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:23.33 [info     ] CQL_20220422081421: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00034432259598219327, 'time_algorithm_update': 0.03458902670468898, 'temp_loss': 3.694763637002493, 'temp': 0.7315836717627641, 'alpha_loss': -90.03291830162092, 'alpha': 5.177514908630724, 'critic_loss': 1678.5513422089505, 'actor_loss': 1.9888263141488753, 'time_step': 0.03502216849023896, 'td_error': 1.2762461494671238, 'init_value': -3.453875780105591, 'ave_value': -3.4465247490667177} step=15224
2022-04-22 08:23.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:23.45 [info     ] CQL_20220422081421: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.000348878044613524, 'time_algorithm_update': 0.03525241261961832, 'temp_loss': 3.668631490255367, 'temp': 0.7263652615119957, 'alpha_loss': -93.53484637751056, 'alpha': 5.379067930872041, 'critic_loss': 1754.2043001913612, 'actor_loss': 1.9958806213615947, 'time_step': 0.03569358414997255, 'td_error': 1.2765957361205769, 'init_value': -3.4687130451202393, 'ave_value': -3.4612799498864177} step=15570
2022-04-22 08:23.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:23.58 [info     ] CQL_20220422081421: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00034066914133942886, 'time_algorithm_update': 0.03454598319323766, 'temp_loss': 3.6429681819298363, 'temp': 0.7211830946751413, 'alpha_loss': -97.16834228162821, 'alpha': 5.588451887141763, 'critic_loss': 1830.874672244739, 'actor_loss': 2.011545111678239, 'time_step': 0.03497760419900707, 'td_error': 1.2768479979974885, 'init_value': -3.471083879470825, 'ave_value': -3.465661387355365} step=15916
2022-04-22 08:23.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:24.10 [info     ] CQL_20220422081421: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.00033589112276286747, 'time_algorithm_update': 0.03313856042189405, 'temp_loss': 3.61672493281392, 'temp': 0.716037999171053, 'alpha_loss': -100.9656292689329, 'alpha': 5.806002904914018, 'critic_loss': 1909.6950440158735, 'actor_loss': 2.0505373567515024, 'time_step': 0.033555085259365895, 'td_error': 1.2775339331948714, 'init_value': -3.5230331420898438, 'ave_value': -3.5178783161711387} step=16262
2022-04-22 08:24.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:24.22 [info     ] CQL_20220422081421: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003453038331401141, 'time_algorithm_update': 0.0345332236648295, 'temp_loss': 3.5905600620832057, 'temp': 0.7109303986061515, 'alpha_loss': -104.89424975621218, 'alpha': 6.0320421670902675, 'critic_loss': 1987.4600127997426, 'actor_loss': 2.119215074302144, 'time_step': 0.034964832956391266, 'td_error': 1.2781597806027616, 'init_value': -3.5661396980285645, 'ave_value': -3.5614006527231} step=16608
2022-04-22 08:24.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:24.35 [info     ] CQL_20220422081421: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.00035497494515656047, 'time_algorithm_update': 0.035720595045585855, 'temp_loss': 3.564862207181192, 'temp': 0.7058594579986065, 'alpha_loss': -108.97207432123967, 'alpha': 6.266880294491101, 'critic_loss': 2065.134107292043, 'actor_loss': 2.1535001497048176, 'time_step': 0.03616430166828839, 'td_error': 1.278426054316428, 'init_value': -3.575519323348999, 'ave_value': -3.571682237685716} step=16954
2022-04-22 08:24.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:24.48 [info     ] CQL_20220422081421: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0003475102386033604, 'time_algorithm_update': 0.034817887868495344, 'temp_loss': 3.5398828411377923, 'temp': 0.7008240303896756, 'alpha_loss': -113.21610822291733, 'alpha': 6.510847467907591, 'critic_loss': 2146.52491363349, 'actor_loss': 2.181689316826749, 'time_step': 0.035252323729454436, 'td_error': 1.278975429445498, 'init_value': -3.615342378616333, 'ave_value': -3.6115195777643456} step=17300
2022-04-22 08:24.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422081421/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:24.48 [info     ] FQE_20220422082448: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.000131934522146202, 'time_algorithm_update': 0.00247434104781553, 'loss': 0.007032718092865165, 'time_step': 0.0026725315185914555, 'init_value': -0.1151408925652504, 'ave_value': -0.07295749506804051, 'soft_opc': nan} step=166




2022-04-22 08:24.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.49 [info     ] FQE_20220422082448: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00012631732297230917, 'time_algorithm_update': 0.0023116936166602447, 'loss': 0.004209533380063991, 'time_step': 0.0024942288915795014, 'init_value': -0.1489812284708023, 'ave_value': -0.07214142210311718, 'soft_opc': nan} step=332




2022-04-22 08:24.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.49 [info     ] FQE_20220422082448: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.00014056642371487906, 'time_algorithm_update': 0.002196026135639972, 'loss': 0.003517283013101712, 'time_step': 0.0023910482245755485, 'init_value': -0.16908350586891174, 'ave_value': -0.08932211538692852, 'soft_opc': nan} step=498




2022-04-22 08:24.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.50 [info     ] FQE_20220422082448: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012765735028737998, 'time_algorithm_update': 0.002224430980452572, 'loss': 0.00313994465451912, 'time_step': 0.0024086078965520285, 'init_value': -0.21180763840675354, 'ave_value': -0.12025811289613311, 'soft_opc': nan} step=664




2022-04-22 08:24.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.50 [info     ] FQE_20220422082448: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00013311512498970492, 'time_algorithm_update': 0.0024667044720017768, 'loss': 0.0028722550101432367, 'time_step': 0.00265794058880174, 'init_value': -0.24850612878799438, 'ave_value': -0.1452593111609285, 'soft_opc': nan} step=830




2022-04-22 08:24.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.51 [info     ] FQE_20220422082448: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.0001310842582978398, 'time_algorithm_update': 0.0024695468236164875, 'loss': 0.002599322340866349, 'time_step': 0.0026591168828757413, 'init_value': -0.2824203372001648, 'ave_value': -0.17225411290487458, 'soft_opc': nan} step=996




2022-04-22 08:24.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.51 [info     ] FQE_20220422082448: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00012499596699174628, 'time_algorithm_update': 0.0022274040314088383, 'loss': 0.0024036310517778956, 'time_step': 0.0024122373167290746, 'init_value': -0.3135497570037842, 'ave_value': -0.1966948853654636, 'soft_opc': nan} step=1162




2022-04-22 08:24.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.52 [info     ] FQE_20220422082448: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.0001390325017722256, 'time_algorithm_update': 0.002532802432416433, 'loss': 0.0021846312622775337, 'time_step': 0.0027299774698464267, 'init_value': -0.331132173538208, 'ave_value': -0.19375048368363768, 'soft_opc': nan} step=1328




2022-04-22 08:24.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.52 [info     ] FQE_20220422082448: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.00013221890093332314, 'time_algorithm_update': 0.0024098258420645474, 'loss': 0.0018868205121420998, 'time_step': 0.0026007273110998682, 'init_value': -0.39773738384246826, 'ave_value': -0.23949629631615035, 'soft_opc': nan} step=1494




2022-04-22 08:24.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.53 [info     ] FQE_20220422082448: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00011683803006827114, 'time_algorithm_update': 0.0019566854798650168, 'loss': 0.001814934576886807, 'time_step': 0.002122623374663204, 'init_value': -0.47376787662506104, 'ave_value': -0.28327455496465836, 'soft_opc': nan} step=1660




2022-04-22 08:24.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.53 [info     ] FQE_20220422082448: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00012240639652114316, 'time_algorithm_update': 0.0023786562034882695, 'loss': 0.001576967429753999, 'time_step': 0.0025600597082850443, 'init_value': -0.5295064449310303, 'ave_value': -0.3075601079207551, 'soft_opc': nan} step=1826




2022-04-22 08:24.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.54 [info     ] FQE_20220422082448: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.00012141825204872223, 'time_algorithm_update': 0.002250042306371482, 'loss': 0.0014779347707573548, 'time_step': 0.0024224203753184124, 'init_value': -0.6093500852584839, 'ave_value': -0.35333214132031343, 'soft_opc': nan} step=1992




2022-04-22 08:24.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.54 [info     ] FQE_20220422082448: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.0001214871923607516, 'time_algorithm_update': 0.0022272604057587773, 'loss': 0.0016488320403171323, 'time_step': 0.002401071858693318, 'init_value': -0.725500226020813, 'ave_value': -0.42414223351389974, 'soft_opc': nan} step=2158




2022-04-22 08:24.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.54 [info     ] FQE_20220422082448: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00012307712830692888, 'time_algorithm_update': 0.0022814202021403484, 'loss': 0.0017010507237654269, 'time_step': 0.0024644653481173226, 'init_value': -0.8289332985877991, 'ave_value': -0.47670667216785856, 'soft_opc': nan} step=2324




2022-04-22 08:24.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.55 [info     ] FQE_20220422082448: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00012884226190038473, 'time_algorithm_update': 0.0023305071405617587, 'loss': 0.0018327785964426864, 'time_step': 0.0025240326502236976, 'init_value': -0.9207144975662231, 'ave_value': -0.5223854798890717, 'soft_opc': nan} step=2490




2022-04-22 08:24.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.55 [info     ] FQE_20220422082448: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00011943190930837609, 'time_algorithm_update': 0.002205143491905856, 'loss': 0.0021927212263283557, 'time_step': 0.0023802648107689546, 'init_value': -1.0470106601715088, 'ave_value': -0.6010934980938564, 'soft_opc': nan} step=2656




2022-04-22 08:24.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.56 [info     ] FQE_20220422082448: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.0001248997378062053, 'time_algorithm_update': 0.002286806164017643, 'loss': 0.002316949330130031, 'time_step': 0.002472923462649426, 'init_value': -1.1554827690124512, 'ave_value': -0.6691993043162264, 'soft_opc': nan} step=2822




2022-04-22 08:24.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.56 [info     ] FQE_20220422082448: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00011811773461031627, 'time_algorithm_update': 0.0019711011863616577, 'loss': 0.002605254882322456, 'time_step': 0.002141678189656821, 'init_value': -1.2222578525543213, 'ave_value': -0.6843601321264564, 'soft_opc': nan} step=2988




2022-04-22 08:24.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.57 [info     ] FQE_20220422082448: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00012710295527814384, 'time_algorithm_update': 0.0024520230580525226, 'loss': 0.0030361642548744285, 'time_step': 0.002639741782682488, 'init_value': -1.3567235469818115, 'ave_value': -0.7737858965120337, 'soft_opc': nan} step=3154




2022-04-22 08:24.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.57 [info     ] FQE_20220422082448: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00013335785233830832, 'time_algorithm_update': 0.0024491218199212865, 'loss': 0.0033030546998257288, 'time_step': 0.0026434975934315876, 'init_value': -1.4727470874786377, 'ave_value': -0.837700512042051, 'soft_opc': nan} step=3320




2022-04-22 08:24.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.58 [info     ] FQE_20220422082448: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.0001267726162830031, 'time_algorithm_update': 0.0021920074899512603, 'loss': 0.0036947746318975374, 'time_step': 0.002379145966954978, 'init_value': -1.6048026084899902, 'ave_value': -0.9398866826584479, 'soft_opc': nan} step=3486




2022-04-22 08:24.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.58 [info     ] FQE_20220422082448: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00012735286390925027, 'time_algorithm_update': 0.0023200598107763083, 'loss': 0.003946722208989324, 'time_step': 0.0025062618485416272, 'init_value': -1.6748967170715332, 'ave_value': -0.9568977371529416, 'soft_opc': nan} step=3652




2022-04-22 08:24.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.59 [info     ] FQE_20220422082448: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00012257300227521415, 'time_algorithm_update': 0.0021992103163018285, 'loss': 0.0044419020852663115, 'time_step': 0.0023813060967318982, 'init_value': -1.8129428625106812, 'ave_value': -1.050091934761217, 'soft_opc': nan} step=3818




2022-04-22 08:24.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:24.59 [info     ] FQE_20220422082448: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.00012913956699601138, 'time_algorithm_update': 0.002416004617530179, 'loss': 0.004790267536096859, 'time_step': 0.0026036414755396097, 'init_value': -1.858871340751648, 'ave_value': -1.0722717394901289, 'soft_opc': nan} step=3984




2022-04-22 08:24.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.00 [info     ] FQE_20220422082448: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00012243799416415663, 'time_algorithm_update': 0.002291015831820936, 'loss': 0.005369901794032755, 'time_step': 0.0024707302989729917, 'init_value': -1.9976063966751099, 'ave_value': -1.1588942584355135, 'soft_opc': nan} step=4150




2022-04-22 08:25.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.00 [info     ] FQE_20220422082448: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00012314175984945642, 'time_algorithm_update': 0.002148332366024155, 'loss': 0.005805367086691423, 'time_step': 0.0023255175854786335, 'init_value': -2.146528720855713, 'ave_value': -1.2751783610061482, 'soft_opc': nan} step=4316




2022-04-22 08:25.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.00 [info     ] FQE_20220422082448: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00012766453156988304, 'time_algorithm_update': 0.0023182429463030345, 'loss': 0.006392027076974747, 'time_step': 0.002506580697484763, 'init_value': -2.210599660873413, 'ave_value': -1.309394079057483, 'soft_opc': nan} step=4482




2022-04-22 08:25.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.01 [info     ] FQE_20220422082448: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00012205020490899144, 'time_algorithm_update': 0.0022224719265857376, 'loss': 0.006557277092921929, 'time_step': 0.0024026890835130072, 'init_value': -2.293826103210449, 'ave_value': -1.3394391192106514, 'soft_opc': nan} step=4648




2022-04-22 08:25.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.01 [info     ] FQE_20220422082448: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00013530972492263978, 'time_algorithm_update': 0.0026010705764035144, 'loss': 0.00715431063741036, 'time_step': 0.0027985285563641286, 'init_value': -2.3379883766174316, 'ave_value': -1.3299096787432292, 'soft_opc': nan} step=4814




2022-04-22 08:25.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.02 [info     ] FQE_20220422082448: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00012793454779199808, 'time_algorithm_update': 0.0023277366017720787, 'loss': 0.007455170852011518, 'time_step': 0.0025190948003745942, 'init_value': -2.4595000743865967, 'ave_value': -1.4299305871397525, 'soft_opc': nan} step=4980




2022-04-22 08:25.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.02 [info     ] FQE_20220422082448: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00011788793357021837, 'time_algorithm_update': 0.0020059146076799876, 'loss': 0.008225529691511597, 'time_step': 0.002179866813751588, 'init_value': -2.5712552070617676, 'ave_value': -1.4999743530812026, 'soft_opc': nan} step=5146




2022-04-22 08:25.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.03 [info     ] FQE_20220422082448: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00011794107506074102, 'time_algorithm_update': 0.002044802688690553, 'loss': 0.00853254054203147, 'time_step': 0.00221883963389569, 'init_value': -2.700526714324951, 'ave_value': -1.5936848674875659, 'soft_opc': nan} step=5312




2022-04-22 08:25.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.03 [info     ] FQE_20220422082448: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00011287396212658251, 'time_algorithm_update': 0.001973471009587667, 'loss': 0.009137102301409244, 'time_step': 0.0021385457142289862, 'init_value': -2.7156224250793457, 'ave_value': -1.5874208287948424, 'soft_opc': nan} step=5478




2022-04-22 08:25.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.04 [info     ] FQE_20220422082448: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00011875112372708607, 'time_algorithm_update': 0.0020780750067837268, 'loss': 0.009616038603890485, 'time_step': 0.0022478275988475384, 'init_value': -2.8490705490112305, 'ave_value': -1.6921597260881114, 'soft_opc': nan} step=5644




2022-04-22 08:25.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.04 [info     ] FQE_20220422082448: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00012846309018422323, 'time_algorithm_update': 0.0024182020899761155, 'loss': 0.010043386212151766, 'time_step': 0.0026077333703098528, 'init_value': -2.880814552307129, 'ave_value': -1.7009661666377707, 'soft_opc': nan} step=5810




2022-04-22 08:25.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.05 [info     ] FQE_20220422082448: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.0001325937638799828, 'time_algorithm_update': 0.0025014144828520626, 'loss': 0.010526069128221035, 'time_step': 0.0026958649417003953, 'init_value': -2.9393675327301025, 'ave_value': -1.7339793839768785, 'soft_opc': nan} step=5976




2022-04-22 08:25.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.05 [info     ] FQE_20220422082448: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00014119119529264518, 'time_algorithm_update': 0.0026422451777630544, 'loss': 0.010981783465095178, 'time_step': 0.0028481555272297687, 'init_value': -3.049211025238037, 'ave_value': -1.8241796152548746, 'soft_opc': nan} step=6142




2022-04-22 08:25.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.05 [info     ] FQE_20220422082448: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.00012813131493258188, 'time_algorithm_update': 0.0023114566343376435, 'loss': 0.011540944321769716, 'time_step': 0.00249999977019896, 'init_value': -2.964263439178467, 'ave_value': -1.7348527519284067, 'soft_opc': nan} step=6308




2022-04-22 08:25.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.06 [info     ] FQE_20220422082448: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00013466053698436324, 'time_algorithm_update': 0.002583049866090338, 'loss': 0.011798274337597394, 'time_step': 0.002775387591626271, 'init_value': -3.0514001846313477, 'ave_value': -1.8005925113821888, 'soft_opc': nan} step=6474




2022-04-22 08:25.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.07 [info     ] FQE_20220422082448: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00012954602758568455, 'time_algorithm_update': 0.0025782340980437866, 'loss': 0.011391690600851372, 'time_step': 0.0027684691440628237, 'init_value': -3.1578543186187744, 'ave_value': -1.876590314722276, 'soft_opc': nan} step=6640




2022-04-22 08:25.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.07 [info     ] FQE_20220422082448: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00014373336929872813, 'time_algorithm_update': 0.0029085702206715046, 'loss': 0.01271226870896104, 'time_step': 0.0031110637159232632, 'init_value': -3.1590304374694824, 'ave_value': -1.8667869113251432, 'soft_opc': nan} step=6806




2022-04-22 08:25.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.08 [info     ] FQE_20220422082448: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00012521140546683805, 'time_algorithm_update': 0.002060239573559129, 'loss': 0.013309310427418047, 'time_step': 0.00224250195974327, 'init_value': -3.2116847038269043, 'ave_value': -1.8953862064593547, 'soft_opc': nan} step=6972




2022-04-22 08:25.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.08 [info     ] FQE_20220422082448: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00012625125517328102, 'time_algorithm_update': 0.002170492367572095, 'loss': 0.013413695550484425, 'time_step': 0.002353875033826713, 'init_value': -3.335049629211426, 'ave_value': -2.006111245085527, 'soft_opc': nan} step=7138




2022-04-22 08:25.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.08 [info     ] FQE_20220422082448: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00013048533933708467, 'time_algorithm_update': 0.0024464848529861635, 'loss': 0.013777841996900872, 'time_step': 0.0026346990861088396, 'init_value': -3.3667235374450684, 'ave_value': -2.0423866913796545, 'soft_opc': nan} step=7304




2022-04-22 08:25.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.09 [info     ] FQE_20220422082448: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001309880291122988, 'time_algorithm_update': 0.00236574569380427, 'loss': 0.01450305822493329, 'time_step': 0.0025581839572952456, 'init_value': -3.398252010345459, 'ave_value': -2.049596969971249, 'soft_opc': nan} step=7470




2022-04-22 08:25.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.09 [info     ] FQE_20220422082448: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00013861024236104576, 'time_algorithm_update': 0.0025838283171136693, 'loss': 0.015145854073848733, 'time_step': 0.0027862557445664004, 'init_value': -3.490081310272217, 'ave_value': -2.125602589426814, 'soft_opc': nan} step=7636




2022-04-22 08:25.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.10 [info     ] FQE_20220422082448: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00013513306537306453, 'time_algorithm_update': 0.0024725916873977846, 'loss': 0.015259154416208369, 'time_step': 0.0026665667453444146, 'init_value': -3.5799312591552734, 'ave_value': -2.2098705562116865, 'soft_opc': nan} step=7802




2022-04-22 08:25.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.10 [info     ] FQE_20220422082448: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00013173631874911757, 'time_algorithm_update': 0.0022442125412354985, 'loss': 0.015546177461300135, 'time_step': 0.0024391628173460445, 'init_value': -3.6102819442749023, 'ave_value': -2.213716815653685, 'soft_opc': nan} step=7968




2022-04-22 08:25.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.11 [info     ] FQE_20220422082448: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00012114536331360599, 'time_algorithm_update': 0.002187246299651732, 'loss': 0.016341804031981155, 'time_step': 0.0023683510630963796, 'init_value': -3.6710662841796875, 'ave_value': -2.269621993883236, 'soft_opc': nan} step=8134




2022-04-22 08:25.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:25.11 [info     ] FQE_20220422082448: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00012910222432699548, 'time_algorithm_update': 0.002256561474627759, 'loss': 0.01682454285493081, 'time_step': 0.0024446335183568747, 'init_value': -3.6887786388397217, 'ave_value': -2.2822136108805466, 'soft_opc': nan} step=8300




2022-04-22 08:25.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082448/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 08:25.12 [debug    ] RoundIterator is selected.
2022-04-22 08:25.12 [info     ] Directory is created at d3rlpy_logs/FQE_20220422082512
2022-04-22 08:25.12 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:25.12 [debug    ] Building models...
2022-04-22 08:25.12 [debug    ] Models have been built.
2022-04-22 08:25.12 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422082512/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:25.13 [info     ] FQE_20220422082512: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00012667996938838514, 'time_algorithm_update': 0.0022756096928618673, 'loss': 0.023297495495657936, 'time_step': 0.0024606502333352734, 'init_value': -1.187260389328003, 'ave_value': -1.18269907103197, 'soft_opc': nan} step=344




2022-04-22 08:25.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.13 [info     ] FQE_20220422082512: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00013121546700943347, 'time_algorithm_update': 0.002335454142370889, 'loss': 0.021812694783452467, 'time_step': 0.00252859259760657, 'init_value': -1.984890103340149, 'ave_value': -2.0083914488554, 'soft_opc': nan} step=688




2022-04-22 08:25.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.14 [info     ] FQE_20220422082512: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00013089665146761163, 'time_algorithm_update': 0.0024038677991822708, 'loss': 0.025481188807874745, 'time_step': 0.0025964240695154944, 'init_value': -2.976850986480713, 'ave_value': -3.044914653051544, 'soft_opc': nan} step=1032




2022-04-22 08:25.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.15 [info     ] FQE_20220422082512: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00012614837912626044, 'time_algorithm_update': 0.002242030792458113, 'loss': 0.0276584772983306, 'time_step': 0.0024245880370916324, 'init_value': -3.6955959796905518, 'ave_value': -3.7850157720951345, 'soft_opc': nan} step=1376




2022-04-22 08:25.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.16 [info     ] FQE_20220422082512: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00013859604680260948, 'time_algorithm_update': 0.002587416837381762, 'loss': 0.03448718702041566, 'time_step': 0.002791391555653062, 'init_value': -4.73851203918457, 'ave_value': -4.8883302995482, 'soft_opc': nan} step=1720




2022-04-22 08:25.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.17 [info     ] FQE_20220422082512: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.0001319889412369839, 'time_algorithm_update': 0.002357926479605741, 'loss': 0.041567775895981506, 'time_step': 0.002547835194787314, 'init_value': -5.278901100158691, 'ave_value': -5.504368388545406, 'soft_opc': nan} step=2064




2022-04-22 08:25.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.18 [info     ] FQE_20220422082512: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001292831675950871, 'time_algorithm_update': 0.002328088117200275, 'loss': 0.05069323659192251, 'time_step': 0.002518532581107561, 'init_value': -6.063610553741455, 'ave_value': -6.428269005090267, 'soft_opc': nan} step=2408




2022-04-22 08:25.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.19 [info     ] FQE_20220422082512: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001273342343263848, 'time_algorithm_update': 0.002211773118307424, 'loss': 0.058954782721716475, 'time_step': 0.0023975129737410436, 'init_value': -6.483551979064941, 'ave_value': -7.03921528265283, 'soft_opc': nan} step=2752




2022-04-22 08:25.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.20 [info     ] FQE_20220422082512: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001364225565001022, 'time_algorithm_update': 0.0024394947429035984, 'loss': 0.06861374452222832, 'time_step': 0.00264352282812429, 'init_value': -6.8076066970825195, 'ave_value': -7.580018934070527, 'soft_opc': nan} step=3096




2022-04-22 08:25.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.21 [info     ] FQE_20220422082512: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00012913484906041346, 'time_algorithm_update': 0.002284162959387136, 'loss': 0.08120274761702519, 'time_step': 0.0024705799513084943, 'init_value': -7.013150215148926, 'ave_value': -8.11671571502009, 'soft_opc': nan} step=3440




2022-04-22 08:25.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.22 [info     ] FQE_20220422082512: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.00012869751730630564, 'time_algorithm_update': 0.0022821364014647726, 'loss': 0.09129420715535796, 'time_step': 0.0024750468342803244, 'init_value': -7.573244094848633, 'ave_value': -8.913082161196717, 'soft_opc': nan} step=3784




2022-04-22 08:25.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.23 [info     ] FQE_20220422082512: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.0001294578230658243, 'time_algorithm_update': 0.0022989275843598124, 'loss': 0.1030532932883605, 'time_step': 0.0024902085925257483, 'init_value': -8.000408172607422, 'ave_value': -9.634183157376341, 'soft_opc': nan} step=4128




2022-04-22 08:25.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.24 [info     ] FQE_20220422082512: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00013007674106331758, 'time_algorithm_update': 0.0022493995899377866, 'loss': 0.11483657330025507, 'time_step': 0.00244105901829032, 'init_value': -8.175288200378418, 'ave_value': -10.072919903883525, 'soft_opc': nan} step=4472




2022-04-22 08:25.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.25 [info     ] FQE_20220422082512: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00012534302334452784, 'time_algorithm_update': 0.0021837097267771877, 'loss': 0.12929556430534048, 'time_step': 0.002368359371673229, 'init_value': -8.938803672790527, 'ave_value': -10.968179435940744, 'soft_opc': nan} step=4816




2022-04-22 08:25.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.26 [info     ] FQE_20220422082512: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00012138347293055335, 'time_algorithm_update': 0.0021100196727486544, 'loss': 0.14755910252448345, 'time_step': 0.0022883761760800385, 'init_value': -9.384071350097656, 'ave_value': -11.66672423646391, 'soft_opc': nan} step=5160




2022-04-22 08:25.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.27 [info     ] FQE_20220422082512: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00013485204341799715, 'time_algorithm_update': 0.002505880455638087, 'loss': 0.16688335995558043, 'time_step': 0.0027049890784330145, 'init_value': -9.926321983337402, 'ave_value': -12.365926025566218, 'soft_opc': nan} step=5504




2022-04-22 08:25.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.28 [info     ] FQE_20220422082512: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00012510391168816146, 'time_algorithm_update': 0.0021808126638101976, 'loss': 0.19065658814479533, 'time_step': 0.0023661387521167134, 'init_value': -10.073005676269531, 'ave_value': -12.717735294644461, 'soft_opc': nan} step=5848




2022-04-22 08:25.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.28 [info     ] FQE_20220422082512: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.00012693363566731298, 'time_algorithm_update': 0.0023360411788142005, 'loss': 0.21447841778159316, 'time_step': 0.0025225773800251097, 'init_value': -10.534282684326172, 'ave_value': -13.399021619018525, 'soft_opc': nan} step=6192




2022-04-22 08:25.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.29 [info     ] FQE_20220422082512: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00012633966845135357, 'time_algorithm_update': 0.0022112976673037505, 'loss': 0.24128063083193158, 'time_step': 0.0023963825647221053, 'init_value': -11.032934188842773, 'ave_value': -14.354159830550575, 'soft_opc': nan} step=6536




2022-04-22 08:25.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.30 [info     ] FQE_20220422082512: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.00013050783512204192, 'time_algorithm_update': 0.002260723086290581, 'loss': 0.2641397351886384, 'time_step': 0.002451737952786823, 'init_value': -11.288363456726074, 'ave_value': -14.862586277106312, 'soft_opc': nan} step=6880




2022-04-22 08:25.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.31 [info     ] FQE_20220422082512: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.0001298008963119152, 'time_algorithm_update': 0.0023807107016097667, 'loss': 0.29405313837991726, 'time_step': 0.0025732434073159863, 'init_value': -11.31236457824707, 'ave_value': -15.20876303880207, 'soft_opc': nan} step=7224




2022-04-22 08:25.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.32 [info     ] FQE_20220422082512: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00013475639875545057, 'time_algorithm_update': 0.002580690522526586, 'loss': 0.312679759646376, 'time_step': 0.002782224222671154, 'init_value': -11.538125991821289, 'ave_value': -15.701456089367301, 'soft_opc': nan} step=7568




2022-04-22 08:25.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.33 [info     ] FQE_20220422082512: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00013129932935847791, 'time_algorithm_update': 0.0023200726786325146, 'loss': 0.3407457135292853, 'time_step': 0.0025121867656707764, 'init_value': -11.987112045288086, 'ave_value': -16.565142345885214, 'soft_opc': nan} step=7912




2022-04-22 08:25.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.34 [info     ] FQE_20220422082512: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.0001342206500297369, 'time_algorithm_update': 0.002405005138973857, 'loss': 0.3607390433289977, 'time_step': 0.0026019964107247286, 'init_value': -12.461776733398438, 'ave_value': -17.215824825456785, 'soft_opc': nan} step=8256




2022-04-22 08:25.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.35 [info     ] FQE_20220422082512: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00013121616008669832, 'time_algorithm_update': 0.0023188604864963266, 'loss': 0.3846140784904534, 'time_step': 0.0025094608927881995, 'init_value': -12.480361938476562, 'ave_value': -17.635459409917804, 'soft_opc': nan} step=8600




2022-04-22 08:25.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.36 [info     ] FQE_20220422082512: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.00013825782509737237, 'time_algorithm_update': 0.0024156667465387387, 'loss': 0.40295297854450035, 'time_step': 0.002615471218907556, 'init_value': -12.618682861328125, 'ave_value': -18.193825282804827, 'soft_opc': nan} step=8944




2022-04-22 08:25.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.37 [info     ] FQE_20220422082512: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00012014563693556674, 'time_algorithm_update': 0.0020339204821475717, 'loss': 0.42493206341642625, 'time_step': 0.0022101388421169547, 'init_value': -12.335514068603516, 'ave_value': -18.303222339871542, 'soft_opc': nan} step=9288




2022-04-22 08:25.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.38 [info     ] FQE_20220422082512: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.0001301550587942434, 'time_algorithm_update': 0.002279280230056408, 'loss': 0.4566928179079104, 'time_step': 0.0024689110212547834, 'init_value': -12.818973541259766, 'ave_value': -19.038377428634472, 'soft_opc': nan} step=9632




2022-04-22 08:25.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.39 [info     ] FQE_20220422082512: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.0001296837662541589, 'time_algorithm_update': 0.0022815895635028218, 'loss': 0.487593166194423, 'time_step': 0.0024707053982934287, 'init_value': -13.312954902648926, 'ave_value': -19.680943260265533, 'soft_opc': nan} step=9976




2022-04-22 08:25.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.40 [info     ] FQE_20220422082512: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.00013585977776106015, 'time_algorithm_update': 0.002266060474307038, 'loss': 0.5220122270903356, 'time_step': 0.002462524314259374, 'init_value': -13.547744750976562, 'ave_value': -20.10397438195341, 'soft_opc': nan} step=10320




2022-04-22 08:25.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.41 [info     ] FQE_20220422082512: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00012448637984519782, 'time_algorithm_update': 0.0022627004357271417, 'loss': 0.5399120390512656, 'time_step': 0.0024465329425279484, 'init_value': -13.8840913772583, 'ave_value': -20.646510463966443, 'soft_opc': nan} step=10664




2022-04-22 08:25.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.42 [info     ] FQE_20220422082512: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00013400718223216922, 'time_algorithm_update': 0.0024796350057735, 'loss': 0.5529801119281369, 'time_step': 0.00267818292906118, 'init_value': -14.550992965698242, 'ave_value': -21.64082279909247, 'soft_opc': nan} step=11008




2022-04-22 08:25.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.43 [info     ] FQE_20220422082512: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00013541204984797987, 'time_algorithm_update': 0.00247483960417814, 'loss': 0.5811946628571942, 'time_step': 0.0026771169762278713, 'init_value': -14.42896842956543, 'ave_value': -21.742765014789377, 'soft_opc': nan} step=11352




2022-04-22 08:25.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.43 [info     ] FQE_20220422082512: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001274562159249949, 'time_algorithm_update': 0.0021657839763996214, 'loss': 0.6147449632868344, 'time_step': 0.002355556155360022, 'init_value': -14.788899421691895, 'ave_value': -22.318125238410104, 'soft_opc': nan} step=11696




2022-04-22 08:25.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.44 [info     ] FQE_20220422082512: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.00013686820518138797, 'time_algorithm_update': 0.0025271676307500796, 'loss': 0.620998838997641, 'time_step': 0.002728535685428353, 'init_value': -15.290887832641602, 'ave_value': -23.020445250849416, 'soft_opc': nan} step=12040




2022-04-22 08:25.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.45 [info     ] FQE_20220422082512: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.0001362756241199582, 'time_algorithm_update': 0.002418554799501286, 'loss': 0.636312972689264, 'time_step': 0.0026183578857155733, 'init_value': -15.275117874145508, 'ave_value': -23.39697867040732, 'soft_opc': nan} step=12384




2022-04-22 08:25.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.46 [info     ] FQE_20220422082512: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00012668620708376863, 'time_algorithm_update': 0.0021457824596138888, 'loss': 0.6613074763363964, 'time_step': 0.00232939179553542, 'init_value': -15.489299774169922, 'ave_value': -23.879601979949765, 'soft_opc': nan} step=12728




2022-04-22 08:25.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.47 [info     ] FQE_20220422082512: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00013497887655746106, 'time_algorithm_update': 0.0023908968581709752, 'loss': 0.6809285821198204, 'time_step': 0.002591347278550614, 'init_value': -15.817448616027832, 'ave_value': -24.47197550455033, 'soft_opc': nan} step=13072




2022-04-22 08:25.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.48 [info     ] FQE_20220422082512: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00013176646343497343, 'time_algorithm_update': 0.0023331656012424203, 'loss': 0.6859558656605956, 'time_step': 0.0025254910768464553, 'init_value': -15.569368362426758, 'ave_value': -24.693790054965664, 'soft_opc': nan} step=13416




2022-04-22 08:25.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.49 [info     ] FQE_20220422082512: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.00013367381206778593, 'time_algorithm_update': 0.002383539843004803, 'loss': 0.7015924098803986, 'time_step': 0.002581942220066869, 'init_value': -15.861457824707031, 'ave_value': -25.27695395792121, 'soft_opc': nan} step=13760




2022-04-22 08:25.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.50 [info     ] FQE_20220422082512: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.0001322190428889075, 'time_algorithm_update': 0.002279644095620444, 'loss': 0.7017362313182635, 'time_step': 0.0024723431398702223, 'init_value': -16.002853393554688, 'ave_value': -25.606278863945253, 'soft_opc': nan} step=14104




2022-04-22 08:25.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.51 [info     ] FQE_20220422082512: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.0001295846562052882, 'time_algorithm_update': 0.0022563657095265944, 'loss': 0.7237637587768827, 'time_step': 0.0024464580901833468, 'init_value': -16.110553741455078, 'ave_value': -25.98014283349385, 'soft_opc': nan} step=14448




2022-04-22 08:25.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.52 [info     ] FQE_20220422082512: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.0001314483409704164, 'time_algorithm_update': 0.0023649854715480363, 'loss': 0.7469310545102629, 'time_step': 0.002558792053267013, 'init_value': -16.3472843170166, 'ave_value': -26.481944377988846, 'soft_opc': nan} step=14792




2022-04-22 08:25.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.53 [info     ] FQE_20220422082512: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00012792681538781455, 'time_algorithm_update': 0.002332758764887965, 'loss': 0.7447384335208944, 'time_step': 0.002519492493119351, 'init_value': -16.897472381591797, 'ave_value': -27.129370436501933, 'soft_opc': nan} step=15136




2022-04-22 08:25.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.54 [info     ] FQE_20220422082512: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00013027634731558867, 'time_algorithm_update': 0.0022781685341236205, 'loss': 0.7471970307510779, 'time_step': 0.0024691799352335375, 'init_value': -17.321754455566406, 'ave_value': -27.70023480626914, 'soft_opc': nan} step=15480




2022-04-22 08:25.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.55 [info     ] FQE_20220422082512: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.0001287099926970726, 'time_algorithm_update': 0.002207926539487617, 'loss': 0.736505783910235, 'time_step': 0.002397906641627467, 'init_value': -17.29891014099121, 'ave_value': -27.84534899860889, 'soft_opc': nan} step=15824




2022-04-22 08:25.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.56 [info     ] FQE_20220422082512: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.00012784919073415357, 'time_algorithm_update': 0.0023193109867184663, 'loss': 0.7393423792050589, 'time_step': 0.0025096473305724386, 'init_value': -17.187545776367188, 'ave_value': -28.082262759836944, 'soft_opc': nan} step=16168




2022-04-22 08:25.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.57 [info     ] FQE_20220422082512: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.00013081417527309683, 'time_algorithm_update': 0.0022766756456951763, 'loss': 0.725872435996872, 'time_step': 0.0024676170460013456, 'init_value': -17.598026275634766, 'ave_value': -28.63610117395182, 'soft_opc': nan} step=16512




2022-04-22 08:25.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.58 [info     ] FQE_20220422082512: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.00012778889301211336, 'time_algorithm_update': 0.0021290488021318303, 'loss': 0.7100378721735852, 'time_step': 0.002317740473636361, 'init_value': -17.877166748046875, 'ave_value': -28.95153968583893, 'soft_opc': nan} step=16856




2022-04-22 08:25.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:25.58 [info     ] FQE_20220422082512: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00013086892837701843, 'time_algorithm_update': 0.002278002888657326, 'loss': 0.6794851287592982, 'time_step': 0.002470793419106062, 'init_value': -17.81093978881836, 'ave_value': -28.909394123304534, 'soft_opc': nan} step=17200




2022-04-22 08:25.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422082512/model_17200.pt
search iteration:  35
using hyper params:  [0.00324956527587998, 0.0033126082043154178, 8.218924350533324e-05, 3]
2022-04-22 08:25.59 [debug    ] RoundIterator is selected.
2022-04-22 08:25.59 [info     ] Directory is created at d3rlpy_logs/CQL_20220422082559
2022-04-22 08:25.59 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:25.59 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 08:25.59 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422082559/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.00324956527587998, 'actor_optim_factory': {'optim_

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:26.11 [info     ] CQL_20220422082559: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003569739402374091, 'time_algorithm_update': 0.035309331954559146, 'temp_loss': 4.87692047957051, 'temp': 0.9853555694480852, 'alpha_loss': -17.694428146230003, 'alpha': 1.017745245054278, 'critic_loss': 63.454577098692084, 'actor_loss': 0.9093137645523328, 'time_step': 0.03576017390785879, 'td_error': 1.2536444796018278, 'init_value': -3.724827766418457, 'ave_value': -3.5098132810986837} step=346
2022-04-22 08:26.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:26.24 [info     ] CQL_20220422082559: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003468576883305015, 'time_algorithm_update': 0.03516662534261714, 'temp_loss': 4.833072837377559, 'temp': 0.9575347175143357, 'alpha_loss': -18.358853637827615, 'alpha': 1.0542341456936963, 'critic_loss': 89.69038775890549, 'actor_loss': 4.111452759345832, 'time_step': 0.03560989302706856, 'td_error': 1.26608669842967, 'init_value': -5.9855055809021, 'ave_value': -5.526952750646656} step=692
2022-04-22 08:26.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:26.36 [info     ] CQL_20220422082559: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00034666957193716416, 'time_algorithm_update': 0.034649089581704554, 'temp_loss': 4.699895249625851, 'temp': 0.9311409844139408, 'alpha_loss': -19.029822371598613, 'alpha': 1.092518682080197, 'critic_loss': 166.05289082168844, 'actor_loss': 6.498985422828983, 'time_step': 0.03508968918309736, 'td_error': 1.292592084712311, 'init_value': -7.6834516525268555, 'ave_value': -7.182632654648135} step=1038
2022-04-22 08:26.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:26.49 [info     ] CQL_20220422082559: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.000337310609100871, 'time_algorithm_update': 0.03435069288132508, 'temp_loss': 4.5731526316934925, 'temp': 0.90580857902593, 'alpha_loss': -19.722224439499694, 'alpha': 1.1326914626049858, 'critic_loss': 283.73119213126296, 'actor_loss': 7.5621939284264, 'time_step': 0.034778091948845485, 'td_error': 1.3114063120340063, 'init_value': -8.664999961853027, 'ave_value': -8.18957746072989} step=1384
2022-04-22 08:26.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:27.02 [info     ] CQL_20220422082559: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003425096500815684, 'time_algorithm_update': 0.03554225932655996, 'temp_loss': 4.449777166278376, 'temp': 0.8814030673807067, 'alpha_loss': -20.44412100246187, 'alpha': 1.17478075950821, 'critic_loss': 447.21706491812114, 'actor_loss': 6.892498484925728, 'time_step': 0.03598232007440115, 'td_error': 1.280820804995753, 'init_value': -6.94577169418335, 'ave_value': -6.628405210721996} step=1730
2022-04-22 08:27.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:27.14 [info     ] CQL_20220422082559: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.000345386521664658, 'time_algorithm_update': 0.034227096965547245, 'temp_loss': 4.331369834139168, 'temp': 0.85784589107326, 'alpha_loss': -21.205741926424764, 'alpha': 1.2188320483775497, 'critic_loss': 659.8734330193845, 'actor_loss': 4.6690769305808, 'time_step': 0.034669304169671386, 'td_error': 1.267548764392496, 'init_value': -4.881070137023926, 'ave_value': -4.737842809006522} step=2076
2022-04-22 08:27.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:27.27 [info     ] CQL_20220422082559: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003549336008942885, 'time_algorithm_update': 0.036066392253589076, 'temp_loss': 4.216382765356516, 'temp': 0.8350684029518525, 'alpha_loss': -21.99994740458582, 'alpha': 1.2648869584061506, 'critic_loss': 905.5513845454751, 'actor_loss': 2.963591049861357, 'time_step': 0.036518004588309054, 'td_error': 1.2707940130346238, 'init_value': -4.140602111816406, 'ave_value': -4.066947944472898} step=2422
2022-04-22 08:27.27 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:27.39 [info     ] CQL_20220422082559: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003433248211193636, 'time_algorithm_update': 0.034836793221490234, 'temp_loss': 4.105052038424277, 'temp': 0.8130112068156976, 'alpha_loss': -22.833869691529024, 'alpha': 1.31298716254317, 'critic_loss': 1156.2736650588195, 'actor_loss': 2.549430832697477, 'time_step': 0.03527194158190248, 'td_error': 1.2745738763690342, 'init_value': -4.107532978057861, 'ave_value': -4.047780661630427} step=2768
2022-04-22 08:27.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:27.52 [info     ] CQL_20220422082559: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0003388499937994632, 'time_algorithm_update': 0.03483799840673546, 'temp_loss': 3.9973467588424683, 'temp': 0.7916315525253385, 'alpha_loss': -23.70649486056642, 'alpha': 1.36317877893503, 'critic_loss': 1405.0016938137871, 'actor_loss': 2.5588064035239246, 'time_step': 0.03526587086605888, 'td_error': 1.2779385894384152, 'init_value': -4.1439714431762695, 'ave_value': -4.102612484140156} step=3114
2022-04-22 08:27.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:28.05 [info     ] CQL_20220422082559: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00034957400636176844, 'time_algorithm_update': 0.03482575223625051, 'temp_loss': 3.8923887896399965, 'temp': 0.7708840258204179, 'alpha_loss': -24.61538467517478, 'alpha': 1.4155059363800666, 'critic_loss': 1670.9437929715725, 'actor_loss': 2.7362589091923883, 'time_step': 0.0352645209758957, 'td_error': 1.282191086114722, 'init_value': -4.336658954620361, 'ave_value': -4.304148708445702} step=3460
2022-04-22 08:28.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:28.17 [info     ] CQL_20220422082559: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003501934812248098, 'time_algorithm_update': 0.0349127026651636, 'temp_loss': 3.791567977453243, 'temp': 0.7507362377781399, 'alpha_loss': -25.562939991151666, 'alpha': 1.4700239624591231, 'critic_loss': 1955.0481401564757, 'actor_loss': 2.9623230709505908, 'time_step': 0.03536037420261802, 'td_error': 1.2873893343621263, 'init_value': -4.6234450340271, 'ave_value': -4.596556599120949} step=3806
2022-04-22 08:28.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:28.30 [info     ] CQL_20220422082559: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003507040828638683, 'time_algorithm_update': 0.035210181522920644, 'temp_loss': 3.6929409118056986, 'temp': 0.7311561719186044, 'alpha_loss': -26.55203368898072, 'alpha': 1.5267845698863785, 'critic_loss': 2266.11741188358, 'actor_loss': 3.266020382070817, 'time_step': 0.035657467869664894, 'td_error': 1.2931237666707234, 'init_value': -4.95560359954834, 'ave_value': -4.932622779676167} step=4152
2022-04-22 08:28.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:28.42 [info     ] CQL_20220422082559: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003390160599195888, 'time_algorithm_update': 0.03451819915991987, 'temp_loss': 3.597603802047024, 'temp': 0.712117893847427, 'alpha_loss': -27.573988302594664, 'alpha': 1.5858518853352939, 'critic_loss': 2594.0039104836524, 'actor_loss': 3.5803993781867054, 'time_step': 0.034946890235636276, 'td_error': 1.297603312166541, 'init_value': -5.110770225524902, 'ave_value': -5.096286314163462} step=4498
2022-04-22 08:28.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:28.55 [info     ] CQL_20220422082559: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.0003585753413294092, 'time_algorithm_update': 0.034445953506954834, 'temp_loss': 3.503349306266432, 'temp': 0.6936045950547808, 'alpha_loss': -28.64659556350267, 'alpha': 1.6472976080255013, 'critic_loss': 2950.991285732027, 'actor_loss': 3.970450573573912, 'time_step': 0.03489401850397187, 'td_error': 1.3045140868225111, 'init_value': -5.521890163421631, 'ave_value': -5.508199516366929} step=4844
2022-04-22 08:28.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:29.07 [info     ] CQL_20220422082559: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00034296512603759766, 'time_algorithm_update': 0.033721372571294705, 'temp_loss': 3.411964561208824, 'temp': 0.6755938969251049, 'alpha_loss': -29.754002510467707, 'alpha': 1.7112009353031312, 'critic_loss': 3326.0490129944906, 'actor_loss': 4.37914995926653, 'time_step': 0.0341516230147698, 'td_error': 1.3120119894247517, 'init_value': -5.956265926361084, 'ave_value': -5.942891076595764} step=5190
2022-04-22 08:29.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:29.20 [info     ] CQL_20220422082559: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.0003550693478887481, 'time_algorithm_update': 0.03548674845282053, 'temp_loss': 3.3243556291381746, 'temp': 0.6580640973383292, 'alpha_loss': -30.91011229829292, 'alpha': 1.777636133866503, 'critic_loss': 3729.9388646473085, 'actor_loss': 4.818101129090855, 'time_step': 0.035934560560766673, 'td_error': 1.3209429133599244, 'init_value': -6.466873645782471, 'ave_value': -6.451310701311455} step=5536
2022-04-22 08:29.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:29.32 [info     ] CQL_20220422082559: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003534314260317411, 'time_algorithm_update': 0.03402302444325706, 'temp_loss': 3.2376532933615536, 'temp': 0.6409996220831237, 'alpha_loss': -32.115146234545406, 'alpha': 1.8467033278046316, 'critic_loss': 4145.812120382497, 'actor_loss': 5.2871542969191, 'time_step': 0.034464123621152316, 'td_error': 1.3280354985853133, 'init_value': -6.7935662269592285, 'ave_value': -6.781590013517594} step=5882
2022-04-22 08:29.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:29.45 [info     ] CQL_20220422082559: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00034942241073343793, 'time_algorithm_update': 0.0352422178136131, 'temp_loss': 3.154234168157412, 'temp': 0.624386840300753, 'alpha_loss': -33.35948079721087, 'alpha': 1.918487342451349, 'critic_loss': 4593.426787448067, 'actor_loss': 5.748131312386838, 'time_step': 0.03568567568167096, 'td_error': 1.3365306564705188, 'init_value': -7.209059238433838, 'ave_value': -7.199332821205251} step=6228
2022-04-22 08:29.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:29.57 [info     ] CQL_20220422082559: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.00034619135663688526, 'time_algorithm_update': 0.034811908110028746, 'temp_loss': 3.072090705006109, 'temp': 0.6082118301722356, 'alpha_loss': -34.65865276314619, 'alpha': 1.9930888300686214, 'critic_loss': 5040.294861192649, 'actor_loss': 6.199478474655592, 'time_step': 0.035248553132735234, 'td_error': 1.3450936382896204, 'init_value': -7.596729278564453, 'ave_value': -7.589807321721199} step=6574
2022-04-22 08:29.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:30.10 [info     ] CQL_20220422082559: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.000355290539691903, 'time_algorithm_update': 0.03448159984081467, 'temp_loss': 2.992628998150026, 'temp': 0.592460049197853, 'alpha_loss': -36.005016238703206, 'alpha': 2.070608376767594, 'critic_loss': 5507.589646179552, 'actor_loss': 6.694320940557932, 'time_step': 0.03493075150285842, 'td_error': 1.3565454269920298, 'init_value': -8.15600299835205, 'ave_value': -8.147600014959256} step=6920
2022-04-22 08:30.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:30.23 [info     ] CQL_20220422082559: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.0003416186812296079, 'time_algorithm_update': 0.0358121705193051, 'temp_loss': 2.9144306286221986, 'temp': 0.5771221444441405, 'alpha_loss': -37.4062756003672, 'alpha': 2.1511616679285304, 'critic_loss': 5980.920026305094, 'actor_loss': 7.143567012224583, 'time_step': 0.03624866325731222, 'td_error': 1.3659648456056903, 'init_value': -8.557144165039062, 'ave_value': -8.550580489301975} step=7266
2022-04-22 08:30.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:30.35 [info     ] CQL_20220422082559: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.000344847668113047, 'time_algorithm_update': 0.034260809766074825, 'temp_loss': 2.839453756464699, 'temp': 0.5621835897768164, 'alpha_loss': -38.864339751315256, 'alpha': 2.2348680013866096, 'critic_loss': 6470.09078644328, 'actor_loss': 7.67300863486494, 'time_step': 0.034698980392059146, 'td_error': 1.3773309555133304, 'init_value': -9.041104316711426, 'ave_value': -9.034860634114178} step=7612
2022-04-22 08:30.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:30.47 [info     ] CQL_20220422082559: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00034338201401550646, 'time_algorithm_update': 0.03433816143543045, 'temp_loss': 2.766453031859646, 'temp': 0.5476326194801772, 'alpha_loss': -40.374212628844155, 'alpha': 2.321833209495324, 'critic_loss': 7032.365870834086, 'actor_loss': 8.203427321649011, 'time_step': 0.03477745180185131, 'td_error': 1.386373156018323, 'init_value': -9.391573905944824, 'ave_value': -9.388794923676535} step=7958
2022-04-22 08:30.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:31.00 [info     ] CQL_20220422082559: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00034179990691256664, 'time_algorithm_update': 0.03472416249313796, 'temp_loss': 2.6943357790136613, 'temp': 0.5334575744722619, 'alpha_loss': -41.947791347613915, 'alpha': 2.41219395984804, 'critic_loss': 7489.393063583815, 'actor_loss': 8.639896469998222, 'time_step': 0.035158153903277624, 'td_error': 1.3992571227322643, 'init_value': -9.918074607849121, 'ave_value': -9.913177485169816} step=8304
2022-04-22 08:31.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:31.12 [info     ] CQL_20220422082559: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00034701755281128636, 'time_algorithm_update': 0.03485428322257334, 'temp_loss': 2.6245618162816657, 'temp': 0.5196538503803959, 'alpha_loss': -43.57974036994008, 'alpha': 2.5060709887157286, 'critic_loss': 8008.970667844563, 'actor_loss': 9.156995310259692, 'time_step': 0.035293360666043494, 'td_error': 1.4108447023550517, 'init_value': -10.355223655700684, 'ave_value': -10.352275339400615} step=8650
2022-04-22 08:31.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:31.25 [info     ] CQL_20220422082559: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.0003435866681137526, 'time_algorithm_update': 0.03548755190965068, 'temp_loss': 2.5566317718153053, 'temp': 0.5062064245946145, 'alpha_loss': -45.27321072396516, 'alpha': 2.60361278470541, 'critic_loss': 8564.892772873012, 'actor_loss': 9.658512796280702, 'time_step': 0.03592292697443438, 'td_error': 1.4264498554236869, 'init_value': -10.951377868652344, 'ave_value': -10.945137172968481} step=8996
2022-04-22 08:31.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:31.38 [info     ] CQL_20220422082559: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.0003481448730292348, 'time_algorithm_update': 0.034678467436332926, 'temp_loss': 2.490678470947839, 'temp': 0.4931073721089115, 'alpha_loss': -47.03209458058969, 'alpha': 2.70493984153505, 'critic_loss': 9042.143489771495, 'actor_loss': 10.11528641364478, 'time_step': 0.035121037780894024, 'td_error': 1.4356653404928545, 'init_value': -11.257007598876953, 'ave_value': -11.254807009455037} step=9342
2022-04-22 08:31.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:31.50 [info     ] CQL_20220422082559: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00033706254352723934, 'time_algorithm_update': 0.035074316697313604, 'temp_loss': 2.4266195889842304, 'temp': 0.48034585719508244, 'alpha_loss': -48.874556910784946, 'alpha': 2.8102337797253116, 'critic_loss': 8793.583651327674, 'actor_loss': 10.379763892620286, 'time_step': 0.0355009129970749, 'td_error': 1.4390163089468009, 'init_value': -11.328691482543945, 'ave_value': -11.333687511605072} step=9688
2022-04-22 08:31.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:32.03 [info     ] CQL_20220422082559: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003520525948849717, 'time_algorithm_update': 0.03523553037919061, 'temp_loss': 2.3629744500783136, 'temp': 0.46791709752785676, 'alpha_loss': -50.77532182814758, 'alpha': 2.919623840061915, 'critic_loss': 8215.534085135929, 'actor_loss': 10.7650352571741, 'time_step': 0.03568168664943276, 'td_error': 1.4564882175164386, 'init_value': -11.972757339477539, 'ave_value': -11.971677717748781} step=10034
2022-04-22 08:32.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:32.16 [info     ] CQL_20220422082559: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003443329320477612, 'time_algorithm_update': 0.034790050776707646, 'temp_loss': 2.3020827928719494, 'temp': 0.4558111834043712, 'alpha_loss': -52.74256591576372, 'alpha': 3.033261138579749, 'critic_loss': 7775.785297371748, 'actor_loss': 11.210946168513656, 'time_step': 0.03522806222728222, 'td_error': 1.4666854764249553, 'init_value': -12.302901268005371, 'ave_value': -12.303349157688798} step=10380
2022-04-22 08:32.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:32.28 [info     ] CQL_20220422082559: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.00033841105554834267, 'time_algorithm_update': 0.03528752216713966, 'temp_loss': 2.2422594652010526, 'temp': 0.44401871830741796, 'alpha_loss': -54.797069406233774, 'alpha': 3.1513214049311733, 'critic_loss': 7017.9958072728505, 'actor_loss': 11.477780692150137, 'time_step': 0.03572230531990184, 'td_error': 1.4719951026540217, 'init_value': -12.450427055358887, 'ave_value': -12.454914779084278} step=10726
2022-04-22 08:32.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:32.41 [info     ] CQL_20220422082559: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.00034171101674868194, 'time_algorithm_update': 0.03562517524454635, 'temp_loss': 2.1854218857825836, 'temp': 0.4325285393723174, 'alpha_loss': -56.93383717399112, 'alpha': 3.273994312121, 'critic_loss': 6059.122894463511, 'actor_loss': 11.856401575783085, 'time_step': 0.03605637040441436, 'td_error': 1.4870270715050569, 'init_value': -12.960955619812012, 'ave_value': -12.961754700311161} step=11072
2022-04-22 08:32.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:32.53 [info     ] CQL_20220422082559: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.00034759981783828297, 'time_algorithm_update': 0.034000671667859736, 'temp_loss': 2.128438387992065, 'temp': 0.42133483135631317, 'alpha_loss': -59.155006937897966, 'alpha': 3.4014475014857473, 'critic_loss': 5331.005378149837, 'actor_loss': 12.243448552368694, 'time_step': 0.034439456945209834, 'td_error': 1.5000772550929578, 'init_value': -13.377293586730957, 'ave_value': -13.37695267778146} step=11418
2022-04-22 08:32.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:33.06 [info     ] CQL_20220422082559: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00034483664297644114, 'time_algorithm_update': 0.034939342151487494, 'temp_loss': 2.073089851809375, 'temp': 0.41043205438666264, 'alpha_loss': -61.45498351830278, 'alpha': 3.5338670559701204, 'critic_loss': 5112.466949286489, 'actor_loss': 12.886688450168323, 'time_step': 0.03537389997802029, 'td_error': 1.5191685028753334, 'init_value': -13.974254608154297, 'ave_value': -13.971603438050446} step=11764
2022-04-22 08:33.06 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:33.18 [info     ] CQL_20220422082559: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.00034752953259242065, 'time_algorithm_update': 0.034491519707475785, 'temp_loss': 2.0194959733527518, 'temp': 0.3998142229339291, 'alpha_loss': -63.83905737110645, 'alpha': 3.671417048900803, 'critic_loss': 4932.617200200957, 'actor_loss': 13.448452365191686, 'time_step': 0.03493092445968893, 'td_error': 1.5415220248375032, 'init_value': -14.662294387817383, 'ave_value': -14.654711868029288} step=12110
2022-04-22 08:33.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:33.31 [info     ] CQL_20220422082559: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.0003502582539023692, 'time_algorithm_update': 0.03425720868083094, 'temp_loss': 1.9673255654428736, 'temp': 0.389468762571412, 'alpha_loss': -66.3302627519376, 'alpha': 3.8143234197804006, 'critic_loss': 5070.093051447345, 'actor_loss': 14.124375004299804, 'time_step': 0.034692383225942626, 'td_error': 1.5597236034953255, 'init_value': -15.147234916687012, 'ave_value': -15.143942839910544} step=12456
2022-04-22 08:33.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:33.43 [info     ] CQL_20220422082559: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0003490131025369457, 'time_algorithm_update': 0.03517706132348562, 'temp_loss': 1.9166289512821704, 'temp': 0.37939132741420944, 'alpha_loss': -68.90498173443568, 'alpha': 3.9628043471044196, 'critic_loss': 4976.464619366419, 'actor_loss': 14.60745948725353, 'time_step': 0.03561818392979616, 'td_error': 1.5761767081193, 'init_value': -15.578279495239258, 'ave_value': -15.577943192685934} step=12802
2022-04-22 08:33.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:33.56 [info     ] CQL_20220422082559: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.00034150085008213284, 'time_algorithm_update': 0.03455884401508839, 'temp_loss': 1.866405526337596, 'temp': 0.369574763699074, 'alpha_loss': -71.59553033905911, 'alpha': 4.11706064891264, 'critic_loss': 4655.745326047688, 'actor_loss': 15.140098919069146, 'time_step': 0.03498825034654209, 'td_error': 1.5970978385096848, 'init_value': -16.150243759155273, 'ave_value': -16.148246631016377} step=13148
2022-04-22 08:33.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:34.08 [info     ] CQL_20220422082559: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.0003520477713877066, 'time_algorithm_update': 0.034795779024245424, 'temp_loss': 1.818556557845518, 'temp': 0.36001301518065393, 'alpha_loss': -74.37860451406137, 'alpha': 4.277335639633884, 'critic_loss': 4777.662091029173, 'actor_loss': 15.854247644457514, 'time_step': 0.03523646338137588, 'td_error': 1.6212142635647802, 'init_value': -16.779672622680664, 'ave_value': -16.776977996410153} step=13494
2022-04-22 08:34.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:34.21 [info     ] CQL_20220422082559: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.0003436831380590538, 'time_algorithm_update': 0.03396681691869835, 'temp_loss': 1.7713396625022668, 'temp': 0.3506993206939256, 'alpha_loss': -77.26998074481942, 'alpha': 4.443846352527596, 'critic_loss': 4915.093078260476, 'actor_loss': 16.42605631613318, 'time_step': 0.0344034557397655, 'td_error': 1.6438233254642935, 'init_value': -17.34343910217285, 'ave_value': -17.342148976622173} step=13840
2022-04-22 08:34.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:34.33 [info     ] CQL_20220422082559: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0003508811741205998, 'time_algorithm_update': 0.03485686861710741, 'temp_loss': 1.7252450147805187, 'temp': 0.34162500078623004, 'alpha_loss': -80.27755047131136, 'alpha': 4.616816697093103, 'critic_loss': 5042.542919357388, 'actor_loss': 17.01561990638689, 'time_step': 0.035293877469321895, 'td_error': 1.664018603595849, 'init_value': -17.823139190673828, 'ave_value': -17.82459333183755} step=14186
2022-04-22 08:34.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:34.46 [info     ] CQL_20220422082559: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.0003516460429726308, 'time_algorithm_update': 0.03458939535769424, 'temp_loss': 1.6807532444854691, 'temp': 0.33278591836118976, 'alpha_loss': -83.4057743976571, 'alpha': 4.7965523163018196, 'critic_loss': 4855.693342440391, 'actor_loss': 17.44608416584875, 'time_step': 0.03503548754432987, 'td_error': 1.6822369453561552, 'init_value': -18.260210037231445, 'ave_value': -18.261300136987607} step=14532
2022-04-22 08:34.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:34.58 [info     ] CQL_20220422082559: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00034299544516326375, 'time_algorithm_update': 0.03439177047310537, 'temp_loss': 1.6369724311580547, 'temp': 0.3241769233880015, 'alpha_loss': -86.64571411485616, 'alpha': 4.983256785166746, 'critic_loss': 4846.928406114524, 'actor_loss': 18.048484956598006, 'time_step': 0.03482348794882008, 'td_error': 1.7035755358780036, 'init_value': -18.74765968322754, 'ave_value': -18.751161990430244} step=14878
2022-04-22 08:34.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:35.10 [info     ] CQL_20220422082559: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.00033695918287155943, 'time_algorithm_update': 0.03407777527164173, 'temp_loss': 1.5949975865424713, 'temp': 0.31578945398675223, 'alpha_loss': -90.02283559369214, 'alpha': 5.177249033327048, 'critic_loss': 4730.766600151283, 'actor_loss': 18.51692203565829, 'time_step': 0.03450557124407994, 'td_error': 1.732422908686377, 'init_value': -19.460159301757812, 'ave_value': -19.455011199943936} step=15224
2022-04-22 08:35.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:35.23 [info     ] CQL_20220422082559: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003399146085529658, 'time_algorithm_update': 0.035534909005799045, 'temp_loss': 1.5531473290713536, 'temp': 0.3076197023508866, 'alpha_loss': -93.53016631727274, 'alpha': 5.378784170040506, 'critic_loss': 4981.144843129065, 'actor_loss': 19.119888785257505, 'time_step': 0.035962772507198974, 'td_error': 1.7541932380333398, 'init_value': -19.930456161499023, 'ave_value': -19.92729947735428} step=15570
2022-04-22 08:35.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:35.36 [info     ] CQL_20220422082559: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.00034237665937126026, 'time_algorithm_update': 0.03512568901039961, 'temp_loss': 1.513416005352329, 'temp': 0.29966161395773033, 'alpha_loss': -97.18215274259533, 'alpha': 5.58817237098782, 'critic_loss': 4973.764270231214, 'actor_loss': 19.55637182665698, 'time_step': 0.03555482453693544, 'td_error': 1.7747245734716415, 'init_value': -20.36862564086914, 'ave_value': -20.36712773613819} step=15916
2022-04-22 08:35.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:35.48 [info     ] CQL_20220422082559: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0003421678708467869, 'time_algorithm_update': 0.03489315303074831, 'temp_loss': 1.474711417118249, 'temp': 0.2919063394986136, 'alpha_loss': -100.95771983593185, 'alpha': 5.8057215324027, 'critic_loss': 4944.085539536669, 'actor_loss': 20.017829316199858, 'time_step': 0.035326239001544225, 'td_error': 1.7987355524083708, 'init_value': -20.901342391967773, 'ave_value': -20.896456047029165} step=16262
2022-04-22 08:35.48 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:36.01 [info     ] CQL_20220422082559: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.0003365078413417574, 'time_algorithm_update': 0.03460933914074319, 'temp_loss': 1.4360982789469592, 'temp': 0.2843532382236051, 'alpha_loss': -104.89494286245004, 'alpha': 6.031731372623774, 'critic_loss': 5226.24608810513, 'actor_loss': 20.54528529106537, 'time_step': 0.035028964797885434, 'td_error': 1.8194859767779104, 'init_value': -21.317367553710938, 'ave_value': -21.315542751943518} step=16608
2022-04-22 08:36.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:36.13 [info     ] CQL_20220422082559: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003450309610091193, 'time_algorithm_update': 0.03422540460707824, 'temp_loss': 1.3991095878485311, 'temp': 0.27699523716303653, 'alpha_loss': -108.96022437211406, 'alpha': 6.266530790769985, 'critic_loss': 5298.837146913385, 'actor_loss': 20.96357860454934, 'time_step': 0.03465692500847613, 'td_error': 1.8326897122689845, 'init_value': -21.569047927856445, 'ave_value': -21.571026884723807} step=16954
2022-04-22 08:36.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:36.26 [info     ] CQL_20220422082559: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.00034364868450716053, 'time_algorithm_update': 0.035424947049576425, 'temp_loss': 1.3626405751774078, 'temp': 0.26982947219313913, 'alpha_loss': -113.22291273877799, 'alpha': 6.510467708455345, 'critic_loss': 4957.027274600343, 'actor_loss': 21.233057727703468, 'time_step': 0.03585176798649606, 'td_error': 1.8475780036283813, 'init_value': -21.8730525970459, 'ave_value': -21.87523753237193} step=17300
2022-04-22 08:36.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422082559/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.51

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:36.27 [info     ] FQE_20220422083626: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00012241501406014683, 'time_algorithm_update': 0.002306929553847715, 'loss': 0.0072785033311425565, 'time_step': 0.002486260540514107, 'init_value': -0.049871765077114105, 'ave_value': 0.00444215748895396, 'soft_opc': nan} step=166




2022-04-22 08:36.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.27 [info     ] FQE_20220422083626: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.00012415144816938653, 'time_algorithm_update': 0.0022293027625026472, 'loss': 0.0044051011244441015, 'time_step': 0.0024105396615453512, 'init_value': -0.13627341389656067, 'ave_value': -0.06427316931915378, 'soft_opc': nan} step=332




2022-04-22 08:36.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.27 [info     ] FQE_20220422083626: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001210003014070442, 'time_algorithm_update': 0.002286457153687994, 'loss': 0.003584612415491951, 'time_step': 0.0024660265589334877, 'init_value': -0.14749440550804138, 'ave_value': -0.0679813914974262, 'soft_opc': nan} step=498




2022-04-22 08:36.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.28 [info     ] FQE_20220422083626: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00013556106981024686, 'time_algorithm_update': 0.0025899970387837975, 'loss': 0.003157973541255412, 'time_step': 0.0027928452893912076, 'init_value': -0.19256749749183655, 'ave_value': -0.09859896636492498, 'soft_opc': nan} step=664




2022-04-22 08:36.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.28 [info     ] FQE_20220422083626: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00012214499783803182, 'time_algorithm_update': 0.0021087778619973056, 'loss': 0.0027565847190254063, 'time_step': 0.0022827889545854315, 'init_value': -0.26166918873786926, 'ave_value': -0.16085973792239552, 'soft_opc': nan} step=830




2022-04-22 08:36.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.29 [info     ] FQE_20220422083626: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00013369824512895332, 'time_algorithm_update': 0.0020153192152459936, 'loss': 0.0024448477470944354, 'time_step': 0.002199737422437553, 'init_value': -0.27926793694496155, 'ave_value': -0.17009308336445272, 'soft_opc': nan} step=996




2022-04-22 08:36.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.29 [info     ] FQE_20220422083626: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00013174637254462186, 'time_algorithm_update': 0.002462178827768349, 'loss': 0.002212210991055177, 'time_step': 0.002653837203979492, 'init_value': -0.3340771794319153, 'ave_value': -0.20777642294378565, 'soft_opc': nan} step=1162




2022-04-22 08:36.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.30 [info     ] FQE_20220422083626: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00012182471263839538, 'time_algorithm_update': 0.002236696610967797, 'loss': 0.0019193581888923444, 'time_step': 0.002410097294543163, 'init_value': -0.3896595537662506, 'ave_value': -0.24935056196760622, 'soft_opc': nan} step=1328




2022-04-22 08:36.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.30 [info     ] FQE_20220422083626: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001293837306011154, 'time_algorithm_update': 0.002488689250256642, 'loss': 0.0016585253438553267, 'time_step': 0.002682209014892578, 'init_value': -0.42773187160491943, 'ave_value': -0.27750552470317563, 'soft_opc': nan} step=1494




2022-04-22 08:36.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.31 [info     ] FQE_20220422083626: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.00011843371104045087, 'time_algorithm_update': 0.002104802304003612, 'loss': 0.0017359337470696185, 'time_step': 0.002276233879916639, 'init_value': -0.5053868889808655, 'ave_value': -0.3266611482192938, 'soft_opc': nan} step=1660




2022-04-22 08:36.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.31 [info     ] FQE_20220422083626: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.00011776441551116575, 'time_algorithm_update': 0.002047278794897608, 'loss': 0.001448972658071869, 'time_step': 0.0022175283317106315, 'init_value': -0.5749137997627258, 'ave_value': -0.3745346275284081, 'soft_opc': nan} step=1826




2022-04-22 08:36.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.32 [info     ] FQE_20220422083626: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001312321927174028, 'time_algorithm_update': 0.002585365111569324, 'loss': 0.0014019328145329919, 'time_step': 0.002778017377278891, 'init_value': -0.6033323407173157, 'ave_value': -0.3851121839765157, 'soft_opc': nan} step=1992




2022-04-22 08:36.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.32 [info     ] FQE_20220422083626: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00011745992913303605, 'time_algorithm_update': 0.002174107425184135, 'loss': 0.0014421639592998314, 'time_step': 0.002343832728374435, 'init_value': -0.715097188949585, 'ave_value': -0.46834108946794595, 'soft_opc': nan} step=2158




2022-04-22 08:36.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.33 [info     ] FQE_20220422083626: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00013886589601815464, 'time_algorithm_update': 0.002900590379554105, 'loss': 0.0014825397362534225, 'time_step': 0.0031045072049979703, 'init_value': -0.7566258907318115, 'ave_value': -0.4836762945545284, 'soft_opc': nan} step=2324




2022-04-22 08:36.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.33 [info     ] FQE_20220422083626: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00012953884630318148, 'time_algorithm_update': 0.0024623267621879117, 'loss': 0.0014688471061842382, 'time_step': 0.0026511097528848305, 'init_value': -0.8161072731018066, 'ave_value': -0.5183439105732349, 'soft_opc': nan} step=2490




2022-04-22 08:36.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.34 [info     ] FQE_20220422083626: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00012146133974374058, 'time_algorithm_update': 0.002309072448546628, 'loss': 0.0015642663035457618, 'time_step': 0.002491586179618376, 'init_value': -0.9016130566596985, 'ave_value': -0.5764748521149159, 'soft_opc': nan} step=2656




2022-04-22 08:36.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.34 [info     ] FQE_20220422083626: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00012668356837996518, 'time_algorithm_update': 0.0025289949164333113, 'loss': 0.0016430826738114883, 'time_step': 0.0027179287140627942, 'init_value': -1.011704683303833, 'ave_value': -0.6654365705108052, 'soft_opc': nan} step=2822




2022-04-22 08:36.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.35 [info     ] FQE_20220422083626: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00012673958238348904, 'time_algorithm_update': 0.0023582513073840774, 'loss': 0.0017583381738064878, 'time_step': 0.0025453826031052924, 'init_value': -1.0636235475540161, 'ave_value': -0.7001690085406775, 'soft_opc': nan} step=2988




2022-04-22 08:36.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.35 [info     ] FQE_20220422083626: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00013135571077645543, 'time_algorithm_update': 0.002443220241960273, 'loss': 0.0019971344415561283, 'time_step': 0.002636607870998153, 'init_value': -1.1096000671386719, 'ave_value': -0.7302369410188886, 'soft_opc': nan} step=3154




2022-04-22 08:36.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.35 [info     ] FQE_20220422083626: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00012085811201348363, 'time_algorithm_update': 0.0021287992776158346, 'loss': 0.0019744861873108, 'time_step': 0.0023045123341571853, 'init_value': -1.137556791305542, 'ave_value': -0.7279494208829091, 'soft_opc': nan} step=3320




2022-04-22 08:36.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.36 [info     ] FQE_20220422083626: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00011636406542306922, 'time_algorithm_update': 0.002057138695774308, 'loss': 0.002008951310116057, 'time_step': 0.0022296546453452974, 'init_value': -1.1951889991760254, 'ave_value': -0.7661233281196507, 'soft_opc': nan} step=3486




2022-04-22 08:36.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.36 [info     ] FQE_20220422083626: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.00011650194604712797, 'time_algorithm_update': 0.0020738251237984165, 'loss': 0.002159487400505501, 'time_step': 0.0022425263761037804, 'init_value': -1.258373737335205, 'ave_value': -0.815675810786585, 'soft_opc': nan} step=3652




2022-04-22 08:36.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.37 [info     ] FQE_20220422083626: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00013444653476577206, 'time_algorithm_update': 0.0025266294019768037, 'loss': 0.0022192763757936836, 'time_step': 0.0027217089411724045, 'init_value': -1.3069705963134766, 'ave_value': -0.8281606532566183, 'soft_opc': nan} step=3818




2022-04-22 08:36.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.37 [info     ] FQE_20220422083626: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001443021268729704, 'time_algorithm_update': 0.0023953383227428757, 'loss': 0.0025198133167928555, 'time_step': 0.0026057312287479997, 'init_value': -1.3437429666519165, 'ave_value': -0.8542856850283774, 'soft_opc': nan} step=3984




2022-04-22 08:36.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.38 [info     ] FQE_20220422083626: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.00012807530092905802, 'time_algorithm_update': 0.002496817025793604, 'loss': 0.00265571013119644, 'time_step': 0.002685396068067436, 'init_value': -1.4197694063186646, 'ave_value': -0.9139698490701818, 'soft_opc': nan} step=4150




2022-04-22 08:36.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.38 [info     ] FQE_20220422083626: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.0001198843301060688, 'time_algorithm_update': 0.0021442232361759046, 'loss': 0.002583979848310834, 'time_step': 0.0023224569228758297, 'init_value': -1.4869720935821533, 'ave_value': -0.9630888644136019, 'soft_opc': nan} step=4316




2022-04-22 08:36.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.39 [info     ] FQE_20220422083626: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00012662755437644132, 'time_algorithm_update': 0.0022779128637658544, 'loss': 0.0026880608256541596, 'time_step': 0.0024678146982767494, 'init_value': -1.4874430894851685, 'ave_value': -0.966635270656699, 'soft_opc': nan} step=4482




2022-04-22 08:36.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.39 [info     ] FQE_20220422083626: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.00012663042688944252, 'time_algorithm_update': 0.0023261552833649048, 'loss': 0.002954125431182525, 'time_step': 0.002507751246532762, 'init_value': -1.579486608505249, 'ave_value': -1.020063629028228, 'soft_opc': nan} step=4648




2022-04-22 08:36.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.40 [info     ] FQE_20220422083626: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00012840563992419876, 'time_algorithm_update': 0.0024782706456012034, 'loss': 0.0031122097332473755, 'time_step': 0.0026666873908904663, 'init_value': -1.6566295623779297, 'ave_value': -1.0842033584191053, 'soft_opc': nan} step=4814




2022-04-22 08:36.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.40 [info     ] FQE_20220422083626: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.0001264092433883483, 'time_algorithm_update': 0.0022574663162231445, 'loss': 0.0033209528353244783, 'time_step': 0.00244347302310438, 'init_value': -1.7126078605651855, 'ave_value': -1.1253904190954862, 'soft_opc': nan} step=4980




2022-04-22 08:36.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.41 [info     ] FQE_20220422083626: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00012174715478736234, 'time_algorithm_update': 0.002113778907132436, 'loss': 0.0034799267953761056, 'time_step': 0.0022911134972629778, 'init_value': -1.7900402545928955, 'ave_value': -1.1750699515211152, 'soft_opc': nan} step=5146




2022-04-22 08:36.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.41 [info     ] FQE_20220422083626: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00012323511652199618, 'time_algorithm_update': 0.0022733441318374082, 'loss': 0.0037854243412120156, 'time_step': 0.002454111375004412, 'init_value': -1.8492580652236938, 'ave_value': -1.2029404991818173, 'soft_opc': nan} step=5312




2022-04-22 08:36.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.41 [info     ] FQE_20220422083626: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00012978013739528427, 'time_algorithm_update': 0.0023247362619423003, 'loss': 0.00369742642074728, 'time_step': 0.0025170696787087314, 'init_value': -1.8531677722930908, 'ave_value': -1.2028901698848082, 'soft_opc': nan} step=5478




2022-04-22 08:36.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.42 [info     ] FQE_20220422083626: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.00013525658343211715, 'time_algorithm_update': 0.002546494265636766, 'loss': 0.004116685316071801, 'time_step': 0.0027431378881615327, 'init_value': -1.9351617097854614, 'ave_value': -1.2618743966023127, 'soft_opc': nan} step=5644




2022-04-22 08:36.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.42 [info     ] FQE_20220422083626: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00013003579105239316, 'time_algorithm_update': 0.0024057368198073053, 'loss': 0.0044771274260710925, 'time_step': 0.0025934483631547675, 'init_value': -2.03409481048584, 'ave_value': -1.312943636568951, 'soft_opc': nan} step=5810




2022-04-22 08:36.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.43 [info     ] FQE_20220422083626: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00012602719915918558, 'time_algorithm_update': 0.002349112407270684, 'loss': 0.004743839915475741, 'time_step': 0.0025324749659342938, 'init_value': -2.0611720085144043, 'ave_value': -1.3381826545741107, 'soft_opc': nan} step=5976




2022-04-22 08:36.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.43 [info     ] FQE_20220422083626: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00011749727180205195, 'time_algorithm_update': 0.002067897693220391, 'loss': 0.004897369652690579, 'time_step': 0.002238786364176187, 'init_value': -2.1548125743865967, 'ave_value': -1.4079414455304007, 'soft_opc': nan} step=6142




2022-04-22 08:36.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.44 [info     ] FQE_20220422083626: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001307682818677052, 'time_algorithm_update': 0.0025205324931317067, 'loss': 0.004682994481310787, 'time_step': 0.002718343792191471, 'init_value': -2.179715156555176, 'ave_value': -1.4176271379799457, 'soft_opc': nan} step=6308




2022-04-22 08:36.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.44 [info     ] FQE_20220422083626: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.00013287957892360458, 'time_algorithm_update': 0.002653282808970256, 'loss': 0.005351054351370086, 'time_step': 0.002849703811737428, 'init_value': -2.25722599029541, 'ave_value': -1.4768191842412626, 'soft_opc': nan} step=6474




2022-04-22 08:36.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.45 [info     ] FQE_20220422083626: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.0001274016966302711, 'time_algorithm_update': 0.0024156857685870433, 'loss': 0.005606642907242717, 'time_step': 0.0026077606591833643, 'init_value': -2.280453681945801, 'ave_value': -1.4854461318032968, 'soft_opc': nan} step=6640




2022-04-22 08:36.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.45 [info     ] FQE_20220422083626: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.00012783257358045462, 'time_algorithm_update': 0.0023888306445386037, 'loss': 0.005660306066218539, 'time_step': 0.0025778448725321205, 'init_value': -2.269080638885498, 'ave_value': -1.4721734182086883, 'soft_opc': nan} step=6806




2022-04-22 08:36.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.46 [info     ] FQE_20220422083626: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00013163721705057533, 'time_algorithm_update': 0.0023550154214881987, 'loss': 0.00568453744821761, 'time_step': 0.0025482393172850093, 'init_value': -2.291252851486206, 'ave_value': -1.4733068513658805, 'soft_opc': nan} step=6972




2022-04-22 08:36.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.46 [info     ] FQE_20220422083626: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00012034393218626459, 'time_algorithm_update': 0.002122828759342791, 'loss': 0.005901847093601723, 'time_step': 0.002299116318484387, 'init_value': -2.3448832035064697, 'ave_value': -1.503316623302227, 'soft_opc': nan} step=7138




2022-04-22 08:36.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.47 [info     ] FQE_20220422083626: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.0001340372016630977, 'time_algorithm_update': 0.0026057183024394944, 'loss': 0.006124345516801408, 'time_step': 0.00280451918222818, 'init_value': -2.334787368774414, 'ave_value': -1.4972904074643445, 'soft_opc': nan} step=7304




2022-04-22 08:36.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.47 [info     ] FQE_20220422083626: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.00012455216373305722, 'time_algorithm_update': 0.002317046544638025, 'loss': 0.006162607139263986, 'time_step': 0.0024988277848944605, 'init_value': -2.3531196117401123, 'ave_value': -1.5063789740708229, 'soft_opc': nan} step=7470




2022-04-22 08:36.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.48 [info     ] FQE_20220422083626: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.0001187654862920922, 'time_algorithm_update': 0.0021148532269948937, 'loss': 0.005955831009048845, 'time_step': 0.002290946891508907, 'init_value': -2.3728270530700684, 'ave_value': -1.5085579345451652, 'soft_opc': nan} step=7636




2022-04-22 08:36.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.48 [info     ] FQE_20220422083626: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00011581254292683429, 'time_algorithm_update': 0.0019234145980283439, 'loss': 0.006244821514456017, 'time_step': 0.002093328050820224, 'init_value': -2.4197049140930176, 'ave_value': -1.5428464209006445, 'soft_opc': nan} step=7802




2022-04-22 08:36.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.48 [info     ] FQE_20220422083626: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.00012631732297230917, 'time_algorithm_update': 0.0023415490805384622, 'loss': 0.0066327897464317906, 'time_step': 0.0025306078324834986, 'init_value': -2.474057674407959, 'ave_value': -1.5719445235297285, 'soft_opc': nan} step=7968




2022-04-22 08:36.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.49 [info     ] FQE_20220422083626: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.0001283869685896908, 'time_algorithm_update': 0.002445051468998553, 'loss': 0.006628049155507876, 'time_step': 0.002630284033625959, 'init_value': -2.505967140197754, 'ave_value': -1.5856921909708395, 'soft_opc': nan} step=8134




2022-04-22 08:36.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:36.49 [info     ] FQE_20220422083626: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00013050544692809322, 'time_algorithm_update': 0.0023510728973940194, 'loss': 0.006943563851443714, 'time_step': 0.0025448899671255826, 'init_value': -2.5188815593719482, 'ave_value': -1.5978040715415232, 'soft_opc': nan} step=8300




2022-04-22 08:36.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083626/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 08:36.50 [debug    ] RoundIterator is selected.
2022-04-22 08:36.50 [info     ] Directory is created at d3rlpy_logs/FQE_20220422083650
2022-04-22 08:36.50 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:36.50 [debug    ] Building models...
2022-04-22 08:36.50 [debug    ] Models have been built.
2022-04-22 08:36.50 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422083650/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}},

Epoch 1/50:   0%|          | 0/355 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:36.51 [info     ] FQE_20220422083650: epoch=1 step=355 epoch=1 metrics={'time_sample_batch': 0.00012316300835407955, 'time_algorithm_update': 0.0021212812880395163, 'loss': 0.024461703151989152, 'time_step': 0.002302484780969754, 'init_value': -1.1023199558258057, 'ave_value': -1.1047763284645793, 'soft_opc': nan} step=355




2022-04-22 08:36.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_355.pt


Epoch 2/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.52 [info     ] FQE_20220422083650: epoch=2 step=710 epoch=2 metrics={'time_sample_batch': 0.0001251845292642083, 'time_algorithm_update': 0.0021960003275266833, 'loss': 0.02334358909390342, 'time_step': 0.0023777552053961953, 'init_value': -2.0323843955993652, 'ave_value': -2.0817207954258223, 'soft_opc': nan} step=710




2022-04-22 08:36.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_710.pt


Epoch 3/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.52 [info     ] FQE_20220422083650: epoch=3 step=1065 epoch=3 metrics={'time_sample_batch': 0.00012682189404124945, 'time_algorithm_update': 0.0022272486082265076, 'loss': 0.02500243223154209, 'time_step': 0.002410577720319721, 'init_value': -2.315819501876831, 'ave_value': -2.43723545006957, 'soft_opc': nan} step=1065




2022-04-22 08:36.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_1065.pt


Epoch 4/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.54 [info     ] FQE_20220422083650: epoch=4 step=1420 epoch=4 metrics={'time_sample_batch': 0.0001325714756065691, 'time_algorithm_update': 0.002522749296376403, 'loss': 0.03141150486091493, 'time_step': 0.0027164358488270934, 'init_value': -3.003926992416382, 'ave_value': -3.2457296476707804, 'soft_opc': nan} step=1420




2022-04-22 08:36.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_1420.pt


Epoch 5/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.54 [info     ] FQE_20220422083650: epoch=5 step=1775 epoch=5 metrics={'time_sample_batch': 0.00012658146065725408, 'time_algorithm_update': 0.0022731109404228104, 'loss': 0.0395998555432323, 'time_step': 0.0024585146299550232, 'init_value': -3.2976796627044678, 'ave_value': -3.6833214831474974, 'soft_opc': nan} step=1775




2022-04-22 08:36.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_1775.pt


Epoch 6/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.55 [info     ] FQE_20220422083650: epoch=6 step=2130 epoch=6 metrics={'time_sample_batch': 0.00012867887255171655, 'time_algorithm_update': 0.002243627628809969, 'loss': 0.04922735254267152, 'time_step': 0.002431209322432397, 'init_value': -3.84875750541687, 'ave_value': -4.380429188226334, 'soft_opc': nan} step=2130




2022-04-22 08:36.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_2130.pt


Epoch 7/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.56 [info     ] FQE_20220422083650: epoch=7 step=2485 epoch=7 metrics={'time_sample_batch': 0.00012708113227092045, 'time_algorithm_update': 0.0022359733850183623, 'loss': 0.05915182109554888, 'time_step': 0.0024220943450927734, 'init_value': -4.381556987762451, 'ave_value': -5.005468393415405, 'soft_opc': nan} step=2485




2022-04-22 08:36.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_2485.pt


Epoch 8/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.57 [info     ] FQE_20220422083650: epoch=8 step=2840 epoch=8 metrics={'time_sample_batch': 0.00012812480120591714, 'time_algorithm_update': 0.0022896471157879896, 'loss': 0.0723708037654279, 'time_step': 0.002481202676262654, 'init_value': -4.8459320068359375, 'ave_value': -5.582672304935284, 'soft_opc': nan} step=2840




2022-04-22 08:36.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_2840.pt


Epoch 9/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.58 [info     ] FQE_20220422083650: epoch=9 step=3195 epoch=9 metrics={'time_sample_batch': 0.00013088306910555126, 'time_algorithm_update': 0.002358899317996603, 'loss': 0.0818955080190175, 'time_step': 0.0025521553738016476, 'init_value': -5.39803409576416, 'ave_value': -6.218513141313575, 'soft_opc': nan} step=3195




2022-04-22 08:36.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_3195.pt


Epoch 10/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:36.59 [info     ] FQE_20220422083650: epoch=10 step=3550 epoch=10 metrics={'time_sample_batch': 0.00013028601525535047, 'time_algorithm_update': 0.002345061637985874, 'loss': 0.0957983228207474, 'time_step': 0.0025365903343952875, 'init_value': -6.061707496643066, 'ave_value': -6.8982832577851445, 'soft_opc': nan} step=3550




2022-04-22 08:36.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_3550.pt


Epoch 11/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.00 [info     ] FQE_20220422083650: epoch=11 step=3905 epoch=11 metrics={'time_sample_batch': 0.00013113357651401575, 'time_algorithm_update': 0.0023184997934690664, 'loss': 0.10703284530047799, 'time_step': 0.0025120526971951335, 'init_value': -6.661652565002441, 'ave_value': -7.682135988248361, 'soft_opc': nan} step=3905




2022-04-22 08:37.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_3905.pt


Epoch 12/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.01 [info     ] FQE_20220422083650: epoch=12 step=4260 epoch=12 metrics={'time_sample_batch': 0.00012838605424048195, 'time_algorithm_update': 0.002332520820725132, 'loss': 0.12014327505734605, 'time_step': 0.002521046114639497, 'init_value': -6.952672481536865, 'ave_value': -8.016319882240689, 'soft_opc': nan} step=4260




2022-04-22 08:37.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_4260.pt


Epoch 13/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.02 [info     ] FQE_20220422083650: epoch=13 step=4615 epoch=13 metrics={'time_sample_batch': 0.00012468687245543574, 'time_algorithm_update': 0.002221050396771498, 'loss': 0.13560458418034332, 'time_step': 0.002403054438846212, 'init_value': -7.462361812591553, 'ave_value': -8.474064753429923, 'soft_opc': nan} step=4615




2022-04-22 08:37.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_4615.pt


Epoch 14/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.03 [info     ] FQE_20220422083650: epoch=14 step=4970 epoch=14 metrics={'time_sample_batch': 0.00013510274215483328, 'time_algorithm_update': 0.002405572273361851, 'loss': 0.15438978014592553, 'time_step': 0.002602225961819501, 'init_value': -7.739429950714111, 'ave_value': -8.824205463963587, 'soft_opc': nan} step=4970




2022-04-22 08:37.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_4970.pt


Epoch 15/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.04 [info     ] FQE_20220422083650: epoch=15 step=5325 epoch=15 metrics={'time_sample_batch': 0.0001239776611328125, 'time_algorithm_update': 0.002120996528947857, 'loss': 0.17640391350002357, 'time_step': 0.0023019649613071494, 'init_value': -8.237730026245117, 'ave_value': -9.332993239289967, 'soft_opc': nan} step=5325




2022-04-22 08:37.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_5325.pt


Epoch 16/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.05 [info     ] FQE_20220422083650: epoch=16 step=5680 epoch=16 metrics={'time_sample_batch': 0.00012718388732050505, 'time_algorithm_update': 0.002206485372194102, 'loss': 0.19546769878394168, 'time_step': 0.0023934337454782407, 'init_value': -8.778374671936035, 'ave_value': -9.942607460089478, 'soft_opc': nan} step=5680




2022-04-22 08:37.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_5680.pt


Epoch 17/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.06 [info     ] FQE_20220422083650: epoch=17 step=6035 epoch=17 metrics={'time_sample_batch': 0.00012199442151566627, 'time_algorithm_update': 0.0021602825379707443, 'loss': 0.21634652879561336, 'time_step': 0.002341661990528375, 'init_value': -9.29926586151123, 'ave_value': -10.51927050594696, 'soft_opc': nan} step=6035




2022-04-22 08:37.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_6035.pt


Epoch 18/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.07 [info     ] FQE_20220422083650: epoch=18 step=6390 epoch=18 metrics={'time_sample_batch': 0.00013583948914433868, 'time_algorithm_update': 0.0024328943709252585, 'loss': 0.23548334306499488, 'time_step': 0.0026342499424034443, 'init_value': -9.685140609741211, 'ave_value': -10.86843192532486, 'soft_opc': nan} step=6390




2022-04-22 08:37.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_6390.pt


Epoch 19/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.08 [info     ] FQE_20220422083650: epoch=19 step=6745 epoch=19 metrics={'time_sample_batch': 0.00014123513664997798, 'time_algorithm_update': 0.002568908476493728, 'loss': 0.25379583007118234, 'time_step': 0.0027768450723567478, 'init_value': -10.158510208129883, 'ave_value': -11.407923682806882, 'soft_opc': nan} step=6745




2022-04-22 08:37.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_6745.pt


Epoch 20/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.09 [info     ] FQE_20220422083650: epoch=20 step=7100 epoch=20 metrics={'time_sample_batch': 0.00013578777581873074, 'time_algorithm_update': 0.0024569424105362154, 'loss': 0.2791117851583051, 'time_step': 0.002655167646811042, 'init_value': -10.371773719787598, 'ave_value': -11.617976072735537, 'soft_opc': nan} step=7100




2022-04-22 08:37.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_7100.pt


Epoch 21/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.10 [info     ] FQE_20220422083650: epoch=21 step=7455 epoch=21 metrics={'time_sample_batch': 0.0001401001298931283, 'time_algorithm_update': 0.0024893303992043077, 'loss': 0.3062279211068657, 'time_step': 0.002699711625005158, 'init_value': -11.00871467590332, 'ave_value': -12.099185953195299, 'soft_opc': nan} step=7455




2022-04-22 08:37.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_7455.pt


Epoch 22/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.11 [info     ] FQE_20220422083650: epoch=22 step=7810 epoch=22 metrics={'time_sample_batch': 0.00013000327096858495, 'time_algorithm_update': 0.002296902428210621, 'loss': 0.3238377851657045, 'time_step': 0.0024893176387733138, 'init_value': -11.463519096374512, 'ave_value': -12.487147192775897, 'soft_opc': nan} step=7810




2022-04-22 08:37.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_7810.pt


Epoch 23/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.12 [info     ] FQE_20220422083650: epoch=23 step=8165 epoch=23 metrics={'time_sample_batch': 0.0001333129238074934, 'time_algorithm_update': 0.0024462760334283532, 'loss': 0.34721863489650506, 'time_step': 0.002642474375980001, 'init_value': -11.924132347106934, 'ave_value': -12.965200754915733, 'soft_opc': nan} step=8165




2022-04-22 08:37.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_8165.pt


Epoch 24/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.13 [info     ] FQE_20220422083650: epoch=24 step=8520 epoch=24 metrics={'time_sample_batch': 0.00012961911483549735, 'time_algorithm_update': 0.0021940822332677705, 'loss': 0.36601680966330247, 'time_step': 0.0023850340238759215, 'init_value': -11.98475170135498, 'ave_value': -12.899947612128548, 'soft_opc': nan} step=8520




2022-04-22 08:37.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_8520.pt


Epoch 25/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.14 [info     ] FQE_20220422083650: epoch=25 step=8875 epoch=25 metrics={'time_sample_batch': 0.0001323793975400253, 'time_algorithm_update': 0.0023343133254789972, 'loss': 0.3750661498958796, 'time_step': 0.0025268769600022005, 'init_value': -12.428175926208496, 'ave_value': -13.209535947087264, 'soft_opc': nan} step=8875




2022-04-22 08:37.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_8875.pt


Epoch 26/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.15 [info     ] FQE_20220422083650: epoch=26 step=9230 epoch=26 metrics={'time_sample_batch': 0.00012863521844568387, 'time_algorithm_update': 0.002336150827542157, 'loss': 0.40034739876716907, 'time_step': 0.002527371930404448, 'init_value': -12.928772926330566, 'ave_value': -13.537807465528774, 'soft_opc': nan} step=9230




2022-04-22 08:37.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_9230.pt


Epoch 27/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.16 [info     ] FQE_20220422083650: epoch=27 step=9585 epoch=27 metrics={'time_sample_batch': 0.00013193211085359816, 'time_algorithm_update': 0.002347551936834631, 'loss': 0.4114174623409627, 'time_step': 0.0025410873789182852, 'init_value': -13.495124816894531, 'ave_value': -13.989831300152218, 'soft_opc': nan} step=9585




2022-04-22 08:37.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_9585.pt


Epoch 28/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.17 [info     ] FQE_20220422083650: epoch=28 step=9940 epoch=28 metrics={'time_sample_batch': 0.0001240663125481404, 'time_algorithm_update': 0.002105515439745406, 'loss': 0.428204165986726, 'time_step': 0.0022882313795492683, 'init_value': -14.197823524475098, 'ave_value': -14.476400472415536, 'soft_opc': nan} step=9940




2022-04-22 08:37.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_9940.pt


Epoch 29/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.17 [info     ] FQE_20220422083650: epoch=29 step=10295 epoch=29 metrics={'time_sample_batch': 0.00012599851044130996, 'time_algorithm_update': 0.002129349910037618, 'loss': 0.437251021125367, 'time_step': 0.002313545388235173, 'init_value': -13.991941452026367, 'ave_value': -14.154242486217116, 'soft_opc': nan} step=10295




2022-04-22 08:37.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_10295.pt


Epoch 30/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.18 [info     ] FQE_20220422083650: epoch=30 step=10650 epoch=30 metrics={'time_sample_batch': 0.00013771661570374395, 'time_algorithm_update': 0.0025243154713805293, 'loss': 0.4495903983030101, 'time_step': 0.0027263809257829694, 'init_value': -14.858787536621094, 'ave_value': -14.741701518116157, 'soft_opc': nan} step=10650




2022-04-22 08:37.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_10650.pt


Epoch 31/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.19 [info     ] FQE_20220422083650: epoch=31 step=11005 epoch=31 metrics={'time_sample_batch': 0.00012830546204472933, 'time_algorithm_update': 0.0022960071832361355, 'loss': 0.4692637050393182, 'time_step': 0.002488001971177652, 'init_value': -15.62492561340332, 'ave_value': -15.3531978558188, 'soft_opc': nan} step=11005




2022-04-22 08:37.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_11005.pt


Epoch 32/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.20 [info     ] FQE_20220422083650: epoch=32 step=11360 epoch=32 metrics={'time_sample_batch': 0.00013018258860413457, 'time_algorithm_update': 0.002352985194031621, 'loss': 0.4856670600217833, 'time_step': 0.0025447455930038237, 'init_value': -15.854722023010254, 'ave_value': -15.438206705797171, 'soft_opc': nan} step=11360




2022-04-22 08:37.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_11360.pt


Epoch 33/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.21 [info     ] FQE_20220422083650: epoch=33 step=11715 epoch=33 metrics={'time_sample_batch': 0.00012845590081013425, 'time_algorithm_update': 0.002275709367134202, 'loss': 0.5052774572141574, 'time_step': 0.0024675026745863363, 'init_value': -16.54534912109375, 'ave_value': -15.887458852692442, 'soft_opc': nan} step=11715




2022-04-22 08:37.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_11715.pt


Epoch 34/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.22 [info     ] FQE_20220422083650: epoch=34 step=12070 epoch=34 metrics={'time_sample_batch': 0.00012969231941330602, 'time_algorithm_update': 0.0022384683850785376, 'loss': 0.5169107245202635, 'time_step': 0.00243068815956653, 'init_value': -16.403961181640625, 'ave_value': -15.71700846366776, 'soft_opc': nan} step=12070




2022-04-22 08:37.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_12070.pt


Epoch 35/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.23 [info     ] FQE_20220422083650: epoch=35 step=12425 epoch=35 metrics={'time_sample_batch': 0.00012213075664681448, 'time_algorithm_update': 0.002112547108824824, 'loss': 0.5250157079331472, 'time_step': 0.0022940958049935353, 'init_value': -16.412891387939453, 'ave_value': -15.575763879235993, 'soft_opc': nan} step=12425




2022-04-22 08:37.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_12425.pt


Epoch 36/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.24 [info     ] FQE_20220422083650: epoch=36 step=12780 epoch=36 metrics={'time_sample_batch': 0.00012191114291338854, 'time_algorithm_update': 0.002075518352884642, 'loss': 0.541344012865718, 'time_step': 0.002256192623729437, 'init_value': -16.55588722229004, 'ave_value': -15.715638155857414, 'soft_opc': nan} step=12780




2022-04-22 08:37.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_12780.pt


Epoch 37/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.25 [info     ] FQE_20220422083650: epoch=37 step=13135 epoch=37 metrics={'time_sample_batch': 0.00012960433959960938, 'time_algorithm_update': 0.0023036809034750493, 'loss': 0.5461546805280615, 'time_step': 0.0024941739901690415, 'init_value': -16.247974395751953, 'ave_value': -15.292275185822636, 'soft_opc': nan} step=13135




2022-04-22 08:37.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_13135.pt


Epoch 38/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.26 [info     ] FQE_20220422083650: epoch=38 step=13490 epoch=38 metrics={'time_sample_batch': 0.0001341571270580023, 'time_algorithm_update': 0.002426325435369787, 'loss': 0.5684721520249273, 'time_step': 0.0026251544415111273, 'init_value': -16.1771183013916, 'ave_value': -15.075479677565962, 'soft_opc': nan} step=13490




2022-04-22 08:37.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_13490.pt


Epoch 39/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.27 [info     ] FQE_20220422083650: epoch=39 step=13845 epoch=39 metrics={'time_sample_batch': 0.00012999789815553478, 'time_algorithm_update': 0.002270105523122868, 'loss': 0.5918114164398169, 'time_step': 0.0024574125316781056, 'init_value': -16.383586883544922, 'ave_value': -15.253975591343496, 'soft_opc': nan} step=13845




2022-04-22 08:37.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_13845.pt


Epoch 40/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.28 [info     ] FQE_20220422083650: epoch=40 step=14200 epoch=40 metrics={'time_sample_batch': 0.00012734910132179797, 'time_algorithm_update': 0.0023617428793034083, 'loss': 0.6190956217197465, 'time_step': 0.0025489269847601232, 'init_value': -16.464414596557617, 'ave_value': -15.105172343220337, 'soft_opc': nan} step=14200




2022-04-22 08:37.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_14200.pt


Epoch 41/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.29 [info     ] FQE_20220422083650: epoch=41 step=14555 epoch=41 metrics={'time_sample_batch': 0.00013108589279819542, 'time_algorithm_update': 0.0022929211737404407, 'loss': 0.6382894199708817, 'time_step': 0.0024868971864942095, 'init_value': -16.170007705688477, 'ave_value': -14.77970611975364, 'soft_opc': nan} step=14555




2022-04-22 08:37.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_14555.pt


Epoch 42/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.30 [info     ] FQE_20220422083650: epoch=42 step=14910 epoch=42 metrics={'time_sample_batch': 0.000139015493258624, 'time_algorithm_update': 0.002547270143535775, 'loss': 0.6760454556576803, 'time_step': 0.0027529985132351727, 'init_value': -16.55245590209961, 'ave_value': -15.156363058759515, 'soft_opc': nan} step=14910




2022-04-22 08:37.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_14910.pt


Epoch 43/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.31 [info     ] FQE_20220422083650: epoch=43 step=15265 epoch=43 metrics={'time_sample_batch': 0.00013871192932128905, 'time_algorithm_update': 0.002675760296029104, 'loss': 0.7012590046132534, 'time_step': 0.0028811266724492464, 'init_value': -16.974605560302734, 'ave_value': -15.256700277965194, 'soft_opc': nan} step=15265




2022-04-22 08:37.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_15265.pt


Epoch 44/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.32 [info     ] FQE_20220422083650: epoch=44 step=15620 epoch=44 metrics={'time_sample_batch': 0.00013662526305292694, 'time_algorithm_update': 0.0025265854848942285, 'loss': 0.7416407401188159, 'time_step': 0.0027289175651442838, 'init_value': -16.648019790649414, 'ave_value': -15.021371221959475, 'soft_opc': nan} step=15620




2022-04-22 08:37.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_15620.pt


Epoch 45/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.33 [info     ] FQE_20220422083650: epoch=45 step=15975 epoch=45 metrics={'time_sample_batch': 0.00013192203682912908, 'time_algorithm_update': 0.0024861241730166155, 'loss': 0.740237785277652, 'time_step': 0.0026844494779345016, 'init_value': -16.370983123779297, 'ave_value': -14.904286524803021, 'soft_opc': nan} step=15975




2022-04-22 08:37.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_15975.pt


Epoch 46/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.34 [info     ] FQE_20220422083650: epoch=46 step=16330 epoch=46 metrics={'time_sample_batch': 0.00013364536661497305, 'time_algorithm_update': 0.002400348555873817, 'loss': 0.7662398114170826, 'time_step': 0.0025976563843203263, 'init_value': -16.531190872192383, 'ave_value': -14.975403772479588, 'soft_opc': nan} step=16330




2022-04-22 08:37.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_16330.pt


Epoch 47/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.35 [info     ] FQE_20220422083650: epoch=47 step=16685 epoch=47 metrics={'time_sample_batch': 0.00013318733430244553, 'time_algorithm_update': 0.0025352431015229562, 'loss': 0.7617977213324375, 'time_step': 0.0027313809999277893, 'init_value': -16.487056732177734, 'ave_value': -14.802594507255973, 'soft_opc': nan} step=16685




2022-04-22 08:37.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_16685.pt


Epoch 48/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.36 [info     ] FQE_20220422083650: epoch=48 step=17040 epoch=48 metrics={'time_sample_batch': 0.00012907780392069213, 'time_algorithm_update': 0.0022783077938455932, 'loss': 0.7785398251258991, 'time_step': 0.002467052701493384, 'init_value': -16.4555721282959, 'ave_value': -14.845749047244665, 'soft_opc': nan} step=17040




2022-04-22 08:37.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_17040.pt


Epoch 49/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.37 [info     ] FQE_20220422083650: epoch=49 step=17395 epoch=49 metrics={'time_sample_batch': 0.00013078232886086047, 'time_algorithm_update': 0.00224421393703407, 'loss': 0.8053472821089164, 'time_step': 0.0024314665458571743, 'init_value': -16.252920150756836, 'ave_value': -14.424127238136423, 'soft_opc': nan} step=17395




2022-04-22 08:37.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_17395.pt


Epoch 50/50:   0%|          | 0/355 [00:00<?, ?it/s]



2022-04-22 08:37.38 [info     ] FQE_20220422083650: epoch=50 step=17750 epoch=50 metrics={'time_sample_batch': 0.00013787511368872413, 'time_algorithm_update': 0.00233046571973344, 'loss': 0.8201915225879827, 'time_step': 0.0025326842993078098, 'init_value': -16.841981887817383, 'ave_value': -14.999913804482386, 'soft_opc': nan} step=17750




2022-04-22 08:37.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422083650/model_17750.pt
search iteration:  36
using hyper params:  [0.006693992988103271, 0.005822259761589165, 2.3244912637727628e-05, 7]
2022-04-22 08:37.38 [debug    ] RoundIterator is selected.
2022-04-22 08:37.38 [info     ] Directory is created at d3rlpy_logs/CQL_20220422083738
2022-04-22 08:37.38 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:37.38 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 08:37.38 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422083738/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.006693992988103271, 'actor_optim_factory': {'opti

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:37.51 [info     ] CQL_20220422083738: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.00040865357900630534, 'time_algorithm_update': 0.03512645594646476, 'temp_loss': 4.972573195578735, 'temp': 0.9958612648393378, 'alpha_loss': -17.80148362837775, 'alpha': 1.017733967373137, 'critic_loss': 151.99439283602499, 'actor_loss': 6.4012805775475465, 'time_step': 0.03563339448388601, 'td_error': 1.4131484355425443, 'init_value': -11.600194931030273, 'ave_value': -11.114027386335696} step=346
2022-04-22 08:37.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:38.03 [info     ] CQL_20220422083738: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.00042231096697680523, 'time_algorithm_update': 0.03552164163203598, 'temp_loss': 4.982195873481, 'temp': 0.9878278388453356, 'alpha_loss': -18.406637958019456, 'alpha': 1.0541250688492219, 'critic_loss': 223.88724270859205, 'actor_loss': 14.32258624424135, 'time_step': 0.036048331701686614, 'td_error': 1.6638216348862156, 'init_value': -17.692825317382812, 'ave_value': -17.067900188323048} step=692
2022-04-22 08:38.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:38.16 [info     ] CQL_20220422083738: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00040365988119489196, 'time_algorithm_update': 0.03482776156739693, 'temp_loss': 4.9442877094180595, 'temp': 0.979939728286225, 'alpha_loss': -19.068890974011726, 'alpha': 1.092347802109801, 'critic_loss': 441.9062239806776, 'actor_loss': 18.766404173966777, 'time_step': 0.035323898227228596, 'td_error': 1.7503925560195848, 'init_value': -20.11551856994629, 'ave_value': -19.429556959214626} step=1038
2022-04-22 08:38.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:38.29 [info     ] CQL_20220422083738: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.0004081181708098836, 'time_algorithm_update': 0.034932904160780716, 'temp_loss': 4.906790430146145, 'temp': 0.9721497100901741, 'alpha_loss': -19.753790215949792, 'alpha': 1.1324721230247807, 'critic_loss': 764.8769196085847, 'actor_loss': 18.674972826345808, 'time_step': 0.03543198177580199, 'td_error': 1.5893674740980839, 'init_value': -17.5965576171875, 'ave_value': -17.071532743923356} step=1384
2022-04-22 08:38.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:38.41 [info     ] CQL_20220422083738: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.0003919759926768397, 'time_algorithm_update': 0.03453298386810832, 'temp_loss': 4.869145134280871, 'temp': 0.9644442889731744, 'alpha_loss': -20.44780214122265, 'alpha': 1.1744894357774989, 'critic_loss': 1208.3191979909907, 'actor_loss': 13.139234912188757, 'time_step': 0.03501920686291821, 'td_error': 1.384552001770845, 'init_value': -11.042241096496582, 'ave_value': -10.839581150463713} step=1730
2022-04-22 08:38.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:38.53 [info     ] CQL_20220422083738: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.00039197392546372605, 'time_algorithm_update': 0.03360777438720527, 'temp_loss': 4.831096934445331, 'temp': 0.9568189220621407, 'alpha_loss': -21.195329506273215, 'alpha': 1.218449718001261, 'critic_loss': 1729.5543608031521, 'actor_loss': 7.751927313777063, 'time_step': 0.03409159872573235, 'td_error': 1.34692189258427, 'init_value': -8.523580551147461, 'ave_value': -8.446601318668675} step=2076
2022-04-22 08:38.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:39.05 [info     ] CQL_20220422083738: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.0003854773637187274, 'time_algorithm_update': 0.03326285850105947, 'temp_loss': 4.793577953570151, 'temp': 0.949269605165272, 'alpha_loss': -21.98947103864196, 'alpha': 1.2644315885670612, 'critic_loss': 2201.617997891641, 'actor_loss': 6.754349457735271, 'time_step': 0.03373747340516548, 'td_error': 1.3489415554559478, 'init_value': -8.307455062866211, 'ave_value': -8.259249414341774} step=2422
2022-04-22 08:39.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:39.18 [info     ] CQL_20220422083738: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.0003994999593392962, 'time_algorithm_update': 0.03434641581739305, 'temp_loss': 4.7566394213307115, 'temp': 0.94179155888585, 'alpha_loss': -22.82561063490851, 'alpha': 1.3124787976287005, 'critic_loss': 2591.7942698925217, 'actor_loss': 6.855810311488334, 'time_step': 0.03483457923624557, 'td_error': 1.3594619584860803, 'init_value': -8.669944763183594, 'ave_value': -8.627393274546913} step=2768
2022-04-22 08:39.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:39.30 [info     ] CQL_20220422083738: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.0004104789281856118, 'time_algorithm_update': 0.03533454024033739, 'temp_loss': 4.717811733312001, 'temp': 0.9343827912917716, 'alpha_loss': -23.694901868787117, 'alpha': 1.3626232285031004, 'critic_loss': 2919.6098329400743, 'actor_loss': 7.223315426379959, 'time_step': 0.03584190600180213, 'td_error': 1.3709881317087944, 'init_value': -9.098721504211426, 'ave_value': -9.066680459899324} step=3114
2022-04-22 08:39.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:39.43 [info     ] CQL_20220422083738: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.0004208673631524764, 'time_algorithm_update': 0.03575507271496547, 'temp_loss': 4.681210148541225, 'temp': 0.9270411447293496, 'alpha_loss': -24.604059478451063, 'alpha': 1.4149104787435145, 'critic_loss': 3178.8313439870844, 'actor_loss': 7.724460914644892, 'time_step': 0.03626824321085318, 'td_error': 1.3833478649879285, 'init_value': -9.537920951843262, 'ave_value': -9.512080810170652} step=3460
2022-04-22 08:39.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:39.56 [info     ] CQL_20220422083738: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.00040646922381627076, 'time_algorithm_update': 0.03506063863721197, 'temp_loss': 4.64465517942616, 'temp': 0.9197626672039142, 'alpha_loss': -25.548544018254805, 'alpha': 1.4693882324102987, 'critic_loss': 3361.047174178107, 'actor_loss': 8.332658121351562, 'time_step': 0.03555978171398185, 'td_error': 1.4003599859118696, 'init_value': -10.219840049743652, 'ave_value': -10.196504422644972} step=3806
2022-04-22 08:39.56 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:40.09 [info     ] CQL_20220422083738: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0004022355713596234, 'time_algorithm_update': 0.03501967198586877, 'temp_loss': 4.608657135439746, 'temp': 0.9125460599888267, 'alpha_loss': -26.538348341263788, 'alpha': 1.5261161675342936, 'critic_loss': 3463.415266974124, 'actor_loss': 9.04410800217204, 'time_step': 0.03551631235662912, 'td_error': 1.4162827441633095, 'init_value': -10.650565147399902, 'ave_value': -10.641279809381116} step=4152
2022-04-22 08:40.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:40.21 [info     ] CQL_20220422083738: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003978503232746455, 'time_algorithm_update': 0.03448545381512945, 'temp_loss': 4.571728565789371, 'temp': 0.9053886801521213, 'alpha_loss': -27.56269414163049, 'alpha': 1.5851554880941534, 'critic_loss': 3517.0671979430094, 'actor_loss': 9.903949737548828, 'time_step': 0.034979075365672914, 'td_error': 1.4461532198364264, 'init_value': -11.762412071228027, 'ave_value': -11.750305740298565} step=4498
2022-04-22 08:40.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:40.34 [info     ] CQL_20220422083738: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00039739002382135116, 'time_algorithm_update': 0.03452361801456165, 'temp_loss': 4.535621401891543, 'temp': 0.8982926583014472, 'alpha_loss': -28.63463090885581, 'alpha': 1.6465722108851968, 'critic_loss': 3531.030600134348, 'actor_loss': 10.873856109001734, 'time_step': 0.03500792056838901, 'td_error': 1.4789176724219508, 'init_value': -12.85566234588623, 'ave_value': -12.844742337240886} step=4844
2022-04-22 08:40.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:40.46 [info     ] CQL_20220422083738: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.0004233452626046418, 'time_algorithm_update': 0.03349245008016597, 'temp_loss': 4.501342397204714, 'temp': 0.8912529704198672, 'alpha_loss': -29.745715510638462, 'alpha': 1.7104501676008192, 'critic_loss': 3510.9823287346458, 'actor_loss': 11.9880447442821, 'time_step': 0.033997938812123554, 'td_error': 1.5166281033912221, 'init_value': -13.98885440826416, 'ave_value': -13.981059340426796} step=5190
2022-04-22 08:40.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:40.59 [info     ] CQL_20220422083738: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00041724078228018874, 'time_algorithm_update': 0.0356317000582039, 'temp_loss': 4.466787030931153, 'temp': 0.8842692240814253, 'alpha_loss': -30.89803789392372, 'alpha': 1.7768597261754076, 'critic_loss': 3480.921800205473, 'actor_loss': 13.17356116509851, 'time_step': 0.03613349812568268, 'td_error': 1.5596155831905427, 'init_value': -15.22342586517334, 'ave_value': -15.217030043260584} step=5536
2022-04-22 08:40.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:41.11 [info     ] CQL_20220422083738: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.0003993917751863513, 'time_algorithm_update': 0.03532943284580473, 'temp_loss': 4.430106555795394, 'temp': 0.877342598458935, 'alpha_loss': -32.097794841479704, 'alpha': 1.845893355807817, 'critic_loss': 3429.3630222915913, 'actor_loss': 14.43129252560566, 'time_step': 0.035815145928046604, 'td_error': 1.6083493883548046, 'init_value': -16.54973793029785, 'ave_value': -16.54247486318893} step=5882
2022-04-22 08:41.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:41.24 [info     ] CQL_20220422083738: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.0004103052822840696, 'time_algorithm_update': 0.03490230458320221, 'temp_loss': 4.395188277856463, 'temp': 0.870471778288053, 'alpha_loss': -33.34778161682834, 'alpha': 1.9176421871764122, 'critic_loss': 3380.231975224666, 'actor_loss': 15.7929142279432, 'time_step': 0.03539318500915704, 'td_error': 1.6586521197916142, 'init_value': -17.774473190307617, 'ave_value': -17.767919364140575} step=6228
2022-04-22 08:41.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:41.36 [info     ] CQL_20220422083738: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003903745915848396, 'time_algorithm_update': 0.03471362452975588, 'temp_loss': 4.361469599552926, 'temp': 0.8636551490753372, 'alpha_loss': -34.642537056366145, 'alpha': 1.992212847478128, 'critic_loss': 3304.1732749277458, 'actor_loss': 17.189449437091806, 'time_step': 0.035188771396703115, 'td_error': 1.7227068473209253, 'init_value': -19.28314781188965, 'ave_value': -19.277425883923055} step=6574
2022-04-22 08:41.36 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:41.49 [info     ] CQL_20220422083738: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0004045026150742018, 'time_algorithm_update': 0.03420007642293941, 'temp_loss': 4.326265368158418, 'temp': 0.8568927047914163, 'alpha_loss': -35.99166302322652, 'alpha': 2.0696985769823106, 'critic_loss': 3233.3147127890174, 'actor_loss': 18.608709318789444, 'time_step': 0.034687458435235, 'td_error': 1.7840892531262893, 'init_value': -20.62917137145996, 'ave_value': -20.62309820384084} step=6920
2022-04-22 08:41.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:42.01 [info     ] CQL_20220422083738: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.000419966058234948, 'time_algorithm_update': 0.034759096327544634, 'temp_loss': 4.293739981733995, 'temp': 0.8501830602312364, 'alpha_loss': -37.388272114571805, 'alpha': 2.1502173401716815, 'critic_loss': 3135.1353541026915, 'actor_loss': 20.042996224640422, 'time_step': 0.035264402455677184, 'td_error': 1.8568290056163819, 'init_value': -22.148330688476562, 'ave_value': -22.140644959512624} step=7266
2022-04-22 08:42.01 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:42.14 [info     ] CQL_20220422083738: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0004091104331044103, 'time_algorithm_update': 0.03412069681751935, 'temp_loss': 4.259777365392343, 'temp': 0.8435255034121475, 'alpha_loss': -38.84286339158957, 'alpha': 2.2338771730489126, 'critic_loss': 3039.1259314035406, 'actor_loss': 21.476331760428543, 'time_step': 0.034616175414509856, 'td_error': 1.929127598769752, 'init_value': -23.55010414123535, 'ave_value': -23.54443990084276} step=7612
2022-04-22 08:42.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:42.26 [info     ] CQL_20220422083738: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.0003944359762820205, 'time_algorithm_update': 0.034268214523447735, 'temp_loss': 4.226459063546506, 'temp': 0.8369210636340125, 'alpha_loss': -40.3534821637104, 'alpha': 2.320803572676774, 'critic_loss': 2945.579628652231, 'actor_loss': 22.88584149090541, 'time_step': 0.03474522877290759, 'td_error': 2.0061476994952083, 'init_value': -24.988388061523438, 'ave_value': -24.980014043783566} step=7958
2022-04-22 08:42.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:42.39 [info     ] CQL_20220422083738: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.0004049415533253223, 'time_algorithm_update': 0.03573848332972885, 'temp_loss': 4.1943495824846915, 'temp': 0.8303684208089905, 'alpha_loss': -41.93012002713419, 'alpha': 2.4111177900623035, 'critic_loss': 2846.6253542155887, 'actor_loss': 24.304988861083984, 'time_step': 0.03623158394256768, 'td_error': 2.0847039904406, 'init_value': -26.38614273071289, 'ave_value': -26.378185877883645} step=8304
2022-04-22 08:42.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:42.51 [info     ] CQL_20220422083738: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.00042166324020121137, 'time_algorithm_update': 0.034475750316774224, 'temp_loss': 4.160689949300248, 'temp': 0.8238659404605799, 'alpha_loss': -43.555117502377904, 'alpha': 2.50495786336116, 'critic_loss': 2759.3475871003434, 'actor_loss': 25.652000399683253, 'time_step': 0.03498294932304779, 'td_error': 2.1601602500156902, 'init_value': -27.594070434570312, 'ave_value': -27.591357219744882} step=8650
2022-04-22 08:42.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:43.04 [info     ] CQL_20220422083738: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00039166039814149715, 'time_algorithm_update': 0.0341002052229953, 'temp_loss': 4.128758972090793, 'temp': 0.8174155206349544, 'alpha_loss': -45.25735407481993, 'alpha': 2.602454188242124, 'critic_loss': 2691.553224773076, 'actor_loss': 27.032261567308723, 'time_step': 0.03457170897136534, 'td_error': 2.2447215966790703, 'init_value': -28.967805862426758, 'ave_value': -28.961210712271882} step=8996
2022-04-22 08:43.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:43.16 [info     ] CQL_20220422083738: epoch=27 step=9342 epoch=27 metrics={'time_sample_batch': 0.00040092495824560264, 'time_algorithm_update': 0.03540515968565307, 'temp_loss': 4.095685698393452, 'temp': 0.81101577612706, 'alpha_loss': -47.01620311406307, 'alpha': 2.703753553373965, 'critic_loss': 2615.1521250112896, 'actor_loss': 28.336433465770213, 'time_step': 0.0358946971121551, 'td_error': 2.332847325706256, 'init_value': -30.367965698242188, 'ave_value': -30.357146361157927} step=9342
2022-04-22 08:43.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_9342.pt


Epoch 28/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:43.30 [info     ] CQL_20220422083738: epoch=28 step=9688 epoch=28 metrics={'time_sample_batch': 0.00040456807682279905, 'time_algorithm_update': 0.03541947651460681, 'temp_loss': 4.06387097022437, 'temp': 0.8046663157168151, 'alpha_loss': -48.84435466259201, 'alpha': 2.808992446502509, 'critic_loss': 2553.0767603526915, 'actor_loss': 29.625162168734335, 'time_step': 0.0359068805771756, 'td_error': 2.412349653572356, 'init_value': -31.484336853027344, 'ave_value': -31.48203854323451} step=9688
2022-04-22 08:43.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_9688.pt


Epoch 29/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:43.42 [info     ] CQL_20220422083738: epoch=29 step=10034 epoch=29 metrics={'time_sample_batch': 0.0003923377549717192, 'time_algorithm_update': 0.03468690442212055, 'temp_loss': 4.031403686958931, 'temp': 0.7983673611128261, 'alpha_loss': -50.74511439814044, 'alpha': 2.9183351862637292, 'critic_loss': 2511.684372742052, 'actor_loss': 30.801784829597253, 'time_step': 0.03516171984589858, 'td_error': 2.504470391054614, 'init_value': -32.88361740112305, 'ave_value': -32.86884320595073} step=10034
2022-04-22 08:43.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_10034.pt


Epoch 30/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:43.55 [info     ] CQL_20220422083738: epoch=30 step=10380 epoch=30 metrics={'time_sample_batch': 0.0003977352484113219, 'time_algorithm_update': 0.03523228623274434, 'temp_loss': 4.00040779913092, 'temp': 0.7921175781012959, 'alpha_loss': -52.7178559606475, 'alpha': 3.031930175819838, 'critic_loss': 2477.3077336129427, 'actor_loss': 31.964913158747503, 'time_step': 0.0357161374450419, 'td_error': 2.581033912733937, 'init_value': -33.90479278564453, 'ave_value': -33.8970415501529} step=10380
2022-04-22 08:43.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_10380.pt


Epoch 31/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:44.08 [info     ] CQL_20220422083738: epoch=31 step=10726 epoch=31 metrics={'time_sample_batch': 0.0003899811320222182, 'time_algorithm_update': 0.03538030489331725, 'temp_loss': 3.9691406088757377, 'temp': 0.7859155484017609, 'alpha_loss': -54.774351274347026, 'alpha': 3.1499576251630836, 'critic_loss': 2449.236556036624, 'actor_loss': 33.0101427397976, 'time_step': 0.035855520667368274, 'td_error': 2.6588146925531992, 'init_value': -34.951324462890625, 'ave_value': -34.94160277329106} step=10726
2022-04-22 08:44.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_10726.pt


Epoch 32/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:44.20 [info     ] CQL_20220422083738: epoch=32 step=11072 epoch=32 metrics={'time_sample_batch': 0.0004023933686272946, 'time_algorithm_update': 0.03495654963344508, 'temp_loss': 3.9377877409058497, 'temp': 0.77976318349728, 'alpha_loss': -56.90513415694926, 'alpha': 3.272577134860044, 'critic_loss': 2431.8313771506955, 'actor_loss': 34.01996188081069, 'time_step': 0.03544787864464556, 'td_error': 2.7273181293903277, 'init_value': -35.80543899536133, 'ave_value': -35.80329878740596} step=11072
2022-04-22 08:44.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_11072.pt


Epoch 33/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:44.33 [info     ] CQL_20220422083738: epoch=33 step=11418 epoch=33 metrics={'time_sample_batch': 0.000396225493767358, 'time_algorithm_update': 0.03514999047869203, 'temp_loss': 3.907088691788602, 'temp': 0.7736584967960513, 'alpha_loss': -59.123400010125486, 'alpha': 3.399981923186021, 'critic_loss': 2439.208329570087, 'actor_loss': 34.979981868942346, 'time_step': 0.03563314228388616, 'td_error': 2.8015139666598645, 'init_value': -36.7701301574707, 'ave_value': -36.76551942983585} step=11418
2022-04-22 08:44.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_11418.pt


Epoch 34/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:44.46 [info     ] CQL_20220422083738: epoch=34 step=11764 epoch=34 metrics={'time_sample_batch': 0.00039558810305733214, 'time_algorithm_update': 0.03596037729627135, 'temp_loss': 3.876390540530916, 'temp': 0.7676010274818178, 'alpha_loss': -61.421595303309445, 'alpha': 3.5323426964655087, 'critic_loss': 2451.189839798591, 'actor_loss': 35.88729121919312, 'time_step': 0.03644595807687396, 'td_error': 2.869816572256534, 'init_value': -37.58870315551758, 'ave_value': -37.589642538918085} step=11764
2022-04-22 08:44.46 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_11764.pt


Epoch 35/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:44.59 [info     ] CQL_20220422083738: epoch=35 step=12110 epoch=35 metrics={'time_sample_batch': 0.0004150102593306172, 'time_algorithm_update': 0.03585206221982923, 'temp_loss': 3.846566442809353, 'temp': 0.7615917197541694, 'alpha_loss': -63.8159806466516, 'alpha': 3.66985636501643, 'critic_loss': 2516.7749305680995, 'actor_loss': 36.84391534397368, 'time_step': 0.03635296036053255, 'td_error': 2.963667753853815, 'init_value': -38.82279968261719, 'ave_value': -38.81209274498192} step=12110
2022-04-22 08:44.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_12110.pt


Epoch 36/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:45.11 [info     ] CQL_20220422083738: epoch=36 step=12456 epoch=36 metrics={'time_sample_batch': 0.00040084778228936167, 'time_algorithm_update': 0.035152260278690756, 'temp_loss': 3.816206186493008, 'temp': 0.7556286767728067, 'alpha_loss': -66.29461544235318, 'alpha': 3.8127254192539723, 'critic_loss': 2584.4562099214236, 'actor_loss': 37.78093545285264, 'time_step': 0.03564404132049208, 'td_error': 3.034359114300656, 'init_value': -39.60688400268555, 'ave_value': -39.603649879734114} step=12456
2022-04-22 08:45.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_12456.pt


Epoch 37/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:45.24 [info     ] CQL_20220422083738: epoch=37 step=12802 epoch=37 metrics={'time_sample_batch': 0.0004368531221599248, 'time_algorithm_update': 0.03543922529055204, 'temp_loss': 3.784955019895741, 'temp': 0.7497138479196956, 'alpha_loss': -68.87800869362891, 'alpha': 3.9611544305878565, 'critic_loss': 2636.475569708499, 'actor_loss': 38.63413419337631, 'time_step': 0.03596746232468269, 'td_error': 3.1117351245298313, 'init_value': -40.535404205322266, 'ave_value': -40.529737159978616} step=12802
2022-04-22 08:45.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_12802.pt


Epoch 38/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:45.37 [info     ] CQL_20220422083738: epoch=38 step=13148 epoch=38 metrics={'time_sample_batch': 0.0004300113358249554, 'time_algorithm_update': 0.0355288265757478, 'temp_loss': 3.756441536666341, 'temp': 0.7438448948322693, 'alpha_loss': -71.55776726165948, 'alpha': 4.115361001450203, 'critic_loss': 2668.6493413847993, 'actor_loss': 39.47282341312122, 'time_step': 0.036050740694034994, 'td_error': 3.1732326650814406, 'init_value': -41.173126220703125, 'ave_value': -41.174290200781066} step=13148
2022-04-22 08:45.37 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_13148.pt


Epoch 39/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:45.50 [info     ] CQL_20220422083738: epoch=39 step=13494 epoch=39 metrics={'time_sample_batch': 0.00041035282818568236, 'time_algorithm_update': 0.035575856363153184, 'temp_loss': 3.7279905218609497, 'temp': 0.7380208171516485, 'alpha_loss': -74.35816278623018, 'alpha': 4.2755659563693005, 'critic_loss': 2704.563937325009, 'actor_loss': 40.34184088734533, 'time_step': 0.03607976781150509, 'td_error': 3.260630277201182, 'init_value': -42.223751068115234, 'ave_value': -42.21615099502209} step=13494
2022-04-22 08:45.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_13494.pt


Epoch 40/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:46.03 [info     ] CQL_20220422083738: epoch=40 step=13840 epoch=40 metrics={'time_sample_batch': 0.00040036749977596925, 'time_algorithm_update': 0.035226649631654595, 'temp_loss': 3.698470310668725, 'temp': 0.732241867077833, 'alpha_loss': -77.23801100185152, 'alpha': 4.442013974823704, 'critic_loss': 2889.413357596866, 'actor_loss': 41.23318969858864, 'time_step': 0.035712512242311686, 'td_error': 3.3336944739170145, 'init_value': -43.02253341674805, 'ave_value': -43.0161750194754} step=13840
2022-04-22 08:46.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_13840.pt


Epoch 41/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:46.15 [info     ] CQL_20220422083738: epoch=41 step=14186 epoch=41 metrics={'time_sample_batch': 0.0004081484899355497, 'time_algorithm_update': 0.03527808602834712, 'temp_loss': 3.6683524171741024, 'temp': 0.7265094503157401, 'alpha_loss': -80.24475439435484, 'alpha': 4.61491833119034, 'critic_loss': 2932.057798528947, 'actor_loss': 41.93405008040411, 'time_step': 0.035779441023148555, 'td_error': 3.399970458764226, 'init_value': -43.688167572021484, 'ave_value': -43.69054370317599} step=14186
2022-04-22 08:46.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_14186.pt


Epoch 42/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:46.28 [info     ] CQL_20220422083738: epoch=42 step=14532 epoch=42 metrics={'time_sample_batch': 0.00039654866808411704, 'time_algorithm_update': 0.03475877797672514, 'temp_loss': 3.6405087468252018, 'temp': 0.7208223511717912, 'alpha_loss': -83.37209408269452, 'alpha': 4.794572744755387, 'critic_loss': 3022.219976347995, 'actor_loss': 42.72553170485303, 'time_step': 0.03524470191470461, 'td_error': 3.4864362765404575, 'init_value': -44.68850326538086, 'ave_value': -44.67785604322279} step=14532
2022-04-22 08:46.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_14532.pt


Epoch 43/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:46.41 [info     ] CQL_20220422083738: epoch=43 step=14878 epoch=43 metrics={'time_sample_batch': 0.00040770403911612624, 'time_algorithm_update': 0.03502144152029401, 'temp_loss': 3.612088880787006, 'temp': 0.7151790905550036, 'alpha_loss': -86.61478417457184, 'alpha': 4.981225278336189, 'critic_loss': 3154.67528732388, 'actor_loss': 43.51890266286156, 'time_step': 0.03552164645553324, 'td_error': 3.5463364070242718, 'init_value': -45.28425216674805, 'ave_value': -45.278370990092974} step=14878
2022-04-22 08:46.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_14878.pt


Epoch 44/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:46.54 [info     ] CQL_20220422083738: epoch=44 step=15224 epoch=44 metrics={'time_sample_batch': 0.0004142391888392454, 'time_algorithm_update': 0.03596351325856468, 'temp_loss': 3.58272025075262, 'temp': 0.7095811363589557, 'alpha_loss': -89.9838192228637, 'alpha': 5.175139808930414, 'critic_loss': 3078.917734487897, 'actor_loss': 44.13256899883292, 'time_step': 0.036476618292703794, 'td_error': 3.5984609149919256, 'init_value': -45.78801345825195, 'ave_value': -45.78623081115028} step=15224
2022-04-22 08:46.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_15224.pt


Epoch 45/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:47.06 [info     ] CQL_20220422083738: epoch=45 step=15570 epoch=45 metrics={'time_sample_batch': 0.0003975457538759088, 'time_algorithm_update': 0.03569113863685917, 'temp_loss': 3.5559255181020393, 'temp': 0.7040261057759986, 'alpha_loss': -93.49952843285709, 'alpha': 5.376604661776152, 'critic_loss': 3003.993378567558, 'actor_loss': 44.7839509379657, 'time_step': 0.0361867832999698, 'td_error': 3.6749295099912915, 'init_value': -46.60315704345703, 'ave_value': -46.596365651455805} step=15570
2022-04-22 08:47.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_15570.pt


Epoch 46/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:47.19 [info     ] CQL_20220422083738: epoch=46 step=15916 epoch=46 metrics={'time_sample_batch': 0.0004078218702636013, 'time_algorithm_update': 0.03521619022237083, 'temp_loss': 3.527293141177624, 'temp': 0.6985140444570883, 'alpha_loss': -97.13306457872335, 'alpha': 5.5859155269027445, 'critic_loss': 3180.4705944612538, 'actor_loss': 45.636705089855745, 'time_step': 0.03571911629913859, 'td_error': 3.758760460514513, 'init_value': -47.4356803894043, 'ave_value': -47.431322217699815} step=15916
2022-04-22 08:47.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_15916.pt


Epoch 47/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:47.32 [info     ] CQL_20220422083738: epoch=47 step=16262 epoch=47 metrics={'time_sample_batch': 0.0004016994740921638, 'time_algorithm_update': 0.03499343009353373, 'temp_loss': 3.498794659024718, 'temp': 0.6930463813632899, 'alpha_loss': -100.92211062921955, 'alpha': 5.803374579876144, 'critic_loss': 3379.587192072345, 'actor_loss': 46.37937716665984, 'time_step': 0.035482988192166894, 'td_error': 3.825391575778595, 'init_value': -48.09392166137695, 'ave_value': -48.08892062509608} step=16262
2022-04-22 08:47.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_16262.pt


Epoch 48/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:47.44 [info     ] CQL_20220422083738: epoch=48 step=16608 epoch=48 metrics={'time_sample_batch': 0.00038160133913073236, 'time_algorithm_update': 0.032509267674705196, 'temp_loss': 3.473108796715047, 'temp': 0.6876207391995226, 'alpha_loss': -104.85820216801814, 'alpha': 6.029311983571576, 'critic_loss': 3507.9877936743587, 'actor_loss': 46.98151883913602, 'time_step': 0.03297095767335396, 'td_error': 3.888159765000939, 'init_value': -48.71529769897461, 'ave_value': -48.712054934259726} step=16608
2022-04-22 08:47.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_16608.pt


Epoch 49/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:47.55 [info     ] CQL_20220422083738: epoch=49 step=16954 epoch=49 metrics={'time_sample_batch': 0.0003726923396821656, 'time_algorithm_update': 0.031730528511752974, 'temp_loss': 3.445236064795125, 'temp': 0.682237110386005, 'alpha_loss': -108.92180360121534, 'alpha': 6.264029755068652, 'critic_loss': 3502.9294998080745, 'actor_loss': 47.52604346743898, 'time_step': 0.03218317927652701, 'td_error': 3.9248698268256503, 'init_value': -49.002586364746094, 'ave_value': -49.00519083818026} step=16954
2022-04-22 08:47.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_16954.pt


Epoch 50/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:48.08 [info     ] CQL_20220422083738: epoch=50 step=17300 epoch=50 metrics={'time_sample_batch': 0.0004040457609760968, 'time_algorithm_update': 0.036207810302690276, 'temp_loss': 3.4193446353680828, 'temp': 0.6768954509944585, 'alpha_loss': -113.16804943194968, 'alpha': 6.507871778025104, 'critic_loss': 3494.2001741442377, 'actor_loss': 48.08755161307451, 'time_step': 0.03670483999858702, 'td_error': 4.019639829652259, 'init_value': -49.99565124511719, 'ave_value': -49.98682240160114} step=17300
2022-04-22 08:48.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422083738/model_17300.pt
start
[ 0.00000000e+00  7.95731469e+08 -3.14589108e-01  2.42000047e-02
 -1.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.25589108e-01  3.16000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.5191

Epoch 1/50:   0%|          | 0/166 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:48.09 [info     ] FQE_20220422084808: epoch=1 step=166 epoch=1 metrics={'time_sample_batch': 0.00012386132435626294, 'time_algorithm_update': 0.0021112970558993787, 'loss': 0.003779864310376705, 'time_step': 0.0022923429328275016, 'init_value': 0.15765811502933502, 'ave_value': 0.19416364776054482, 'soft_opc': nan} step=166




2022-04-22 08:48.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_166.pt


Epoch 2/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.09 [info     ] FQE_20220422084808: epoch=2 step=332 epoch=2 metrics={'time_sample_batch': 0.000130669180169163, 'time_algorithm_update': 0.0023110013410269498, 'loss': 0.0024072649230877587, 'time_step': 0.0024971933249967643, 'init_value': 0.10889279842376709, 'ave_value': 0.17298638818802328, 'soft_opc': nan} step=332




2022-04-22 08:48.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_332.pt


Epoch 3/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.10 [info     ] FQE_20220422084808: epoch=3 step=498 epoch=3 metrics={'time_sample_batch': 0.0001303934189210455, 'time_algorithm_update': 0.0021712119320789016, 'loss': 0.001997751384378826, 'time_step': 0.0023565450346613504, 'init_value': 0.06795106828212738, 'ave_value': 0.14826744315675922, 'soft_opc': nan} step=498




2022-04-22 08:48.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_498.pt


Epoch 4/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.10 [info     ] FQE_20220422084808: epoch=4 step=664 epoch=4 metrics={'time_sample_batch': 0.00012662180935043887, 'time_algorithm_update': 0.0021510468908103116, 'loss': 0.001871260933589235, 'time_step': 0.0023324705031980953, 'init_value': -0.014600373804569244, 'ave_value': 0.0839961260792103, 'soft_opc': nan} step=664




2022-04-22 08:48.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_664.pt


Epoch 5/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.11 [info     ] FQE_20220422084808: epoch=5 step=830 epoch=5 metrics={'time_sample_batch': 0.00012853921177875563, 'time_algorithm_update': 0.0021506476115031414, 'loss': 0.0016953235166708388, 'time_step': 0.0023309767964374587, 'init_value': -0.07463674247264862, 'ave_value': 0.04162210100456267, 'soft_opc': nan} step=830




2022-04-22 08:48.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_830.pt


Epoch 6/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.11 [info     ] FQE_20220422084808: epoch=6 step=996 epoch=6 metrics={'time_sample_batch': 0.00012678697884800923, 'time_algorithm_update': 0.002357627972062812, 'loss': 0.0015388783642528467, 'time_step': 0.0025403557053531507, 'init_value': -0.085710808634758, 'ave_value': 0.04213956002783668, 'soft_opc': nan} step=996




2022-04-22 08:48.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_996.pt


Epoch 7/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.11 [info     ] FQE_20220422084808: epoch=7 step=1162 epoch=7 metrics={'time_sample_batch': 0.00013654203300016472, 'time_algorithm_update': 0.0024484783770090126, 'loss': 0.0014558360457465232, 'time_step': 0.002643359712807529, 'init_value': -0.14819678664207458, 'ave_value': -0.003786203038652201, 'soft_opc': nan} step=1162




2022-04-22 08:48.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_1162.pt


Epoch 8/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.12 [info     ] FQE_20220422084808: epoch=8 step=1328 epoch=8 metrics={'time_sample_batch': 0.00012269508407776616, 'time_algorithm_update': 0.002067060355680535, 'loss': 0.001378859464129635, 'time_step': 0.002239885100399155, 'init_value': -0.1956358402967453, 'ave_value': -0.03266502923525132, 'soft_opc': nan} step=1328




2022-04-22 08:48.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_1328.pt


Epoch 9/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.12 [info     ] FQE_20220422084808: epoch=9 step=1494 epoch=9 metrics={'time_sample_batch': 0.0001352034419415945, 'time_algorithm_update': 0.0022978495402508473, 'loss': 0.0012783165074357517, 'time_step': 0.002490812037364546, 'init_value': -0.23525869846343994, 'ave_value': -0.059646701969642626, 'soft_opc': nan} step=1494




2022-04-22 08:48.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_1494.pt


Epoch 10/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.13 [info     ] FQE_20220422084808: epoch=10 step=1660 epoch=10 metrics={'time_sample_batch': 0.0001407919159854751, 'time_algorithm_update': 0.00251838098089379, 'loss': 0.0012304103748833604, 'time_step': 0.002721764955175928, 'init_value': -0.30343419313430786, 'ave_value': -0.11095869433185136, 'soft_opc': nan} step=1660




2022-04-22 08:48.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_1660.pt


Epoch 11/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.13 [info     ] FQE_20220422084808: epoch=11 step=1826 epoch=11 metrics={'time_sample_batch': 0.0001279632729220103, 'time_algorithm_update': 0.002074622246156256, 'loss': 0.0011555056230737322, 'time_step': 0.0022604666560529225, 'init_value': -0.3697999119758606, 'ave_value': -0.15701547851450406, 'soft_opc': nan} step=1826




2022-04-22 08:48.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_1826.pt


Epoch 12/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.14 [info     ] FQE_20220422084808: epoch=12 step=1992 epoch=12 metrics={'time_sample_batch': 0.0001200782247336514, 'time_algorithm_update': 0.0019450273858495504, 'loss': 0.0011531733508019831, 'time_step': 0.0021150499941354774, 'init_value': -0.38846760988235474, 'ave_value': -0.1655484832235956, 'soft_opc': nan} step=1992




2022-04-22 08:48.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_1992.pt


Epoch 13/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.14 [info     ] FQE_20220422084808: epoch=13 step=2158 epoch=13 metrics={'time_sample_batch': 0.00013154098786503435, 'time_algorithm_update': 0.00215637109365808, 'loss': 0.001075853965685602, 'time_step': 0.00234647256782256, 'init_value': -0.44789376854896545, 'ave_value': -0.2093164408587859, 'soft_opc': nan} step=2158




2022-04-22 08:48.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_2158.pt


Epoch 14/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.15 [info     ] FQE_20220422084808: epoch=14 step=2324 epoch=14 metrics={'time_sample_batch': 0.00014137072735522166, 'time_algorithm_update': 0.0025442896119083265, 'loss': 0.0010287807326895713, 'time_step': 0.0027491155877170793, 'init_value': -0.527449369430542, 'ave_value': -0.27873374933779643, 'soft_opc': nan} step=2324




2022-04-22 08:48.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_2324.pt


Epoch 15/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.15 [info     ] FQE_20220422084808: epoch=15 step=2490 epoch=15 metrics={'time_sample_batch': 0.00012495287929672794, 'time_algorithm_update': 0.0020435057490705007, 'loss': 0.0009774036548754021, 'time_step': 0.0022247110504701913, 'init_value': -0.57627934217453, 'ave_value': -0.31402377386803965, 'soft_opc': nan} step=2490




2022-04-22 08:48.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_2490.pt


Epoch 16/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.16 [info     ] FQE_20220422084808: epoch=16 step=2656 epoch=16 metrics={'time_sample_batch': 0.00013596753039992, 'time_algorithm_update': 0.0024703166571008153, 'loss': 0.0009905463566720284, 'time_step': 0.0026661157608032227, 'init_value': -0.6450680494308472, 'ave_value': -0.3657900066140133, 'soft_opc': nan} step=2656




2022-04-22 08:48.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_2656.pt


Epoch 17/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.16 [info     ] FQE_20220422084808: epoch=17 step=2822 epoch=17 metrics={'time_sample_batch': 0.00013750863362507648, 'time_algorithm_update': 0.0024836781513260073, 'loss': 0.0009793895242401927, 'time_step': 0.0026780769049403177, 'init_value': -0.7183560729026794, 'ave_value': -0.4295727971219667, 'soft_opc': nan} step=2822




2022-04-22 08:48.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_2822.pt


Epoch 18/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.17 [info     ] FQE_20220422084808: epoch=18 step=2988 epoch=18 metrics={'time_sample_batch': 0.00013809749879032732, 'time_algorithm_update': 0.002551972147930099, 'loss': 0.0010018441875195916, 'time_step': 0.0027596792542790793, 'init_value': -0.7577649354934692, 'ave_value': -0.45967891789502924, 'soft_opc': nan} step=2988




2022-04-22 08:48.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_2988.pt


Epoch 19/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.17 [info     ] FQE_20220422084808: epoch=19 step=3154 epoch=19 metrics={'time_sample_batch': 0.00013339950377682606, 'time_algorithm_update': 0.00228276166571192, 'loss': 0.0010322519696909509, 'time_step': 0.0024798203663653636, 'init_value': -0.84065842628479, 'ave_value': -0.516300964504816, 'soft_opc': nan} step=3154




2022-04-22 08:48.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_3154.pt


Epoch 20/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.17 [info     ] FQE_20220422084808: epoch=20 step=3320 epoch=20 metrics={'time_sample_batch': 0.00013525658343211715, 'time_algorithm_update': 0.002427239015877965, 'loss': 0.001061659341177297, 'time_step': 0.002622979233063847, 'init_value': -0.9079312086105347, 'ave_value': -0.5502831349787008, 'soft_opc': nan} step=3320




2022-04-22 08:48.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_3320.pt


Epoch 21/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.18 [info     ] FQE_20220422084808: epoch=21 step=3486 epoch=21 metrics={'time_sample_batch': 0.00012550152927996163, 'time_algorithm_update': 0.0020983563848288663, 'loss': 0.001130065855644475, 'time_step': 0.0022778654673013344, 'init_value': -0.9825729131698608, 'ave_value': -0.617422650217473, 'soft_opc': nan} step=3486




2022-04-22 08:48.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_3486.pt


Epoch 22/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.18 [info     ] FQE_20220422084808: epoch=22 step=3652 epoch=22 metrics={'time_sample_batch': 0.0001192882836583149, 'time_algorithm_update': 0.0019647816577589654, 'loss': 0.0012254714865885473, 'time_step': 0.002137791679566165, 'init_value': -1.0557949542999268, 'ave_value': -0.6563912991165847, 'soft_opc': nan} step=3652




2022-04-22 08:48.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_3652.pt


Epoch 23/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.19 [info     ] FQE_20220422084808: epoch=23 step=3818 epoch=23 metrics={'time_sample_batch': 0.00012622253004326877, 'time_algorithm_update': 0.0020549311695328676, 'loss': 0.001353368770958104, 'time_step': 0.002238391393638519, 'init_value': -1.1256911754608154, 'ave_value': -0.6936350396322506, 'soft_opc': nan} step=3818




2022-04-22 08:48.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_3818.pt


Epoch 24/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.19 [info     ] FQE_20220422084808: epoch=24 step=3984 epoch=24 metrics={'time_sample_batch': 0.0001250692160732775, 'time_algorithm_update': 0.0021181681069983057, 'loss': 0.0014919153450082157, 'time_step': 0.0022991680237184086, 'init_value': -1.1879587173461914, 'ave_value': -0.7495690881132006, 'soft_opc': nan} step=3984




2022-04-22 08:48.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_3984.pt


Epoch 25/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.20 [info     ] FQE_20220422084808: epoch=25 step=4150 epoch=25 metrics={'time_sample_batch': 0.0001289715249854398, 'time_algorithm_update': 0.002241687602307423, 'loss': 0.001641842745088815, 'time_step': 0.002429177962153791, 'init_value': -1.3083248138427734, 'ave_value': -0.8517436922133506, 'soft_opc': nan} step=4150




2022-04-22 08:48.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_4150.pt


Epoch 26/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.20 [info     ] FQE_20220422084808: epoch=26 step=4316 epoch=26 metrics={'time_sample_batch': 0.00012294212019587137, 'time_algorithm_update': 0.0019686480602586127, 'loss': 0.0017575842238252938, 'time_step': 0.0021477334470634, 'init_value': -1.4075188636779785, 'ave_value': -0.9206537376854333, 'soft_opc': nan} step=4316




2022-04-22 08:48.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_4316.pt


Epoch 27/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.20 [info     ] FQE_20220422084808: epoch=27 step=4482 epoch=27 metrics={'time_sample_batch': 0.00013795243688376554, 'time_algorithm_update': 0.0024775841149939112, 'loss': 0.0019499588223364968, 'time_step': 0.002678049616066806, 'init_value': -1.4284677505493164, 'ave_value': -0.9327571645244822, 'soft_opc': nan} step=4482




2022-04-22 08:48.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_4482.pt


Epoch 28/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.21 [info     ] FQE_20220422084808: epoch=28 step=4648 epoch=28 metrics={'time_sample_batch': 0.0001312781529254224, 'time_algorithm_update': 0.002260314412863858, 'loss': 0.0020755730283233418, 'time_step': 0.0024498327668890894, 'init_value': -1.5133472681045532, 'ave_value': -0.9872630416407241, 'soft_opc': nan} step=4648




2022-04-22 08:48.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_4648.pt


Epoch 29/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.21 [info     ] FQE_20220422084808: epoch=29 step=4814 epoch=29 metrics={'time_sample_batch': 0.00013879264693662343, 'time_algorithm_update': 0.0023861649524734682, 'loss': 0.002224629640920054, 'time_step': 0.0025878914867539004, 'init_value': -1.6028344631195068, 'ave_value': -1.0526569586448573, 'soft_opc': nan} step=4814




2022-04-22 08:48.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_4814.pt


Epoch 30/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.22 [info     ] FQE_20220422084808: epoch=30 step=4980 epoch=30 metrics={'time_sample_batch': 0.00013415210218314664, 'time_algorithm_update': 0.002327320087386901, 'loss': 0.0023840367920270346, 'time_step': 0.0025219701858888188, 'init_value': -1.6468822956085205, 'ave_value': -1.082779559352108, 'soft_opc': nan} step=4980




2022-04-22 08:48.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_4980.pt


Epoch 31/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.22 [info     ] FQE_20220422084808: epoch=31 step=5146 epoch=31 metrics={'time_sample_batch': 0.00012855501060026237, 'time_algorithm_update': 0.002372868089790804, 'loss': 0.002657299940383163, 'time_step': 0.002554966742733875, 'init_value': -1.770122766494751, 'ave_value': -1.1888417792145733, 'soft_opc': nan} step=5146




2022-04-22 08:48.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_5146.pt


Epoch 32/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.23 [info     ] FQE_20220422084808: epoch=32 step=5312 epoch=32 metrics={'time_sample_batch': 0.00013876823057611305, 'time_algorithm_update': 0.002495800156191171, 'loss': 0.002741518840319982, 'time_step': 0.0026957126985113307, 'init_value': -1.7816684246063232, 'ave_value': -1.1905030960032532, 'soft_opc': nan} step=5312




2022-04-22 08:48.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_5312.pt


Epoch 33/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.23 [info     ] FQE_20220422084808: epoch=33 step=5478 epoch=33 metrics={'time_sample_batch': 0.00014232727418462914, 'time_algorithm_update': 0.0026770585990813843, 'loss': 0.002800102264294962, 'time_step': 0.0028850773730909967, 'init_value': -1.8700735569000244, 'ave_value': -1.2502990019482536, 'soft_opc': nan} step=5478




2022-04-22 08:48.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_5478.pt


Epoch 34/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.24 [info     ] FQE_20220422084808: epoch=34 step=5644 epoch=34 metrics={'time_sample_batch': 0.0001241988446339067, 'time_algorithm_update': 0.002113579267478851, 'loss': 0.0033522830174672023, 'time_step': 0.002293868237231151, 'init_value': -1.9335397481918335, 'ave_value': -1.2998208058309986, 'soft_opc': nan} step=5644




2022-04-22 08:48.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_5644.pt


Epoch 35/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.24 [info     ] FQE_20220422084808: epoch=35 step=5810 epoch=35 metrics={'time_sample_batch': 0.00013579948838934842, 'time_algorithm_update': 0.0024453200489641673, 'loss': 0.0033738175247506946, 'time_step': 0.0026460914726716927, 'init_value': -1.9990324974060059, 'ave_value': -1.3423336960845165, 'soft_opc': nan} step=5810




2022-04-22 08:48.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_5810.pt


Epoch 36/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.25 [info     ] FQE_20220422084808: epoch=36 step=5976 epoch=36 metrics={'time_sample_batch': 0.00012884657066988657, 'time_algorithm_update': 0.002126774155949972, 'loss': 0.0035164121228777974, 'time_step': 0.002312650163489652, 'init_value': -2.0260040760040283, 'ave_value': -1.353644174337387, 'soft_opc': nan} step=5976




2022-04-22 08:48.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_5976.pt


Epoch 37/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.25 [info     ] FQE_20220422084808: epoch=37 step=6142 epoch=37 metrics={'time_sample_batch': 0.00013123793774340526, 'time_algorithm_update': 0.002106586134577372, 'loss': 0.0037038548072310536, 'time_step': 0.0022960527833685816, 'init_value': -2.1253364086151123, 'ave_value': -1.4257807743307707, 'soft_opc': nan} step=6142




2022-04-22 08:48.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_6142.pt


Epoch 38/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.26 [info     ] FQE_20220422084808: epoch=38 step=6308 epoch=38 metrics={'time_sample_batch': 0.0001288738595433982, 'time_algorithm_update': 0.00209862352853798, 'loss': 0.003825999871429325, 'time_step': 0.002286630940724568, 'init_value': -2.128795623779297, 'ave_value': -1.4180309018036266, 'soft_opc': nan} step=6308




2022-04-22 08:48.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_6308.pt


Epoch 39/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.26 [info     ] FQE_20220422084808: epoch=39 step=6474 epoch=39 metrics={'time_sample_batch': 0.0001280839184680617, 'time_algorithm_update': 0.0021893633417336337, 'loss': 0.004139664458427915, 'time_step': 0.0023767948150634766, 'init_value': -2.167454242706299, 'ave_value': -1.4462588361321806, 'soft_opc': nan} step=6474




2022-04-22 08:48.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_6474.pt


Epoch 40/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.27 [info     ] FQE_20220422084808: epoch=40 step=6640 epoch=40 metrics={'time_sample_batch': 0.00013488459299845868, 'time_algorithm_update': 0.002327898898756648, 'loss': 0.004267806293869234, 'time_step': 0.0025244951248168945, 'init_value': -2.2145111560821533, 'ave_value': -1.4775673828884826, 'soft_opc': nan} step=6640




2022-04-22 08:48.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_6640.pt


Epoch 41/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.27 [info     ] FQE_20220422084808: epoch=41 step=6806 epoch=41 metrics={'time_sample_batch': 0.0001330907086291945, 'time_algorithm_update': 0.0023031636893031107, 'loss': 0.004424018551562548, 'time_step': 0.0024944486388240954, 'init_value': -2.2867016792297363, 'ave_value': -1.5474782925401185, 'soft_opc': nan} step=6806




2022-04-22 08:48.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_6806.pt


Epoch 42/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.27 [info     ] FQE_20220422084808: epoch=42 step=6972 epoch=42 metrics={'time_sample_batch': 0.00013397400637707078, 'time_algorithm_update': 0.002364734569227839, 'loss': 0.004645758374567503, 'time_step': 0.0025642104895718127, 'init_value': -2.310580253601074, 'ave_value': -1.5627294104512748, 'soft_opc': nan} step=6972




2022-04-22 08:48.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_6972.pt


Epoch 43/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.28 [info     ] FQE_20220422084808: epoch=43 step=7138 epoch=43 metrics={'time_sample_batch': 0.00013977791889604316, 'time_algorithm_update': 0.0024322185171655863, 'loss': 0.004744831223499468, 'time_step': 0.0026316585310970446, 'init_value': -2.3384878635406494, 'ave_value': -1.573505896024473, 'soft_opc': nan} step=7138




2022-04-22 08:48.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_7138.pt


Epoch 44/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.28 [info     ] FQE_20220422084808: epoch=44 step=7304 epoch=44 metrics={'time_sample_batch': 0.00015560690178928604, 'time_algorithm_update': 0.0025290236415633238, 'loss': 0.004993011973165705, 'time_step': 0.0027453482869159744, 'init_value': -2.4046287536621094, 'ave_value': -1.6228907542521351, 'soft_opc': nan} step=7304




2022-04-22 08:48.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_7304.pt


Epoch 45/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.29 [info     ] FQE_20220422084808: epoch=45 step=7470 epoch=45 metrics={'time_sample_batch': 0.0001460270709302052, 'time_algorithm_update': 0.002717474857008601, 'loss': 0.0050461455128199125, 'time_step': 0.0029255625713302427, 'init_value': -2.4459877014160156, 'ave_value': -1.6474781539123338, 'soft_opc': nan} step=7470




2022-04-22 08:48.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_7470.pt


Epoch 46/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.29 [info     ] FQE_20220422084808: epoch=46 step=7636 epoch=46 metrics={'time_sample_batch': 0.00012952017496867352, 'time_algorithm_update': 0.002296391739902726, 'loss': 0.005232171827004325, 'time_step': 0.0024865334292492234, 'init_value': -2.461941957473755, 'ave_value': -1.655693420938946, 'soft_opc': nan} step=7636




2022-04-22 08:48.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_7636.pt


Epoch 47/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.30 [info     ] FQE_20220422084808: epoch=47 step=7802 epoch=47 metrics={'time_sample_batch': 0.00014058940381888883, 'time_algorithm_update': 0.0024558018489056325, 'loss': 0.005356100186949641, 'time_step': 0.0026608906596539967, 'init_value': -2.5361695289611816, 'ave_value': -1.710893558751087, 'soft_opc': nan} step=7802




2022-04-22 08:48.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_7802.pt


Epoch 48/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.30 [info     ] FQE_20220422084808: epoch=48 step=7968 epoch=48 metrics={'time_sample_batch': 0.0001330418759081737, 'time_algorithm_update': 0.0022782044238354787, 'loss': 0.005627200855306877, 'time_step': 0.0024708423269800394, 'init_value': -2.5577926635742188, 'ave_value': -1.7170008591032242, 'soft_opc': nan} step=7968




2022-04-22 08:48.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_7968.pt


Epoch 49/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.31 [info     ] FQE_20220422084808: epoch=49 step=8134 epoch=49 metrics={'time_sample_batch': 0.00013516179050307675, 'time_algorithm_update': 0.002494636788425675, 'loss': 0.005935164696729113, 'time_step': 0.002697039799517896, 'init_value': -2.6472058296203613, 'ave_value': -1.8036126713058702, 'soft_opc': nan} step=8134




2022-04-22 08:48.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_8134.pt


Epoch 50/50:   0%|          | 0/166 [00:00<?, ?it/s]



2022-04-22 08:48.31 [info     ] FQE_20220422084808: epoch=50 step=8300 epoch=50 metrics={'time_sample_batch': 0.00013497364090149662, 'time_algorithm_update': 0.0022491317197500943, 'loss': 0.006095571412406133, 'time_step': 0.0024431915168302604, 'init_value': -2.7042534351348877, 'ave_value': -1.8419147273038958, 'soft_opc': nan} step=8300




2022-04-22 08:48.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084808/model_8300.pt
start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3

2022-04-22 08:48.32 [info     ] Directory is created at d3rlpy_logs/FQE_20220422084832
2022-04-22 08:48.32 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:48.32 [debug    ] Building models...
2022-04-22 08:48.32 [debug    ] Models have been built.
2022-04-22 08:48.32 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220422084832/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_size': 100, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 

Epoch 1/50:   0%|          | 0/344 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-22 08:48.33 [info     ] FQE_20220422084832: epoch=1 step=344 epoch=1 metrics={'time_sample_batch': 0.00012860949649367223, 'time_algorithm_update': 0.002156030300051667, 'loss': 0.02503694006956594, 'time_step': 0.0023450782132703202, 'init_value': -1.1456990242004395, 'ave_value': -1.1242650346645902, 'soft_opc': nan} step=344




2022-04-22 08:48.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_344.pt


Epoch 2/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.34 [info     ] FQE_20220422084832: epoch=2 step=688 epoch=2 metrics={'time_sample_batch': 0.00013439461242320926, 'time_algorithm_update': 0.0023792878139850706, 'loss': 0.02180153894411443, 'time_step': 0.0025755908600119657, 'init_value': -1.888555645942688, 'ave_value': -1.8579672087151724, 'soft_opc': nan} step=688




2022-04-22 08:48.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_688.pt


Epoch 3/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.34 [info     ] FQE_20220422084832: epoch=3 step=1032 epoch=3 metrics={'time_sample_batch': 0.00013764237248620323, 'time_algorithm_update': 0.0023662198421566987, 'loss': 0.023893746853329588, 'time_step': 0.002567042445027551, 'init_value': -2.753584384918213, 'ave_value': -2.7536239985141666, 'soft_opc': nan} step=1032




2022-04-22 08:48.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_1032.pt


Epoch 4/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.35 [info     ] FQE_20220422084832: epoch=4 step=1376 epoch=4 metrics={'time_sample_batch': 0.00012719977733700774, 'time_algorithm_update': 0.0021161804365557296, 'loss': 0.026761818501210316, 'time_step': 0.002301139193911885, 'init_value': -3.2040505409240723, 'ave_value': -3.287562945344158, 'soft_opc': nan} step=1376




2022-04-22 08:48.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_1376.pt


Epoch 5/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.36 [info     ] FQE_20220422084832: epoch=5 step=1720 epoch=5 metrics={'time_sample_batch': 0.00012962069622305937, 'time_algorithm_update': 0.002207911291787791, 'loss': 0.035839019410940275, 'time_step': 0.002396163552306419, 'init_value': -3.6654229164123535, 'ave_value': -3.9830891647854365, 'soft_opc': nan} step=1720




2022-04-22 08:48.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_1720.pt


Epoch 6/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.37 [info     ] FQE_20220422084832: epoch=6 step=2064 epoch=6 metrics={'time_sample_batch': 0.00013990111129228458, 'time_algorithm_update': 0.002457743467286576, 'loss': 0.04610957295720487, 'time_step': 0.002660978672116302, 'init_value': -3.895883560180664, 'ave_value': -4.472348912989905, 'soft_opc': nan} step=2064




2022-04-22 08:48.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_2064.pt


Epoch 7/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.38 [info     ] FQE_20220422084832: epoch=7 step=2408 epoch=7 metrics={'time_sample_batch': 0.0001308252645093341, 'time_algorithm_update': 0.002302677825439808, 'loss': 0.05952791593492378, 'time_step': 0.002493093873179236, 'init_value': -4.2995710372924805, 'ave_value': -5.24378084928544, 'soft_opc': nan} step=2408




2022-04-22 08:48.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_2408.pt


Epoch 8/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.39 [info     ] FQE_20220422084832: epoch=8 step=2752 epoch=8 metrics={'time_sample_batch': 0.0001346926356470862, 'time_algorithm_update': 0.002384455398071644, 'loss': 0.07418902322844884, 'time_step': 0.002582313709480818, 'init_value': -3.9839212894439697, 'ave_value': -5.451014754833939, 'soft_opc': nan} step=2752




2022-04-22 08:48.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_2752.pt


Epoch 9/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.40 [info     ] FQE_20220422084832: epoch=9 step=3096 epoch=9 metrics={'time_sample_batch': 0.0001324789468632188, 'time_algorithm_update': 0.00242553478063539, 'loss': 0.08648386267880194, 'time_step': 0.002620627713757892, 'init_value': -4.113131046295166, 'ave_value': -6.043315777041622, 'soft_opc': nan} step=3096




2022-04-22 08:48.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_3096.pt


Epoch 10/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.41 [info     ] FQE_20220422084832: epoch=10 step=3440 epoch=10 metrics={'time_sample_batch': 0.00012690591257671978, 'time_algorithm_update': 0.002204642739406852, 'loss': 0.10033651970348559, 'time_step': 0.0023909183435661847, 'init_value': -4.185876846313477, 'ave_value': -6.620332817174914, 'soft_opc': nan} step=3440




2022-04-22 08:48.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_3440.pt


Epoch 11/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.42 [info     ] FQE_20220422084832: epoch=11 step=3784 epoch=11 metrics={'time_sample_batch': 0.000136358100314473, 'time_algorithm_update': 0.002471646597219068, 'loss': 0.10509059000976903, 'time_step': 0.002666575271029805, 'init_value': -4.386247634887695, 'ave_value': -7.166008209303726, 'soft_opc': nan} step=3784




2022-04-22 08:48.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_3784.pt


Epoch 12/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.43 [info     ] FQE_20220422084832: epoch=12 step=4128 epoch=12 metrics={'time_sample_batch': 0.00013574472693509833, 'time_algorithm_update': 0.0024157221927199254, 'loss': 0.1152482156501, 'time_step': 0.002613308817841286, 'init_value': -4.417757511138916, 'ave_value': -7.5567220395737165, 'soft_opc': nan} step=4128




2022-04-22 08:48.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_4128.pt


Epoch 13/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.44 [info     ] FQE_20220422084832: epoch=13 step=4472 epoch=13 metrics={'time_sample_batch': 0.00012570966121762297, 'time_algorithm_update': 0.002115589241648829, 'loss': 0.12545080427730151, 'time_step': 0.0022969675618548725, 'init_value': -4.524305820465088, 'ave_value': -7.953881272317439, 'soft_opc': nan} step=4472




2022-04-22 08:48.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_4472.pt


Epoch 14/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.45 [info     ] FQE_20220422084832: epoch=14 step=4816 epoch=14 metrics={'time_sample_batch': 0.00013328776803127554, 'time_algorithm_update': 0.0023701211740804274, 'loss': 0.13897078176743763, 'time_step': 0.0025644863760748574, 'init_value': -4.747961521148682, 'ave_value': -8.242287749481632, 'soft_opc': nan} step=4816




2022-04-22 08:48.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_4816.pt


Epoch 15/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.46 [info     ] FQE_20220422084832: epoch=15 step=5160 epoch=15 metrics={'time_sample_batch': 0.00013169854186302008, 'time_algorithm_update': 0.002163365829822629, 'loss': 0.1527712779054635, 'time_step': 0.0023531102856924366, 'init_value': -5.182102203369141, 'ave_value': -8.718226294938123, 'soft_opc': nan} step=5160




2022-04-22 08:48.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_5160.pt


Epoch 16/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.47 [info     ] FQE_20220422084832: epoch=16 step=5504 epoch=16 metrics={'time_sample_batch': 0.00013873882071916446, 'time_algorithm_update': 0.0024108512457026994, 'loss': 0.1634814407308261, 'time_step': 0.0026133296101592306, 'init_value': -6.191221714019775, 'ave_value': -9.68261450812187, 'soft_opc': nan} step=5504




2022-04-22 08:48.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_5504.pt


Epoch 17/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.48 [info     ] FQE_20220422084832: epoch=17 step=5848 epoch=17 metrics={'time_sample_batch': 0.00013251637303551963, 'time_algorithm_update': 0.002256284619486609, 'loss': 0.17574888960523313, 'time_step': 0.0024515598319297615, 'init_value': -6.419861793518066, 'ave_value': -9.714582804458736, 'soft_opc': nan} step=5848




2022-04-22 08:48.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_5848.pt


Epoch 18/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.49 [info     ] FQE_20220422084832: epoch=18 step=6192 epoch=18 metrics={'time_sample_batch': 0.000127867903820304, 'time_algorithm_update': 0.0021514989608942075, 'loss': 0.18104966217651963, 'time_step': 0.002341370942980744, 'init_value': -6.776395797729492, 'ave_value': -10.070833498353625, 'soft_opc': nan} step=6192




2022-04-22 08:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_6192.pt


Epoch 19/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.50 [info     ] FQE_20220422084832: epoch=19 step=6536 epoch=19 metrics={'time_sample_batch': 0.00014093726180320563, 'time_algorithm_update': 0.002549918585045393, 'loss': 0.20163399143534344, 'time_step': 0.00275645422381024, 'init_value': -7.212441921234131, 'ave_value': -10.33618708971955, 'soft_opc': nan} step=6536




2022-04-22 08:48.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_6536.pt


Epoch 20/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.50 [info     ] FQE_20220422084832: epoch=20 step=6880 epoch=20 metrics={'time_sample_batch': 0.0001325524130532908, 'time_algorithm_update': 0.0022311626478683116, 'loss': 0.21834067194614298, 'time_step': 0.002422748610030773, 'init_value': -7.633274078369141, 'ave_value': -10.606717292567598, 'soft_opc': nan} step=6880




2022-04-22 08:48.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_6880.pt


Epoch 21/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.51 [info     ] FQE_20220422084832: epoch=21 step=7224 epoch=21 metrics={'time_sample_batch': 0.00013914496399635492, 'time_algorithm_update': 0.0024030950180319853, 'loss': 0.251633872126424, 'time_step': 0.0026060356650241586, 'init_value': -8.220976829528809, 'ave_value': -10.890360012151058, 'soft_opc': nan} step=7224




2022-04-22 08:48.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_7224.pt


Epoch 22/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.52 [info     ] FQE_20220422084832: epoch=22 step=7568 epoch=22 metrics={'time_sample_batch': 0.00013565116150434628, 'time_algorithm_update': 0.002441141494484835, 'loss': 0.27502642284948814, 'time_step': 0.002642765987751096, 'init_value': -8.667337417602539, 'ave_value': -11.17497143617827, 'soft_opc': nan} step=7568




2022-04-22 08:48.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_7568.pt


Epoch 23/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.53 [info     ] FQE_20220422084832: epoch=23 step=7912 epoch=23 metrics={'time_sample_batch': 0.00013180319653000941, 'time_algorithm_update': 0.0023321474707403847, 'loss': 0.30654793239176964, 'time_step': 0.0025266450504923977, 'init_value': -9.484756469726562, 'ave_value': -11.591695948727303, 'soft_opc': nan} step=7912




2022-04-22 08:48.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_7912.pt


Epoch 24/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.54 [info     ] FQE_20220422084832: epoch=24 step=8256 epoch=24 metrics={'time_sample_batch': 0.00012333171312199084, 'time_algorithm_update': 0.0020288748796596086, 'loss': 0.34027756604387666, 'time_step': 0.002208928729212561, 'init_value': -9.997407913208008, 'ave_value': -11.851085115301315, 'soft_opc': nan} step=8256




2022-04-22 08:48.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_8256.pt


Epoch 25/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.55 [info     ] FQE_20220422084832: epoch=25 step=8600 epoch=25 metrics={'time_sample_batch': 0.00014981419541114984, 'time_algorithm_update': 0.0024378313574680063, 'loss': 0.3677281899875853, 'time_step': 0.002651565296705379, 'init_value': -10.37206745147705, 'ave_value': -11.979701101203315, 'soft_opc': nan} step=8600




2022-04-22 08:48.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_8600.pt


Epoch 26/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.56 [info     ] FQE_20220422084832: epoch=26 step=8944 epoch=26 metrics={'time_sample_batch': 0.0001287037550016891, 'time_algorithm_update': 0.0022123878778413284, 'loss': 0.3873145852216281, 'time_step': 0.002398513777311458, 'init_value': -11.104772567749023, 'ave_value': -12.271874198468133, 'soft_opc': nan} step=8944




2022-04-22 08:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_8944.pt


Epoch 27/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.57 [info     ] FQE_20220422084832: epoch=27 step=9288 epoch=27 metrics={'time_sample_batch': 0.00013428718544716058, 'time_algorithm_update': 0.0023337658061537633, 'loss': 0.3982922320773955, 'time_step': 0.0025305623231932175, 'init_value': -11.878952026367188, 'ave_value': -12.874452519255716, 'soft_opc': nan} step=9288




2022-04-22 08:48.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_9288.pt


Epoch 28/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.58 [info     ] FQE_20220422084832: epoch=28 step=9632 epoch=28 metrics={'time_sample_batch': 0.000128802171973295, 'time_algorithm_update': 0.002270891222842904, 'loss': 0.4241470858389728, 'time_step': 0.002462425897287768, 'init_value': -12.054672241210938, 'ave_value': -12.758764705550647, 'soft_opc': nan} step=9632




2022-04-22 08:48.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_9632.pt


Epoch 29/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:48.59 [info     ] FQE_20220422084832: epoch=29 step=9976 epoch=29 metrics={'time_sample_batch': 0.00012690521949945495, 'time_algorithm_update': 0.0020909690579702686, 'loss': 0.43320547519684877, 'time_step': 0.0022764552471249604, 'init_value': -12.141510963439941, 'ave_value': -12.826900503061594, 'soft_opc': nan} step=9976




2022-04-22 08:48.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_9976.pt


Epoch 30/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.00 [info     ] FQE_20220422084832: epoch=30 step=10320 epoch=30 metrics={'time_sample_batch': 0.0001304011012232581, 'time_algorithm_update': 0.00225774978482446, 'loss': 0.44616491926379154, 'time_step': 0.0024476217669109966, 'init_value': -12.817852973937988, 'ave_value': -13.26774284109339, 'soft_opc': nan} step=10320




2022-04-22 08:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_10320.pt


Epoch 31/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.01 [info     ] FQE_20220422084832: epoch=31 step=10664 epoch=31 metrics={'time_sample_batch': 0.00012715611346932344, 'time_algorithm_update': 0.002166740423025087, 'loss': 0.4666192601489033, 'time_step': 0.002353973859964415, 'init_value': -13.052492141723633, 'ave_value': -13.362888200692664, 'soft_opc': nan} step=10664




2022-04-22 08:49.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_10664.pt


Epoch 32/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.02 [info     ] FQE_20220422084832: epoch=32 step=11008 epoch=32 metrics={'time_sample_batch': 0.00013725910075875215, 'time_algorithm_update': 0.002341831839361856, 'loss': 0.48830817264591364, 'time_step': 0.0025429531585338503, 'init_value': -13.506391525268555, 'ave_value': -13.64231228358322, 'soft_opc': nan} step=11008




2022-04-22 08:49.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_11008.pt


Epoch 33/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.02 [info     ] FQE_20220422084832: epoch=33 step=11352 epoch=33 metrics={'time_sample_batch': 0.00013104635615681493, 'time_algorithm_update': 0.002235266358353371, 'loss': 0.49921278108647743, 'time_step': 0.002426844003588654, 'init_value': -13.49612808227539, 'ave_value': -13.592905732321025, 'soft_opc': nan} step=11352




2022-04-22 08:49.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_11352.pt


Epoch 34/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.03 [info     ] FQE_20220422084832: epoch=34 step=11696 epoch=34 metrics={'time_sample_batch': 0.0001351500666418741, 'time_algorithm_update': 0.00230999325597009, 'loss': 0.5021824905551363, 'time_step': 0.0025062207565751185, 'init_value': -13.726407051086426, 'ave_value': -13.535235233912202, 'soft_opc': nan} step=11696




2022-04-22 08:49.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_11696.pt


Epoch 35/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.04 [info     ] FQE_20220422084832: epoch=35 step=12040 epoch=35 metrics={'time_sample_batch': 0.0001303830812143725, 'time_algorithm_update': 0.0022096543811088386, 'loss': 0.5261320057379212, 'time_step': 0.002401956292085869, 'init_value': -13.796165466308594, 'ave_value': -13.41372857187279, 'soft_opc': nan} step=12040




2022-04-22 08:49.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_12040.pt


Epoch 36/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.05 [info     ] FQE_20220422084832: epoch=36 step=12384 epoch=36 metrics={'time_sample_batch': 0.00013572185538535895, 'time_algorithm_update': 0.0022980245046837385, 'loss': 0.5364252054673988, 'time_step': 0.0024994327578433725, 'init_value': -14.318624496459961, 'ave_value': -13.870181324673599, 'soft_opc': nan} step=12384




2022-04-22 08:49.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_12384.pt


Epoch 37/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.06 [info     ] FQE_20220422084832: epoch=37 step=12728 epoch=37 metrics={'time_sample_batch': 0.00013553888298744378, 'time_algorithm_update': 0.002430990684864133, 'loss': 0.5621276466110938, 'time_step': 0.0026277289834133414, 'init_value': -14.771637916564941, 'ave_value': -14.091808524613713, 'soft_opc': nan} step=12728




2022-04-22 08:49.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_12728.pt


Epoch 38/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.07 [info     ] FQE_20220422084832: epoch=38 step=13072 epoch=38 metrics={'time_sample_batch': 0.00013154606486475744, 'time_algorithm_update': 0.002189236324886943, 'loss': 0.5702527998459287, 'time_step': 0.0023811729841454084, 'init_value': -15.0693941116333, 'ave_value': -14.442181398844774, 'soft_opc': nan} step=13072




2022-04-22 08:49.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_13072.pt


Epoch 39/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.08 [info     ] FQE_20220422084832: epoch=39 step=13416 epoch=39 metrics={'time_sample_batch': 0.00013737623081650844, 'time_algorithm_update': 0.002404434043307637, 'loss': 0.6039274413149457, 'time_step': 0.0026042856449304624, 'init_value': -15.537446975708008, 'ave_value': -15.166234048558525, 'soft_opc': nan} step=13416




2022-04-22 08:49.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_13416.pt


Epoch 40/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.09 [info     ] FQE_20220422084832: epoch=40 step=13760 epoch=40 metrics={'time_sample_batch': 0.0001358632431473843, 'time_algorithm_update': 0.002274143141369487, 'loss': 0.6157923977562162, 'time_step': 0.002473233051078264, 'init_value': -15.619030952453613, 'ave_value': -15.035113037486669, 'soft_opc': nan} step=13760




2022-04-22 08:49.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_13760.pt


Epoch 41/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.10 [info     ] FQE_20220422084832: epoch=41 step=14104 epoch=41 metrics={'time_sample_batch': 0.00012765582217726598, 'time_algorithm_update': 0.002161000357117764, 'loss': 0.6210348526243293, 'time_step': 0.0023452819779861806, 'init_value': -15.831415176391602, 'ave_value': -14.927644421009196, 'soft_opc': nan} step=14104




2022-04-22 08:49.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_14104.pt


Epoch 42/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.11 [info     ] FQE_20220422084832: epoch=42 step=14448 epoch=42 metrics={'time_sample_batch': 0.00013557908146880394, 'time_algorithm_update': 0.0024145550506059514, 'loss': 0.6157421508132545, 'time_step': 0.002611073643662209, 'init_value': -16.057605743408203, 'ave_value': -15.154907002137461, 'soft_opc': nan} step=14448




2022-04-22 08:49.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_14448.pt


Epoch 43/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.12 [info     ] FQE_20220422084832: epoch=43 step=14792 epoch=43 metrics={'time_sample_batch': 0.00013003030488657397, 'time_algorithm_update': 0.0021748972493548725, 'loss': 0.6352051198482513, 'time_step': 0.0023673225280850434, 'init_value': -16.421640396118164, 'ave_value': -15.186039745474133, 'soft_opc': nan} step=14792




2022-04-22 08:49.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_14792.pt


Epoch 44/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.13 [info     ] FQE_20220422084832: epoch=44 step=15136 epoch=44 metrics={'time_sample_batch': 0.00014123043348622877, 'time_algorithm_update': 0.002488514018613239, 'loss': 0.6494805704000904, 'time_step': 0.002698480389838995, 'init_value': -17.06381607055664, 'ave_value': -15.77413335311067, 'soft_opc': nan} step=15136




2022-04-22 08:49.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_15136.pt


Epoch 45/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.14 [info     ] FQE_20220422084832: epoch=45 step=15480 epoch=45 metrics={'time_sample_batch': 0.00013082872989565828, 'time_algorithm_update': 0.002250981192256129, 'loss': 0.6676255403801279, 'time_step': 0.002442634382913279, 'init_value': -17.207412719726562, 'ave_value': -15.578805242896264, 'soft_opc': nan} step=15480




2022-04-22 08:49.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_15480.pt


Epoch 46/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.15 [info     ] FQE_20220422084832: epoch=46 step=15824 epoch=46 metrics={'time_sample_batch': 0.00012957703235537508, 'time_algorithm_update': 0.0021625881971314895, 'loss': 0.6703216458486696, 'time_step': 0.0023532156334366908, 'init_value': -17.827552795410156, 'ave_value': -16.25751987097466, 'soft_opc': nan} step=15824




2022-04-22 08:49.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_15824.pt


Epoch 47/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.16 [info     ] FQE_20220422084832: epoch=47 step=16168 epoch=47 metrics={'time_sample_batch': 0.0001364662203677865, 'time_algorithm_update': 0.0023774476938469465, 'loss': 0.682980980761999, 'time_step': 0.0025808229002841684, 'init_value': -17.87101936340332, 'ave_value': -16.18389277340475, 'soft_opc': nan} step=16168




2022-04-22 08:49.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_16168.pt


Epoch 48/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.16 [info     ] FQE_20220422084832: epoch=48 step=16512 epoch=48 metrics={'time_sample_batch': 0.0001277881999348485, 'time_algorithm_update': 0.002133797767550446, 'loss': 0.6926359689826969, 'time_step': 0.0023220867611641106, 'init_value': -18.136873245239258, 'ave_value': -16.269136296997374, 'soft_opc': nan} step=16512




2022-04-22 08:49.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_16512.pt


Epoch 49/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.17 [info     ] FQE_20220422084832: epoch=49 step=16856 epoch=49 metrics={'time_sample_batch': 0.0001409136971762014, 'time_algorithm_update': 0.002477447653925696, 'loss': 0.6865697295329165, 'time_step': 0.002683722695638967, 'init_value': -17.898941040039062, 'ave_value': -16.21542823434648, 'soft_opc': nan} step=16856




2022-04-22 08:49.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_16856.pt


Epoch 50/50:   0%|          | 0/344 [00:00<?, ?it/s]



2022-04-22 08:49.18 [info     ] FQE_20220422084832: epoch=50 step=17200 epoch=50 metrics={'time_sample_batch': 0.00012994990792385367, 'time_algorithm_update': 0.0022307592769001804, 'loss': 0.6750561835265957, 'time_step': 0.0024203734342442, 'init_value': -17.90399932861328, 'ave_value': -16.156077437144535, 'soft_opc': nan} step=17200




2022-04-22 08:49.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220422084832/model_17200.pt
search iteration:  37
using hyper params:  [0.002636244890570254, 0.004081578913848124, 3.275847449954471e-05, 3]
2022-04-22 08:49.18 [debug    ] RoundIterator is selected.
2022-04-22 08:49.18 [info     ] Directory is created at d3rlpy_logs/CQL_20220422084918
2022-04-22 08:49.18 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 08:49.18 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 08:49.18 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422084918/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'vector', 'params': {'hidden_units': [12, 24, 36, 24, 12], 'activation': 'relu', 'use_batch_norm': True, 'dropout_rate': 0.2, 'use_dense': False}}, 'actor_learning_rate': 0.002636244890570254, 'actor_optim_factory': {'optim

Epoch 1/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:49.31 [info     ] CQL_20220422084918: epoch=1 step=346 epoch=1 metrics={'time_sample_batch': 0.0003484335937941005, 'time_algorithm_update': 0.03551001149105888, 'temp_loss': 4.884540256048213, 'temp': 0.9940679755514068, 'alpha_loss': -17.65093259866527, 'alpha': 1.0177901383769306, 'critic_loss': 62.517315814949875, 'actor_loss': 1.1521176377774318, 'time_step': 0.0359515468509211, 'td_error': 1.2373880713201182, 'init_value': -3.8480255603790283, 'ave_value': -3.6216879624384113} step=346
2022-04-22 08:49.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_346.pt


Epoch 2/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:49.44 [info     ] CQL_20220422084918: epoch=2 step=692 epoch=2 metrics={'time_sample_batch': 0.0003525556167426137, 'time_algorithm_update': 0.035636216918857114, 'temp_loss': 4.960670666887581, 'temp': 0.982623757482264, 'alpha_loss': -18.35437238285307, 'alpha': 1.0543510376373468, 'critic_loss': 94.83130568575997, 'actor_loss': 3.838942760677007, 'time_step': 0.03608226155959113, 'td_error': 1.273368269290374, 'init_value': -6.378618240356445, 'ave_value': -5.953086238407082} step=692
2022-04-22 08:49.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_692.pt


Epoch 3/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:49.57 [info     ] CQL_20220422084918: epoch=3 step=1038 epoch=3 metrics={'time_sample_batch': 0.00033903742112176266, 'time_algorithm_update': 0.03487202128923008, 'temp_loss': 4.903941620292002, 'temp': 0.9715503971011652, 'alpha_loss': -19.02058060596444, 'alpha': 1.09266651469159, 'critic_loss': 180.33004575519894, 'actor_loss': 5.891146362172386, 'time_step': 0.035304868841446894, 'td_error': 1.3105241053233259, 'init_value': -7.753227710723877, 'ave_value': -7.358021107167642} step=1038
2022-04-22 08:49.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_1038.pt


Epoch 4/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:50.09 [info     ] CQL_20220422084918: epoch=4 step=1384 epoch=4 metrics={'time_sample_batch': 0.00034726492931388015, 'time_algorithm_update': 0.03483209513515406, 'temp_loss': 4.849397357488643, 'temp': 0.9606961230666651, 'alpha_loss': -19.717559224608316, 'alpha': 1.132864123134944, 'critic_loss': 316.1358765177644, 'actor_loss': 6.503409353983884, 'time_step': 0.03527023888736791, 'td_error': 1.2919249635482994, 'init_value': -7.532780647277832, 'ave_value': -7.180318868618093} step=1384
2022-04-22 08:50.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_1384.pt


Epoch 5/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:50.22 [info     ] CQL_20220422084918: epoch=5 step=1730 epoch=5 metrics={'time_sample_batch': 0.00034884221291955496, 'time_algorithm_update': 0.034813470923142624, 'temp_loss': 4.7964518345849365, 'temp': 0.9500184793003722, 'alpha_loss': -20.43774349565451, 'alpha': 1.1749711980709452, 'critic_loss': 511.24484658654717, 'actor_loss': 5.172683061202827, 'time_step': 0.0352573911578669, 'td_error': 1.2850608199318778, 'init_value': -5.988244533538818, 'ave_value': -5.828110192770526} step=1730
2022-04-22 08:50.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_1730.pt


Epoch 6/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:50.35 [info     ] CQL_20220422084918: epoch=6 step=2076 epoch=6 metrics={'time_sample_batch': 0.0003438581621026717, 'time_algorithm_update': 0.03590080297062163, 'temp_loss': 4.74300476581375, 'temp': 0.9395015682443718, 'alpha_loss': -21.19982770137015, 'alpha': 1.2190374866386369, 'critic_loss': 759.5368726432667, 'actor_loss': 3.5531483495855607, 'time_step': 0.03633782904961206, 'td_error': 1.2812802178067257, 'init_value': -4.949656963348389, 'ave_value': -4.8694185086029185} step=2076
2022-04-22 08:50.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_2076.pt


Epoch 7/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:50.47 [info     ] CQL_20220422084918: epoch=7 step=2422 epoch=7 metrics={'time_sample_batch': 0.00034222437467189195, 'time_algorithm_update': 0.03504173466235916, 'temp_loss': 4.690974999025378, 'temp': 0.9291340465835064, 'alpha_loss': -22.00256198816906, 'alpha': 1.265114289832253, 'critic_loss': 1018.435961949343, 'actor_loss': 3.0872892237812106, 'time_step': 0.035476567428236065, 'td_error': 1.286044432589329, 'init_value': -4.904647350311279, 'ave_value': -4.850193469792307} step=2422
2022-04-22 08:50.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_2422.pt


Epoch 8/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:51.00 [info     ] CQL_20220422084918: epoch=8 step=2768 epoch=8 metrics={'time_sample_batch': 0.00034917296701773056, 'time_algorithm_update': 0.035156425713114656, 'temp_loss': 4.639916118169795, 'temp': 0.9189050094240663, 'alpha_loss': -22.837185082408045, 'alpha': 1.3132394224922093, 'critic_loss': 1274.70741298433, 'actor_loss': 3.0991615739171903, 'time_step': 0.03559744082434329, 'td_error': 1.292087639626396, 'init_value': -5.144514560699463, 'ave_value': -5.102650731612631} step=2768
2022-04-22 08:51.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_2768.pt


Epoch 9/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:51.13 [info     ] CQL_20220422084918: epoch=9 step=3114 epoch=9 metrics={'time_sample_batch': 0.00037016620525734964, 'time_algorithm_update': 0.036235876166062546, 'temp_loss': 4.58747720167127, 'temp': 0.9088100982194691, 'alpha_loss': -23.707743154095777, 'alpha': 1.3634490594698514, 'critic_loss': 1548.6092765675803, 'actor_loss': 3.296335625510684, 'time_step': 0.036697078302416496, 'td_error': 1.2951227265270067, 'init_value': -5.064682483673096, 'ave_value': -5.041173429588618} step=3114
2022-04-22 08:51.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_3114.pt


Epoch 10/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:51.26 [info     ] CQL_20220422084918: epoch=10 step=3460 epoch=10 metrics={'time_sample_batch': 0.00034841361073400243, 'time_algorithm_update': 0.035543696728744946, 'temp_loss': 4.539022715794558, 'temp': 0.8988395867320155, 'alpha_loss': -24.61841529228784, 'alpha': 1.4157924056053162, 'critic_loss': 1843.9340802672282, 'actor_loss': 3.546693837022506, 'time_step': 0.03598423431374434, 'td_error': 1.302513126387879, 'init_value': -5.557705402374268, 'ave_value': -5.529265485956184} step=3460
2022-04-22 08:51.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_3460.pt


Epoch 11/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:51.39 [info     ] CQL_20220422084918: epoch=11 step=3806 epoch=11 metrics={'time_sample_batch': 0.0003499454156511781, 'time_algorithm_update': 0.035685320121015426, 'temp_loss': 4.489475168933758, 'temp': 0.8889881031361618, 'alpha_loss': -25.571356222119636, 'alpha': 1.470327709451576, 'critic_loss': 2158.875588830496, 'actor_loss': 3.8319596520738104, 'time_step': 0.036125713690167906, 'td_error': 1.3065011144363041, 'init_value': -5.680444717407227, 'ave_value': -5.6614474743788366} step=3806
2022-04-22 08:51.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_3806.pt


Epoch 12/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:51.52 [info     ] CQL_20220422084918: epoch=12 step=4152 epoch=12 metrics={'time_sample_batch': 0.0003445782413372415, 'time_algorithm_update': 0.0356607526713024, 'temp_loss': 4.440448820246437, 'temp': 0.8792545840230291, 'alpha_loss': -26.55516930949481, 'alpha': 1.5271107644014965, 'critic_loss': 2480.9367767510385, 'actor_loss': 4.169111778281327, 'time_step': 0.03609980875357038, 'td_error': 1.3133342784255775, 'init_value': -6.066677570343018, 'ave_value': -6.048216349663696} step=4152
2022-04-22 08:51.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_4152.pt


Epoch 13/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:52.05 [info     ] CQL_20220422084918: epoch=13 step=4498 epoch=13 metrics={'time_sample_batch': 0.0003438085489879454, 'time_algorithm_update': 0.03652805193311217, 'temp_loss': 4.390416148080991, 'temp': 0.8696365607956241, 'alpha_loss': -27.58315262215675, 'alpha': 1.5861953004247191, 'critic_loss': 2764.951597357072, 'actor_loss': 4.514793528297733, 'time_step': 0.03696441512576418, 'td_error': 1.3188675738215487, 'init_value': -6.303458213806152, 'ave_value': -6.291534291742649} step=4498
2022-04-22 08:52.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_4498.pt


Epoch 14/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:52.17 [info     ] CQL_20220422084918: epoch=14 step=4844 epoch=14 metrics={'time_sample_batch': 0.00033787082385465587, 'time_algorithm_update': 0.034794070128071515, 'temp_loss': 4.34278382180054, 'temp': 0.8601307185054514, 'alpha_loss': -28.652943528456493, 'alpha': 1.6476632477231108, 'critic_loss': 2980.2349931132585, 'actor_loss': 4.908447841688387, 'time_step': 0.03521784399286171, 'td_error': 1.326987039814729, 'init_value': -6.743101596832275, 'ave_value': -6.731886300728637} step=4844
2022-04-22 08:52.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_4844.pt


Epoch 15/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:52.30 [info     ] CQL_20220422084918: epoch=15 step=5190 epoch=15 metrics={'time_sample_batch': 0.00034615828122706773, 'time_algorithm_update': 0.035767203810587095, 'temp_loss': 4.2965482549171226, 'temp': 0.8507302293198646, 'alpha_loss': -29.761144990865894, 'alpha': 1.7115802292878917, 'critic_loss': 3127.4225375666097, 'actor_loss': 5.322921065236792, 'time_step': 0.03620206896280278, 'td_error': 1.3355994052792803, 'init_value': -7.161276340484619, 'ave_value': -7.151482721819817} step=5190
2022-04-22 08:52.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_5190.pt


Epoch 16/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:52.43 [info     ] CQL_20220422084918: epoch=16 step=5536 epoch=16 metrics={'time_sample_batch': 0.00035170599215292515, 'time_algorithm_update': 0.03490150939522451, 'temp_loss': 4.249865458879857, 'temp': 0.8414351902256122, 'alpha_loss': -30.917996241178127, 'alpha': 1.7780329516168274, 'critic_loss': 3210.9109020784413, 'actor_loss': 5.81487006672545, 'time_step': 0.035348508399345974, 'td_error': 1.3436317685305894, 'init_value': -7.45188570022583, 'ave_value': -7.4519125186419135} step=5536
2022-04-22 08:52.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_5536.pt


Epoch 17/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:52.55 [info     ] CQL_20220422084918: epoch=17 step=5882 epoch=17 metrics={'time_sample_batch': 0.00034935832712691645, 'time_algorithm_update': 0.035108703409316225, 'temp_loss': 4.202695989884393, 'temp': 0.8322456024285686, 'alpha_loss': -32.1202510491961, 'alpha': 1.84711483758309, 'critic_loss': 3296.1290530166184, 'actor_loss': 6.327755521487639, 'time_step': 0.03555536339048705, 'td_error': 1.3590886098016075, 'init_value': -8.311503410339355, 'ave_value': -8.300902775149032} step=5882
2022-04-22 08:52.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_5882.pt


Epoch 18/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:53.08 [info     ] CQL_20220422084918: epoch=18 step=6228 epoch=18 metrics={'time_sample_batch': 0.00035553929433657257, 'time_algorithm_update': 0.03502303120717837, 'temp_loss': 4.158393413345249, 'temp': 0.8231559397512778, 'alpha_loss': -33.368097680152495, 'alpha': 1.9189178819601247, 'critic_loss': 3395.2876922078217, 'actor_loss': 6.889325629769033, 'time_step': 0.03547015631129976, 'td_error': 1.3704146392949517, 'init_value': -8.790172576904297, 'ave_value': -8.780669364685016} step=6228
2022-04-22 08:53.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_6228.pt


Epoch 19/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:53.21 [info     ] CQL_20220422084918: epoch=19 step=6574 epoch=19 metrics={'time_sample_batch': 0.0003496050145584724, 'time_algorithm_update': 0.03570585788329902, 'temp_loss': 4.112028718683761, 'temp': 0.8141680551057606, 'alpha_loss': -34.66333167676981, 'alpha': 1.9935314283205594, 'critic_loss': 3449.686888942829, 'actor_loss': 7.440880475016687, 'time_step': 0.03615419230709186, 'td_error': 1.3828984145213372, 'init_value': -9.275908470153809, 'ave_value': -9.270677745008763} step=6574
2022-04-22 08:53.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_6574.pt


Epoch 20/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:53.34 [info     ] CQL_20220422084918: epoch=20 step=6920 epoch=20 metrics={'time_sample_batch': 0.0003426977664749057, 'time_algorithm_update': 0.03627724110046563, 'temp_loss': 4.068406977405438, 'temp': 0.805278903314833, 'alpha_loss': -36.01507277295769, 'alpha': 2.071070923281543, 'critic_loss': 3558.76049522444, 'actor_loss': 8.049366594049973, 'time_step': 0.03670887175322957, 'td_error': 1.3976160783980094, 'init_value': -9.862515449523926, 'ave_value': -9.859762699810913} step=6920
2022-04-22 08:53.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_6920.pt


Epoch 21/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:53.47 [info     ] CQL_20220422084918: epoch=21 step=7266 epoch=21 metrics={'time_sample_batch': 0.00034657241292082504, 'time_algorithm_update': 0.035075359950864936, 'temp_loss': 4.022937124864215, 'temp': 0.7964882550901071, 'alpha_loss': -37.41469828655265, 'alpha': 2.151647285229898, 'critic_loss': 3552.6942329186236, 'actor_loss': 8.608321167830098, 'time_step': 0.035512725741877034, 'td_error': 1.4127837235971086, 'init_value': -10.439735412597656, 'ave_value': -10.4368058887914} step=7266
2022-04-22 08:53.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_7266.pt


Epoch 22/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:53.59 [info     ] CQL_20220422084918: epoch=22 step=7612 epoch=22 metrics={'time_sample_batch': 0.0003544236883262678, 'time_algorithm_update': 0.035075625932285553, 'temp_loss': 3.9786730639507315, 'temp': 0.7877953312645068, 'alpha_loss': -38.87241238941347, 'alpha': 2.2353692764491706, 'critic_loss': 3623.3524247538835, 'actor_loss': 9.259876816258954, 'time_step': 0.03552148383476831, 'td_error': 1.4307994509266744, 'init_value': -11.120220184326172, 'ave_value': -11.116587250421487} step=7612
2022-04-22 08:53.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_7612.pt


Epoch 23/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:54.12 [info     ] CQL_20220422084918: epoch=23 step=7958 epoch=23 metrics={'time_sample_batch': 0.00035568744460971367, 'time_algorithm_update': 0.0357382276843738, 'temp_loss': 3.9355530352950785, 'temp': 0.7791970628878974, 'alpha_loss': -40.383376248310064, 'alpha': 2.3223581141819154, 'critic_loss': 3569.902558255058, 'actor_loss': 9.872827849636188, 'time_step': 0.036188416398329544, 'td_error': 1.447227356657898, 'init_value': -11.693181991577148, 'ave_value': -11.689843338459461} step=7958
2022-04-22 08:54.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_7958.pt


Epoch 24/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:54.25 [info     ] CQL_20220422084918: epoch=24 step=8304 epoch=24 metrics={'time_sample_batch': 0.00035748936537373274, 'time_algorithm_update': 0.03595331914163049, 'temp_loss': 3.8919364255287743, 'temp': 0.7706933975908797, 'alpha_loss': -41.95109846964048, 'alpha': 2.4127262665357203, 'critic_loss': 3626.237536832776, 'actor_loss': 10.543516872935212, 'time_step': 0.03640322464738967, 'td_error': 1.465147750656161, 'init_value': -12.262456893920898, 'ave_value': -12.262600786915431} step=8304
2022-04-22 08:54.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_8304.pt


Epoch 25/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:54.38 [info     ] CQL_20220422084918: epoch=25 step=8650 epoch=25 metrics={'time_sample_batch': 0.0003363872539101308, 'time_algorithm_update': 0.035433769915145255, 'temp_loss': 3.8491937659379376, 'temp': 0.7622843179744103, 'alpha_loss': -43.586108246290614, 'alpha': 2.5066244361028507, 'critic_loss': 3691.11342166614, 'actor_loss': 11.137667937085808, 'time_step': 0.03585418800398105, 'td_error': 1.4818454667115988, 'init_value': -12.757338523864746, 'ave_value': -12.76156118084549} step=8650
2022-04-22 08:54.38 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_8650.pt


Epoch 26/50:   0%|          | 0/346 [00:00<?, ?it/s]

2022-04-22 08:54.50 [info     ] CQL_20220422084918: epoch=26 step=8996 epoch=26 metrics={'time_sample_batch': 0.00034888493532390263, 'time_algorithm_update': 0.03487764893239633, 'temp_loss': 3.807430429265678, 'temp': 0.7539667252860317, 'alpha_loss': -45.28020405631534, 'alpha': 2.6041824142367855, 'critic_loss': 3552.2794549313585, 'actor_loss': 11.696581545592732, 'time_step': 0.03530953936494155, 'td_error': 1.5035161830144845, 'init_value': -13.511636734008789, 'ave_value': -13.508767803476116} step=8996
2022-04-22 08:54.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422084918/model_8996.pt


Epoch 27/50:   0%|          | 0/346 [00:00<?, ?it/s]

## Reading hyper params from file

In [None]:
# Read back the object stored in "hyperparams_cql.pkl" and print it.
# NOTE(review): `pkl` is not imported in the visible part of this notebook;
# presumably an earlier cell does `import pickle as pkl`. Confirm it does, so
# that this cell survives Restart Kernel -> Run All.
# NOTE(review): if `pkl` is the standard pickle module, loading can execute
# arbitrary code. Only load a file that this notebook (or another trusted
# source) produced.
with open("hyperparams_cql.pkl", "rb") as f:  # binary mode; file is closed when the block exits
    data = pkl.load(f)  # structure of the stored object is not visible from this cell

print(data)