# Sample Workflow for d3rlpy Experiments

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
plt.style.use('matplotlibrc')

from Python.data_sampler import *

## Building an MDPDataset

We first read in a large batch of samples from the file. As `d3rlpy` wants it in the form (observations, actions, rewards, terminal flags), we go ahead and do that. Here's a helper function to get a dataset from a list of chunks of your choosing.

In [2]:
def get_dataset(chunks : list, batch_size=30000, 
                path="collected_data/rl_det_small.txt") -> d3rlpy.dataset.MDPDataset :
    random.seed(0)
    samples = DataSampler(path_to_data=path)
    states = []
    actions = []
    rewards = []
    next_states = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        [statesChunk, actionsChunk, rewardsChunk, nextStatesChunk] = samples.get_batch(batch_size)
        states.append(statesChunk)
        actions.append(actionsChunk)
        rewards.append(rewardsChunk)
        next_states.append(nextStatesChunk)
    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)
    next_states = torch.cat(next_states)
    terminals = np.zeros(len(states))
    terminals[::100] = 1 #episode length 100, change if necessary
    print(states.shape)
    dataset = d3rlpy.dataset.MDPDataset(states.numpy(), 
                                        actions.numpy(), 
                                        rewards.numpy(), terminals)
    return dataset

We can build the dataset from there, just like this, and split into train and test sets.

In [14]:
dataset = get_dataset([3,5,7,9], path="collected_data/rl_stoch_small.txt")

[ 0.00000000e+00  7.95731469e+08 -1.23489108e-01 -3.79999531e-03
  2.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.60168026e-02 -3.40510126e-01  6.00000000e-01]
Read chunk # 4 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.28589108e-01  1.20000047e-02
  1.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.75212759e-03 -3.52719043e-01  6.00000000e-01]
Read chunk # 6 out of 10000
[ 0.00000000e+00  7.95731469e+08  9.97108923e-02 -1.79999531e-03
 -6.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.78416413e-01  3.07580560e-01 -6.00000000e-01]
Read chunk # 8 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.03989108e-01  7.00000469e-03
 -8.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.65974295e-01 -2.19295880e-01  6.00000000e-01]
Read chunk # 10 out of 10000
torch.Size([111080, 6])


In [15]:
print("The behavior policy value statistics are:")
dataset.compute_stats()['return']

The behavior policy value statistics are:


{'mean': -5.6349497,
 'std': 2.8907404,
 'min': -12.561853,
 'max': 0.0,
 'histogram': (array([ 4, 15, 52, 42, 32, 38, 36, 85, 65, 56, 87, 98, 90, 78, 67, 80, 86,
         54, 36, 10]),
  array([-12.561853  , -11.933761  , -11.305668  , -10.677575  ,
         -10.049482  ,  -9.421391  ,  -8.793298  ,  -8.165205  ,
          -7.537112  ,  -6.9090195 ,  -6.2809267 ,  -5.652834  ,
          -5.024741  ,  -4.396649  ,  -3.768556  ,  -3.1404634 ,
          -2.5123706 ,  -1.884278  ,  -1.2561853 ,  -0.62809265,
           0.        ], dtype=float32))}

In [16]:
from sklearn.model_selection import train_test_split
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

## Setting up an Algorithm

In [17]:
from d3rlpy.algos import CQL

from d3rlpy.preprocessing import MinMaxActionScaler
action_scaler = MinMaxActionScaler(minimum=-0.6, maximum=0.6)
#cql = CQL(action_scaler=action_scaler)

model = CQL(q_func_factory='mean', #qr -> quantile regression q function, but you don't have to use this
            reward_scaler='standard',
            action_scaler=action_scaler,
          actor_learning_rate=1e-5, 
          critic_learning_rate=0.0003, 
            use_gpu=False) #change it to true if you have one
model.build_with_dataset(dataset)

In [18]:
from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from d3rlpy.metrics.scorer import initial_state_value_estimation_scorer

# calculate metrics with test dataset
ave_error_init = average_value_estimation_scorer(model, test_episodes)
print(ave_error_init)

0.14527341764378593


In [8]:
%load_ext tensorboard
%tensorboard --logdir runs

In [19]:
model.fit(train_episodes,
        eval_episodes=test_episodes,
        n_epochs=5, 
        tensorboard_dir='runs',
        scorers={
            'td_error': td_error_scorer,
            'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer
        })

2022-04-07 20:17.42 [debug    ] RoundIterator is selected.
2022-04-07 20:17.42 [info     ] Directory is created at d3rlpy_logs/CQL_20220407201742
2022-04-07 20:17.42 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-07 20:17.42 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-07 20:17.42 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220407201742/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 1e-05, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_rate': 0.0001, 'alpha_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_threshold': 10.0, 'batch_size': 256, 'conser

Epoch 1/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 20:18.08 [info     ] CQL_20220407201742: epoch=1 step=343 epoch=1 metrics={'time_sample_batch': 0.0003316763886209827, 'time_algorithm_update': 0.0751432427164417, 'temp_loss': 4.685994176753408, 'temp': 0.983123943339979, 'alpha_loss': -9.663411787578038, 'alpha': 1.0142724656502637, 'critic_loss': 10.895247617198844, 'actor_loss': 0.31483166424269754, 'time_step': 0.07555461138399976, 'td_error': 3.2080431202054194, 'init_value': -0.8907099962234497, 'ave_value': -0.8926067945390094} step=343
2022-04-07 20:18.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407201742/model_343.pt


Epoch 2/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 20:18.32 [info     ] CQL_20220407201742: epoch=2 step=686 epoch=2 metrics={'time_sample_batch': 0.0003129448904587993, 'time_algorithm_update': 0.06703745310925187, 'temp_loss': 2.988745463485273, 'temp': 0.9548389737877137, 'alpha_loss': 3.04989481067866, 'alpha': 1.0203838549967061, 'critic_loss': -1.8484127205245349, 'actor_loss': 3.9301308563777377, 'time_step': 0.06742987479829927, 'td_error': 6.290793203723951, 'init_value': -2.344316005706787, 'ave_value': -2.3325591981343483} step=686
2022-04-07 20:18.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407201742/model_686.pt


Epoch 3/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 20:18.57 [info     ] CQL_20220407201742: epoch=3 step=1029 epoch=3 metrics={'time_sample_batch': 0.0003350066026515238, 'time_algorithm_update': 0.0703233365762338, 'temp_loss': 1.5871880928559483, 'temp': 0.9356211336986664, 'alpha_loss': 8.64508952621816, 'alpha': 0.9925460017804849, 'critic_loss': -6.64621107779856, 'actor_loss': 6.427049009862516, 'time_step': 0.07074148592378933, 'td_error': 4.139094522106542, 'init_value': -2.656848192214966, 'ave_value': -2.6353507106727614} step=1029
2022-04-07 20:18.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407201742/model_1029.pt


Epoch 4/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 20:19.22 [info     ] CQL_20220407201742: epoch=4 step=1372 epoch=4 metrics={'time_sample_batch': 0.00033195859836767435, 'time_algorithm_update': 0.07073451617716353, 'temp_loss': 0.7756605448945271, 'temp': 0.9236333154728392, 'alpha_loss': 11.25295761375316, 'alpha': 0.9514596969671222, 'critic_loss': -8.302424507307936, 'actor_loss': 8.350151242737173, 'time_step': 0.07114918864503199, 'td_error': 4.421148879792159, 'init_value': -3.4863736629486084, 'ave_value': -3.456131781871948} step=1372
2022-04-07 20:19.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407201742/model_1372.pt


Epoch 5/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 20:19.50 [info     ] CQL_20220407201742: epoch=5 step=1715 epoch=5 metrics={'time_sample_batch': 0.000346282480757021, 'time_algorithm_update': 0.07982820116048651, 'temp_loss': 0.3819195845492901, 'temp': 0.9162918164153141, 'alpha_loss': 12.809436689660432, 'alpha': 0.9108857491621123, 'critic_loss': -8.761230831591103, 'actor_loss': 10.82426604724139, 'time_step': 0.08026000014547009, 'td_error': 6.248317634214251, 'init_value': -5.737633228302002, 'ave_value': -5.695264149381874} step=1715
2022-04-07 20:19.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407201742/model_1715.pt


[(1,
  {'time_sample_batch': 0.0003316763886209827,
   'time_algorithm_update': 0.0751432427164417,
   'temp_loss': 4.685994176753408,
   'temp': 0.983123943339979,
   'alpha_loss': -9.663411787578038,
   'alpha': 1.0142724656502637,
   'critic_loss': 10.895247617198844,
   'actor_loss': 0.31483166424269754,
   'time_step': 0.07555461138399976,
   'td_error': 3.2080431202054194,
   'init_value': -0.8907099962234497,
   'ave_value': -0.8926067945390094}),
 (2,
  {'time_sample_batch': 0.0003129448904587993,
   'time_algorithm_update': 0.06703745310925187,
   'temp_loss': 2.988745463485273,
   'temp': 0.9548389737877137,
   'alpha_loss': 3.04989481067866,
   'alpha': 1.0203838549967061,
   'critic_loss': -1.8484127205245349,
   'actor_loss': 3.9301308563777377,
   'time_step': 0.06742987479829927,
   'td_error': 6.290793203723951,
   'init_value': -2.344316005706787,
   'ave_value': -2.3325591981343483}),
 (3,
  {'time_sample_batch': 0.0003350066026515238,
   'time_algorithm_update': 0.07

## Off-Policy Evaluation

We do get some metrics on a test set of initial state value and average value. However, these estimates (using the critic's Q-function) of model performance are biased. They're useful for validation during training, but not much else. Instead, we fit a Q-function to the data (or a separate dataset, as I've done here) separately and evaluate the model's performance on it.

Feel free to change the chunks and number of steps.

In [20]:
from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer


ope_dataset = get_dataset([2,4,6,8], path="collected_data/rl_det_small.txt") #change if you'd prefer different chunks
ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })

[ 0.00000000e+00  7.95731469e+08 -1.03891077e-02 -1.41999953e-02
 -2.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.78778459e-03 -1.34615461e-02  4.84073546e-02]
Read chunk # 3 out of 10000
[ 0.00000000e+00  7.95731469e+08 -7.24891077e-02 -1.35999953e-02
 -4.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.23311010e-02 -1.64283998e-01  6.00000000e-01]
Read chunk # 5 out of 10000
[ 0.00000000e+00  7.95731469e+08  7.01089229e-03 -4.19999531e-03
  7.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.21623335e-01 -2.86362315e-02 -8.00043364e-02]
Read chunk # 7 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.03989108e-01 -1.37999953e-02
  7.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.76352555e-01 -3.26280816e-01  6.00000000e-01]
Read chunk # 9 out of 10000
torch.Size([111080, 6])
2022-04-07 20:19.51 [debug    ] RoundIterator is selected.
2022-04-07 20:19.51 [info     ] Directory is created at d3rlpy_logs/FQE_2022040720195

Epoch 1/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:19.53 [info     ] FQE_20220407201951: epoch=1 step=878 epoch=1 metrics={'time_sample_batch': 0.00013968911963877753, 'time_algorithm_update': 0.0018511578814043813, 'loss': 0.00017434491303991263, 'time_step': 0.0020544406764871166, 'init_value': -0.14161010086536407, 'ave_value': -0.14164950612826305, 'soft_opc': nan} step=878




2022-04-07 20:19.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_878.pt


Epoch 2/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:19.55 [info     ] FQE_20220407201951: epoch=2 step=1756 epoch=2 metrics={'time_sample_batch': 0.00013360727349283482, 'time_algorithm_update': 0.001868682461610416, 'loss': 0.0005978871645425246, 'time_step': 0.0020631492002135257, 'init_value': -0.3587397336959839, 'ave_value': -0.3586608218960169, 'soft_opc': nan} step=1756




2022-04-07 20:19.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_1756.pt


Epoch 3/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:19.57 [info     ] FQE_20220407201951: epoch=3 step=2634 epoch=3 metrics={'time_sample_batch': 0.00013338270382892025, 'time_algorithm_update': 0.001851950799685676, 'loss': 0.001988239196115851, 'time_step': 0.0020455303930747484, 'init_value': -0.5455045104026794, 'ave_value': -0.5452594495059389, 'soft_opc': nan} step=2634




2022-04-07 20:19.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_2634.pt


Epoch 4/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:19.59 [info     ] FQE_20220407201951: epoch=4 step=3512 epoch=4 metrics={'time_sample_batch': 0.00013503995734629706, 'time_algorithm_update': 0.0019145256294476416, 'loss': 0.0036624751454680227, 'time_step': 0.0021117569916883744, 'init_value': -0.6928534507751465, 'ave_value': -0.6925587982699324, 'soft_opc': nan} step=3512




2022-04-07 20:19.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_3512.pt


Epoch 5/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.01 [info     ] FQE_20220407201951: epoch=5 step=4390 epoch=5 metrics={'time_sample_batch': 0.0001464213217037957, 'time_algorithm_update': 0.0020950997337393445, 'loss': 0.005523266353713412, 'time_step': 0.002309610468923095, 'init_value': -0.8263406157493591, 'ave_value': -0.8261094028715282, 'soft_opc': nan} step=4390




2022-04-07 20:20.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_4390.pt


Epoch 6/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.04 [info     ] FQE_20220407201951: epoch=6 step=5268 epoch=6 metrics={'time_sample_batch': 0.00013806282252005401, 'time_algorithm_update': 0.0018850977708647083, 'loss': 0.007948995087319752, 'time_step': 0.002086570702815653, 'init_value': -0.972024142742157, 'ave_value': -0.971640245534968, 'soft_opc': nan} step=5268




2022-04-07 20:20.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_5268.pt


Epoch 7/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.06 [info     ] FQE_20220407201951: epoch=7 step=6146 epoch=7 metrics={'time_sample_batch': 0.00014298651919006486, 'time_algorithm_update': 0.002056941356094117, 'loss': 0.0101879981528924, 'time_step': 0.0022688536546224885, 'init_value': -1.0649253129959106, 'ave_value': -1.0645496407329278, 'soft_opc': nan} step=6146




2022-04-07 20:20.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_6146.pt


Epoch 8/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.08 [info     ] FQE_20220407201951: epoch=8 step=7024 epoch=8 metrics={'time_sample_batch': 0.00013978850597129596, 'time_algorithm_update': 0.0019912008272488188, 'loss': 0.0118732842714722, 'time_step': 0.002188912285216034, 'init_value': -1.1291826963424683, 'ave_value': -1.1287491821495417, 'soft_opc': nan} step=7024




2022-04-07 20:20.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_7024.pt


Epoch 9/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.10 [info     ] FQE_20220407201951: epoch=9 step=7902 epoch=9 metrics={'time_sample_batch': 0.0001353158494606105, 'time_algorithm_update': 0.0018772060615869753, 'loss': 0.013625111107221383, 'time_step': 0.002070951298863578, 'init_value': -1.2057225704193115, 'ave_value': -1.2053417390971835, 'soft_opc': nan} step=7902




2022-04-07 20:20.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_7902.pt


Epoch 10/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.12 [info     ] FQE_20220407201951: epoch=10 step=8780 epoch=10 metrics={'time_sample_batch': 0.00013268672795393473, 'time_algorithm_update': 0.0018135154437367085, 'loss': 0.015098140058017512, 'time_step': 0.0020013110784300367, 'init_value': -1.2827348709106445, 'ave_value': -1.2823601973339198, 'soft_opc': nan} step=8780




2022-04-07 20:20.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_8780.pt


Epoch 11/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.14 [info     ] FQE_20220407201951: epoch=11 step=9658 epoch=11 metrics={'time_sample_batch': 0.00013820945809262216, 'time_algorithm_update': 0.0020244341504872523, 'loss': 0.015893354086347865, 'time_step': 0.002222861678833842, 'init_value': -1.2878590822219849, 'ave_value': -1.2875085428081063, 'soft_opc': nan} step=9658




2022-04-07 20:20.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_9658.pt


Epoch 12/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.17 [info     ] FQE_20220407201951: epoch=12 step=10536 epoch=12 metrics={'time_sample_batch': 0.00013884352117030116, 'time_algorithm_update': 0.001960855410147908, 'loss': 0.01670567717185624, 'time_step': 0.0021598499293750944, 'init_value': -1.2635263204574585, 'ave_value': -1.2630503940216176, 'soft_opc': nan} step=10536




2022-04-07 20:20.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_10536.pt


Epoch 13/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.19 [info     ] FQE_20220407201951: epoch=13 step=11414 epoch=13 metrics={'time_sample_batch': 0.0001378890322118251, 'time_algorithm_update': 0.0020940740993734375, 'loss': 0.016807072452395422, 'time_step': 0.002292063621833819, 'init_value': -1.329136848449707, 'ave_value': -1.3288833156478477, 'soft_opc': nan} step=11414




2022-04-07 20:20.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_11414.pt


Epoch 14/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.21 [info     ] FQE_20220407201951: epoch=14 step=12292 epoch=14 metrics={'time_sample_batch': 0.0001319600672276525, 'time_algorithm_update': 0.0019002935609404752, 'loss': 0.017267204377485603, 'time_step': 0.0020889988792784395, 'init_value': -1.3035989999771118, 'ave_value': -1.3035099054611847, 'soft_opc': nan} step=12292




2022-04-07 20:20.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_12292.pt


Epoch 15/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.23 [info     ] FQE_20220407201951: epoch=15 step=13170 epoch=15 metrics={'time_sample_batch': 0.0001314310929769807, 'time_algorithm_update': 0.0018178401069380429, 'loss': 0.01657554216225951, 'time_step': 0.002007786396695704, 'init_value': -1.321296215057373, 'ave_value': -1.3212653598869852, 'soft_opc': nan} step=13170




2022-04-07 20:20.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_13170.pt


Epoch 16/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.25 [info     ] FQE_20220407201951: epoch=16 step=14048 epoch=16 metrics={'time_sample_batch': 0.00013561428000552237, 'time_algorithm_update': 0.001885271561172937, 'loss': 0.016408117045324376, 'time_step': 0.0020745753698848644, 'init_value': -1.3324601650238037, 'ave_value': -1.3326752208500294, 'soft_opc': nan} step=14048




2022-04-07 20:20.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_14048.pt


Epoch 17/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.27 [info     ] FQE_20220407201951: epoch=17 step=14926 epoch=17 metrics={'time_sample_batch': 0.0001377882881425236, 'time_algorithm_update': 0.001962012201887057, 'loss': 0.017231093800269045, 'time_step': 0.0021595191846947464, 'init_value': -1.3534801006317139, 'ave_value': -1.3535727238522257, 'soft_opc': nan} step=14926




2022-04-07 20:20.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_14926.pt


Epoch 18/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.29 [info     ] FQE_20220407201951: epoch=18 step=15804 epoch=18 metrics={'time_sample_batch': 0.0001402479640986762, 'time_algorithm_update': 0.0019014842960992, 'loss': 0.01880874426652763, 'time_step': 0.002101572064984089, 'init_value': -1.3891674280166626, 'ave_value': -1.3891581251671248, 'soft_opc': nan} step=15804




2022-04-07 20:20.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_15804.pt


Epoch 19/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.32 [info     ] FQE_20220407201951: epoch=19 step=16682 epoch=19 metrics={'time_sample_batch': 0.00013319560770021755, 'time_algorithm_update': 0.0018420898289778238, 'loss': 0.019271427850374105, 'time_step': 0.0020350559968883193, 'init_value': -1.419416069984436, 'ave_value': -1.4194773630144026, 'soft_opc': nan} step=16682




2022-04-07 20:20.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_16682.pt


Epoch 20/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.34 [info     ] FQE_20220407201951: epoch=20 step=17560 epoch=20 metrics={'time_sample_batch': 0.00013807368441431833, 'time_algorithm_update': 0.0019072196478181114, 'loss': 0.02089307879947779, 'time_step': 0.002105617577502831, 'init_value': -1.4322361946105957, 'ave_value': -1.4323274309276515, 'soft_opc': nan} step=17560




2022-04-07 20:20.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_17560.pt


Epoch 21/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.36 [info     ] FQE_20220407201951: epoch=21 step=18438 epoch=21 metrics={'time_sample_batch': 0.00014038129385077056, 'time_algorithm_update': 0.0021106667290515945, 'loss': 0.01975251328030832, 'time_step': 0.0023133045991623863, 'init_value': -1.4155908823013306, 'ave_value': -1.415757464483477, 'soft_opc': nan} step=18438




2022-04-07 20:20.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_18438.pt


Epoch 22/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.38 [info     ] FQE_20220407201951: epoch=22 step=19316 epoch=22 metrics={'time_sample_batch': 0.00013504375900928958, 'time_algorithm_update': 0.0018709210980182899, 'loss': 0.01984872690622554, 'time_step': 0.0020663700234102495, 'init_value': -1.4433122873306274, 'ave_value': -1.4435057059661383, 'soft_opc': nan} step=19316




2022-04-07 20:20.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_19316.pt


Epoch 23/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.40 [info     ] FQE_20220407201951: epoch=23 step=20194 epoch=23 metrics={'time_sample_batch': 0.0001315926636541623, 'time_algorithm_update': 0.001820720952844294, 'loss': 0.02115246498422303, 'time_step': 0.002011414812474696, 'init_value': -1.4736511707305908, 'ave_value': -1.4737781139367645, 'soft_opc': nan} step=20194




2022-04-07 20:20.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_20194.pt


Epoch 24/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.42 [info     ] FQE_20220407201951: epoch=24 step=21072 epoch=24 metrics={'time_sample_batch': 0.00013523601453776783, 'time_algorithm_update': 0.00182033942880826, 'loss': 0.021889962922218655, 'time_step': 0.0020167936225143814, 'init_value': -1.4915474653244019, 'ave_value': -1.491640827165175, 'soft_opc': nan} step=21072




2022-04-07 20:20.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_21072.pt


Epoch 25/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.44 [info     ] FQE_20220407201951: epoch=25 step=21950 epoch=25 metrics={'time_sample_batch': 0.00013995089129054738, 'time_algorithm_update': 0.002019852603486567, 'loss': 0.022000984293993588, 'time_step': 0.00222511362106219, 'init_value': -1.438968300819397, 'ave_value': -1.4389339519669118, 'soft_opc': nan} step=21950




2022-04-07 20:20.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_21950.pt


Epoch 26/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.47 [info     ] FQE_20220407201951: epoch=26 step=22828 epoch=26 metrics={'time_sample_batch': 0.00014397386537869049, 'time_algorithm_update': 0.001974986734585773, 'loss': 0.0198769647857071, 'time_step': 0.002182609942616526, 'init_value': -1.406660556793213, 'ave_value': -1.4066520274406316, 'soft_opc': nan} step=22828




2022-04-07 20:20.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_22828.pt


Epoch 27/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.49 [info     ] FQE_20220407201951: epoch=27 step=23706 epoch=27 metrics={'time_sample_batch': 0.0001413729847971019, 'time_algorithm_update': 0.0020247249777061794, 'loss': 0.02036328669761415, 'time_step': 0.0022303134813938708, 'init_value': -1.4313329458236694, 'ave_value': -1.4313314731701963, 'soft_opc': nan} step=23706




2022-04-07 20:20.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_23706.pt


Epoch 28/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.51 [info     ] FQE_20220407201951: epoch=28 step=24584 epoch=28 metrics={'time_sample_batch': 0.0001364503743165175, 'time_algorithm_update': 0.00194985502673173, 'loss': 0.021514005857893596, 'time_step': 0.0021471502026012656, 'init_value': -1.47808837890625, 'ave_value': -1.4778920096126853, 'soft_opc': nan} step=24584




2022-04-07 20:20.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_24584.pt


Epoch 29/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.53 [info     ] FQE_20220407201951: epoch=29 step=25462 epoch=29 metrics={'time_sample_batch': 0.00013530743149255565, 'time_algorithm_update': 0.0018562502089828456, 'loss': 0.02111180328364772, 'time_step': 0.0020531828691313096, 'init_value': -1.4379056692123413, 'ave_value': -1.4380262057440065, 'soft_opc': nan} step=25462




2022-04-07 20:20.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_25462.pt


Epoch 30/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.56 [info     ] FQE_20220407201951: epoch=30 step=26340 epoch=30 metrics={'time_sample_batch': 0.00014511898058150514, 'time_algorithm_update': 0.0020504953649429633, 'loss': 0.02046939302197639, 'time_step': 0.0022582005802754116, 'init_value': -1.471402645111084, 'ave_value': -1.4714676561241982, 'soft_opc': nan} step=26340




2022-04-07 20:20.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_26340.pt


Epoch 31/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:20.58 [info     ] FQE_20220407201951: epoch=31 step=27218 epoch=31 metrics={'time_sample_batch': 0.00015151690785053921, 'time_algorithm_update': 0.0022090741333494427, 'loss': 0.02082713299145893, 'time_step': 0.00242812785580924, 'init_value': -1.4621622562408447, 'ave_value': -1.4621607380381592, 'soft_opc': nan} step=27218




2022-04-07 20:20.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_27218.pt


Epoch 32/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.00 [info     ] FQE_20220407201951: epoch=32 step=28096 epoch=32 metrics={'time_sample_batch': 0.00014061047981974746, 'time_algorithm_update': 0.00196999596571868, 'loss': 0.02115431909218996, 'time_step': 0.002175604020816047, 'init_value': -1.4649745225906372, 'ave_value': -1.4650375631163668, 'soft_opc': nan} step=28096




2022-04-07 20:21.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_28096.pt


Epoch 33/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.03 [info     ] FQE_20220407201951: epoch=33 step=28974 epoch=33 metrics={'time_sample_batch': 0.0001573999813314451, 'time_algorithm_update': 0.002335477255471475, 'loss': 0.020344926388792713, 'time_step': 0.0025649434613203947, 'init_value': -1.423880934715271, 'ave_value': -1.4237357987983668, 'soft_opc': nan} step=28974




2022-04-07 20:21.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_28974.pt


Epoch 34/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.05 [info     ] FQE_20220407201951: epoch=34 step=29852 epoch=34 metrics={'time_sample_batch': 0.0001514463055378212, 'time_algorithm_update': 0.002156548999708171, 'loss': 0.021596222689011665, 'time_step': 0.0023741062246857036, 'init_value': -1.5022252798080444, 'ave_value': -1.5022064955488206, 'soft_opc': nan} step=29852




2022-04-07 20:21.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_29852.pt


Epoch 35/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.07 [info     ] FQE_20220407201951: epoch=35 step=30730 epoch=35 metrics={'time_sample_batch': 0.0001463412152335964, 'time_algorithm_update': 0.002056134317350279, 'loss': 0.022905971027380276, 'time_step': 0.0022717649138326796, 'init_value': -1.4624409675598145, 'ave_value': -1.4623984067575133, 'soft_opc': nan} step=30730




2022-04-07 20:21.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_30730.pt


Epoch 36/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.10 [info     ] FQE_20220407201951: epoch=36 step=31608 epoch=36 metrics={'time_sample_batch': 0.00013310056612540487, 'time_algorithm_update': 0.0019102865036636388, 'loss': 0.021579890245607566, 'time_step': 0.0021038503473060277, 'init_value': -1.4617583751678467, 'ave_value': -1.461592137762271, 'soft_opc': nan} step=31608




2022-04-07 20:21.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_31608.pt


Epoch 37/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.12 [info     ] FQE_20220407201951: epoch=37 step=32486 epoch=37 metrics={'time_sample_batch': 0.00012970025012596322, 'time_algorithm_update': 0.001833684352101389, 'loss': 0.02094788239587936, 'time_step': 0.0020215068699830216, 'init_value': -1.3832403421401978, 'ave_value': -1.3831770419993468, 'soft_opc': nan} step=32486




2022-04-07 20:21.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_32486.pt


Epoch 38/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.14 [info     ] FQE_20220407201951: epoch=38 step=33364 epoch=38 metrics={'time_sample_batch': 0.00014188593775373384, 'time_algorithm_update': 0.002359931724218138, 'loss': 0.019384493101043728, 'time_step': 0.0025664247521506897, 'init_value': -1.365249752998352, 'ave_value': -1.3652186432345685, 'soft_opc': nan} step=33364




2022-04-07 20:21.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_33364.pt


Epoch 39/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.17 [info     ] FQE_20220407201951: epoch=39 step=34242 epoch=39 metrics={'time_sample_batch': 0.00014590891184187695, 'time_algorithm_update': 0.0022839715106069093, 'loss': 0.019074320658219304, 'time_step': 0.0024929024907070847, 'init_value': -1.405124545097351, 'ave_value': -1.404789171754199, 'soft_opc': nan} step=34242




2022-04-07 20:21.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_34242.pt


Epoch 40/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.19 [info     ] FQE_20220407201951: epoch=40 step=35120 epoch=40 metrics={'time_sample_batch': 0.00015568515977446744, 'time_algorithm_update': 0.0022143038638903502, 'loss': 0.019608000528166646, 'time_step': 0.0024371959082357974, 'init_value': -1.4209625720977783, 'ave_value': -1.4208360687272836, 'soft_opc': nan} step=35120




2022-04-07 20:21.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_35120.pt


Epoch 41/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.22 [info     ] FQE_20220407201951: epoch=41 step=35998 epoch=41 metrics={'time_sample_batch': 0.00015240649699078607, 'time_algorithm_update': 0.0022297942828480364, 'loss': 0.019941276455209097, 'time_step': 0.002451423088890545, 'init_value': -1.4057354927062988, 'ave_value': -1.405555857741412, 'soft_opc': nan} step=35998




2022-04-07 20:21.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_35998.pt


Epoch 42/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.24 [info     ] FQE_20220407201951: epoch=42 step=36876 epoch=42 metrics={'time_sample_batch': 0.00013824475924898116, 'time_algorithm_update': 0.0020509260390505434, 'loss': 0.01821643859602773, 'time_step': 0.0022519286509798436, 'init_value': -1.3096829652786255, 'ave_value': -1.30953676646757, 'soft_opc': nan} step=36876




2022-04-07 20:21.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_36876.pt


Epoch 43/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.26 [info     ] FQE_20220407201951: epoch=43 step=37754 epoch=43 metrics={'time_sample_batch': 0.00014289364999410504, 'time_algorithm_update': 0.0020682211617402443, 'loss': 0.01773176389543677, 'time_step': 0.0022760496595725928, 'init_value': -1.352709412574768, 'ave_value': -1.3525130827530476, 'soft_opc': nan} step=37754




2022-04-07 20:21.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_37754.pt


Epoch 44/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.29 [info     ] FQE_20220407201951: epoch=44 step=38632 epoch=44 metrics={'time_sample_batch': 0.00014533893394035738, 'time_algorithm_update': 0.0020661812979974074, 'loss': 0.019009897184670353, 'time_step': 0.0022757050659770574, 'init_value': -1.3900490999221802, 'ave_value': -1.3899614704866396, 'soft_opc': nan} step=38632




2022-04-07 20:21.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_38632.pt


Epoch 45/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.31 [info     ] FQE_20220407201951: epoch=45 step=39510 epoch=45 metrics={'time_sample_batch': 0.000143590712058517, 'time_algorithm_update': 0.0020405551024069817, 'loss': 0.02031552886210462, 'time_step': 0.002247813350790454, 'init_value': -1.444032907485962, 'ave_value': -1.4439647628700851, 'soft_opc': nan} step=39510




2022-04-07 20:21.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_39510.pt


Epoch 46/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.33 [info     ] FQE_20220407201951: epoch=46 step=40388 epoch=46 metrics={'time_sample_batch': 0.00013986508232585932, 'time_algorithm_update': 0.0019161372630091082, 'loss': 0.02097952499091357, 'time_step': 0.002118449276291971, 'init_value': -1.4260329008102417, 'ave_value': -1.4259819419437891, 'soft_opc': nan} step=40388




2022-04-07 20:21.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_40388.pt


Epoch 47/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.35 [info     ] FQE_20220407201951: epoch=47 step=41266 epoch=47 metrics={'time_sample_batch': 0.00014117719915298776, 'time_algorithm_update': 0.001975745980994848, 'loss': 0.02021002796992892, 'time_step': 0.002179886594177107, 'init_value': -1.4180327653884888, 'ave_value': -1.4179303685046518, 'soft_opc': nan} step=41266




2022-04-07 20:21.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_41266.pt


Epoch 48/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.38 [info     ] FQE_20220407201951: epoch=48 step=42144 epoch=48 metrics={'time_sample_batch': 0.0001424336487720116, 'time_algorithm_update': 0.0021465517122273023, 'loss': 0.02049536586402241, 'time_step': 0.0023533703254404264, 'init_value': -1.4381914138793945, 'ave_value': -1.438145921048022, 'soft_opc': nan} step=42144




2022-04-07 20:21.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_42144.pt


Epoch 49/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.40 [info     ] FQE_20220407201951: epoch=49 step=43022 epoch=49 metrics={'time_sample_batch': 0.000140164327512841, 'time_algorithm_update': 0.001995606683109779, 'loss': 0.02114439026917475, 'time_step': 0.002195979576719106, 'init_value': -1.4777183532714844, 'ave_value': -1.4778062003556383, 'soft_opc': nan} step=43022




2022-04-07 20:21.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_43022.pt


Epoch 50/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.42 [info     ] FQE_20220407201951: epoch=50 step=43900 epoch=50 metrics={'time_sample_batch': 0.00014518903979950993, 'time_algorithm_update': 0.0020446207094301123, 'loss': 0.022649357612666426, 'time_step': 0.0022551255780091863, 'init_value': -1.5099941492080688, 'ave_value': -1.5100681873347797, 'soft_opc': nan} step=43900




2022-04-07 20:21.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407201951/model_43900.pt


[(1,
  {'time_sample_batch': 0.00013968911963877753,
   'time_algorithm_update': 0.0018511578814043813,
   'loss': 0.00017434491303991263,
   'time_step': 0.0020544406764871166,
   'init_value': -0.14161010086536407,
   'ave_value': -0.14164950612826305,
   'soft_opc': nan}),
 (2,
  {'time_sample_batch': 0.00013360727349283482,
   'time_algorithm_update': 0.001868682461610416,
   'loss': 0.0005978871645425246,
   'time_step': 0.0020631492002135257,
   'init_value': -0.3587397336959839,
   'ave_value': -0.3586608218960169,
   'soft_opc': nan}),
 (3,
  {'time_sample_batch': 0.00013338270382892025,
   'time_algorithm_update': 0.001851950799685676,
   'loss': 0.001988239196115851,
   'time_step': 0.0020455303930747484,
   'init_value': -0.5455045104026794,
   'ave_value': -0.5452594495059389,
   'soft_opc': nan}),
 (4,
  {'time_sample_batch': 0.00013503995734629706,
   'time_algorithm_update': 0.0019145256294476416,
   'loss': 0.0036624751454680227,
   'time_step': 0.0021117569916883744,
 

In [21]:
from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer


ope_dataset = get_dataset([2,4,6,8], path="collected_data/rl_stoch_small.txt") #change if you'd prefer different chunks
ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })

[ 0.00000000e+00  7.95731469e+08  1.39310892e-01  1.82000047e-02
 -1.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -7.50230117e-02  3.69851546e-01 -6.00000000e-01]
Read chunk # 3 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.15389108e-01  1.64000047e-02
 -8.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.08831172e-01 -2.48178665e-01  6.00000000e-01]
Read chunk # 5 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.28589108e-01  1.20000047e-02
  1.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.75212759e-03 -3.52719043e-01  6.00000000e-01]
Read chunk # 7 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.03989108e-01  7.00000469e-03
 -8.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.65974295e-01 -2.19295880e-01  6.00000000e-01]
Read chunk # 9 out of 10000
torch.Size([111080, 6])
2022-04-07 20:21.43 [debug    ] RoundIterator is selected.
2022-04-07 20:21.43 [info     ] Directory is created at d3rlpy_logs/FQE_2022040720214

Epoch 1/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.45 [info     ] FQE_20220407202143: epoch=1 step=878 epoch=1 metrics={'time_sample_batch': 0.0001402658862242123, 'time_algorithm_update': 0.0020971023954943265, 'loss': 0.0005286302931466156, 'time_step': 0.0023040975144892457, 'init_value': -0.3165595531463623, 'ave_value': -0.3164726957802313, 'soft_opc': nan} step=878




2022-04-07 20:21.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_878.pt


Epoch 2/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.48 [info     ] FQE_20220407202143: epoch=2 step=1756 epoch=2 metrics={'time_sample_batch': 0.00013752896041696326, 'time_algorithm_update': 0.00205795531392369, 'loss': 0.0018772853800121275, 'time_step': 0.002257068499345714, 'init_value': -0.52862948179245, 'ave_value': -0.5286661683669388, 'soft_opc': nan} step=1756




2022-04-07 20:21.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_1756.pt


Epoch 3/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.50 [info     ] FQE_20220407202143: epoch=3 step=2634 epoch=3 metrics={'time_sample_batch': 0.00014293519673966602, 'time_algorithm_update': 0.0019880682569429924, 'loss': 0.003699947557589628, 'time_step': 0.002199543092679869, 'init_value': -0.6613953113555908, 'ave_value': -0.6614020432158646, 'soft_opc': nan} step=2634




2022-04-07 20:21.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_2634.pt


Epoch 4/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.52 [info     ] FQE_20220407202143: epoch=4 step=3512 epoch=4 metrics={'time_sample_batch': 0.00014332839731203396, 'time_algorithm_update': 0.0019716092285642865, 'loss': 0.00557659785202414, 'time_step': 0.0021811110012080514, 'init_value': -0.8179246187210083, 'ave_value': -0.8181076823285445, 'soft_opc': nan} step=3512




2022-04-07 20:21.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_3512.pt


Epoch 5/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.54 [info     ] FQE_20220407202143: epoch=5 step=4390 epoch=5 metrics={'time_sample_batch': 0.0001422894571256529, 'time_algorithm_update': 0.0019703294258725944, 'loss': 0.007837312238301745, 'time_step': 0.002179959097321321, 'init_value': -0.8776341080665588, 'ave_value': -0.8777253289958049, 'soft_opc': nan} step=4390




2022-04-07 20:21.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_4390.pt


Epoch 6/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.56 [info     ] FQE_20220407202143: epoch=6 step=5268 epoch=6 metrics={'time_sample_batch': 0.00014240486475221116, 'time_algorithm_update': 0.0019580807392580906, 'loss': 0.009722781093172886, 'time_step': 0.002165454395268121, 'init_value': -1.0338720083236694, 'ave_value': -1.034081550928142, 'soft_opc': nan} step=5268




2022-04-07 20:21.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_5268.pt


Epoch 7/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:21.59 [info     ] FQE_20220407202143: epoch=7 step=6146 epoch=7 metrics={'time_sample_batch': 0.00013425274155949135, 'time_algorithm_update': 0.0019394148455122338, 'loss': 0.011688956326422131, 'time_step': 0.002129857252290417, 'init_value': -0.9997265338897705, 'ave_value': -0.9999333179239487, 'soft_opc': nan} step=6146




2022-04-07 20:21.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_6146.pt


Epoch 8/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.01 [info     ] FQE_20220407202143: epoch=8 step=7024 epoch=8 metrics={'time_sample_batch': 0.00014319072280223385, 'time_algorithm_update': 0.001989164493621618, 'loss': 0.011844987472682233, 'time_step': 0.002193406665515248, 'init_value': -1.0868362188339233, 'ave_value': -1.0872190636399377, 'soft_opc': nan} step=7024




2022-04-07 20:22.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_7024.pt


Epoch 9/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.03 [info     ] FQE_20220407202143: epoch=9 step=7902 epoch=9 metrics={'time_sample_batch': 0.0001375376499323747, 'time_algorithm_update': 0.0019158817369465404, 'loss': 0.01378366030549595, 'time_step': 0.0021146416392576178, 'init_value': -1.1804144382476807, 'ave_value': -1.180710972257189, 'soft_opc': nan} step=7902




2022-04-07 20:22.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_7902.pt


Epoch 10/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.05 [info     ] FQE_20220407202143: epoch=10 step=8780 epoch=10 metrics={'time_sample_batch': 0.00014030363130678077, 'time_algorithm_update': 0.0019680400101090346, 'loss': 0.01517054568049598, 'time_step': 0.0021692633900392572, 'init_value': -1.1739482879638672, 'ave_value': -1.1742275229732362, 'soft_opc': nan} step=8780




2022-04-07 20:22.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_8780.pt


Epoch 11/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.07 [info     ] FQE_20220407202143: epoch=11 step=9658 epoch=11 metrics={'time_sample_batch': 0.00014146259542478245, 'time_algorithm_update': 0.0019703997566379557, 'loss': 0.015055459218703188, 'time_step': 0.002176341271889237, 'init_value': -1.2424061298370361, 'ave_value': -1.2427814760860738, 'soft_opc': nan} step=9658




2022-04-07 20:22.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_9658.pt


Epoch 12/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.10 [info     ] FQE_20220407202143: epoch=12 step=10536 epoch=12 metrics={'time_sample_batch': 0.0001395120707622693, 'time_algorithm_update': 0.001939577230831485, 'loss': 0.01761386197130355, 'time_step': 0.0021405719678874438, 'init_value': -1.2923803329467773, 'ave_value': -1.2928680826052594, 'soft_opc': nan} step=10536




2022-04-07 20:22.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_10536.pt


Epoch 13/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.12 [info     ] FQE_20220407202143: epoch=13 step=11414 epoch=13 metrics={'time_sample_batch': 0.0001397548340990766, 'time_algorithm_update': 0.0019475205341069736, 'loss': 0.019310316844444692, 'time_step': 0.0021539024991706728, 'init_value': -1.332446813583374, 'ave_value': -1.3328439830092194, 'soft_opc': nan} step=11414




2022-04-07 20:22.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_11414.pt


Epoch 14/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.14 [info     ] FQE_20220407202143: epoch=14 step=12292 epoch=14 metrics={'time_sample_batch': 0.00014348996798921554, 'time_algorithm_update': 0.002138933994232386, 'loss': 0.020816633835431135, 'time_step': 0.002344923030271074, 'init_value': -1.4214390516281128, 'ave_value': -1.4218049151877343, 'soft_opc': nan} step=12292




2022-04-07 20:22.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_12292.pt


Epoch 15/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.17 [info     ] FQE_20220407202143: epoch=15 step=13170 epoch=15 metrics={'time_sample_batch': 0.00014252733261004125, 'time_algorithm_update': 0.0023323449567130053, 'loss': 0.02077669931677022, 'time_step': 0.0025435577492507553, 'init_value': -1.3313651084899902, 'ave_value': -1.3317360949361325, 'soft_opc': nan} step=13170




2022-04-07 20:22.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_13170.pt


Epoch 16/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.19 [info     ] FQE_20220407202143: epoch=16 step=14048 epoch=16 metrics={'time_sample_batch': 0.0001386618559887306, 'time_algorithm_update': 0.002069534093209443, 'loss': 0.019719170544335186, 'time_step': 0.0022700017568462257, 'init_value': -1.3144497871398926, 'ave_value': -1.314762066659782, 'soft_opc': nan} step=14048




2022-04-07 20:22.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_14048.pt


Epoch 17/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.21 [info     ] FQE_20220407202143: epoch=17 step=14926 epoch=17 metrics={'time_sample_batch': 0.00014263595155268434, 'time_algorithm_update': 0.00199767343004092, 'loss': 0.01879594744689656, 'time_step': 0.0022034354524894835, 'init_value': -1.383181095123291, 'ave_value': -1.3837250346719405, 'soft_opc': nan} step=14926




2022-04-07 20:22.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_14926.pt


Epoch 18/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.24 [info     ] FQE_20220407202143: epoch=18 step=15804 epoch=18 metrics={'time_sample_batch': 0.00014127142608573063, 'time_algorithm_update': 0.0019813053700266773, 'loss': 0.020633150617524614, 'time_step': 0.002183239117841786, 'init_value': -1.3902881145477295, 'ave_value': -1.3908501790502599, 'soft_opc': nan} step=15804




2022-04-07 20:22.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_15804.pt


Epoch 19/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.26 [info     ] FQE_20220407202143: epoch=19 step=16682 epoch=19 metrics={'time_sample_batch': 0.00014229434497807183, 'time_algorithm_update': 0.001964734735684406, 'loss': 0.02129307299277579, 'time_step': 0.00217331922135755, 'init_value': -1.3890167474746704, 'ave_value': -1.389406714625358, 'soft_opc': nan} step=16682




2022-04-07 20:22.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_16682.pt


Epoch 20/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.28 [info     ] FQE_20220407202143: epoch=20 step=17560 epoch=20 metrics={'time_sample_batch': 0.00014492944052659298, 'time_algorithm_update': 0.0021741764963623604, 'loss': 0.02106990770557465, 'time_step': 0.0023853863018791876, 'init_value': -1.3545901775360107, 'ave_value': -1.3549781620648265, 'soft_opc': nan} step=17560




2022-04-07 20:22.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_17560.pt


Epoch 21/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.30 [info     ] FQE_20220407202143: epoch=21 step=18438 epoch=21 metrics={'time_sample_batch': 0.0001434524944540037, 'time_algorithm_update': 0.0020239315163301714, 'loss': 0.02134582991502011, 'time_step': 0.0022334036903120663, 'init_value': -1.3910497426986694, 'ave_value': -1.391504211593334, 'soft_opc': nan} step=18438




2022-04-07 20:22.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_18438.pt


Epoch 22/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.33 [info     ] FQE_20220407202143: epoch=22 step=19316 epoch=22 metrics={'time_sample_batch': 0.0001393181859496514, 'time_algorithm_update': 0.001976440870680407, 'loss': 0.021671438663680233, 'time_step': 0.002176982938292901, 'init_value': -1.344119906425476, 'ave_value': -1.3446969237616784, 'soft_opc': nan} step=19316




2022-04-07 20:22.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_19316.pt


Epoch 23/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.35 [info     ] FQE_20220407202143: epoch=23 step=20194 epoch=23 metrics={'time_sample_batch': 0.0001397016108171815, 'time_algorithm_update': 0.0019387758945821358, 'loss': 0.021078830407424493, 'time_step': 0.002142096163200083, 'init_value': -1.382448434829712, 'ave_value': -1.3831972887187505, 'soft_opc': nan} step=20194




2022-04-07 20:22.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_20194.pt


Epoch 24/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.37 [info     ] FQE_20220407202143: epoch=24 step=21072 epoch=24 metrics={'time_sample_batch': 0.00014151799108553042, 'time_algorithm_update': 0.0019643548409325113, 'loss': 0.020885443148540773, 'time_step': 0.0021687531525561915, 'init_value': -1.3445168733596802, 'ave_value': -1.3451974591321367, 'soft_opc': nan} step=21072




2022-04-07 20:22.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_21072.pt


Epoch 25/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.39 [info     ] FQE_20220407202143: epoch=25 step=21950 epoch=25 metrics={'time_sample_batch': 0.0001242636004993204, 'time_algorithm_update': 0.001800832009804276, 'loss': 0.020463398795039715, 'time_step': 0.001983297441434752, 'init_value': -1.3372279405593872, 'ave_value': -1.3380574433713222, 'soft_opc': nan} step=21950




2022-04-07 20:22.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_21950.pt


Epoch 26/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.41 [info     ] FQE_20220407202143: epoch=26 step=22828 epoch=26 metrics={'time_sample_batch': 0.00011943277302526938, 'time_algorithm_update': 0.0016150979626423133, 'loss': 0.020743096125975555, 'time_step': 0.0017880836760536141, 'init_value': -1.3399711847305298, 'ave_value': -1.3409994662580171, 'soft_opc': nan} step=22828




2022-04-07 20:22.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_22828.pt


Epoch 27/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.43 [info     ] FQE_20220407202143: epoch=27 step=23706 epoch=27 metrics={'time_sample_batch': 0.0001193659723755439, 'time_algorithm_update': 0.0015914727995227302, 'loss': 0.021839019622066375, 'time_step': 0.0017679443663508039, 'init_value': -1.411691665649414, 'ave_value': -1.4125124339827453, 'soft_opc': nan} step=23706




2022-04-07 20:22.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_23706.pt


Epoch 28/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.45 [info     ] FQE_20220407202143: epoch=28 step=24584 epoch=28 metrics={'time_sample_batch': 0.00011977437959988188, 'time_algorithm_update': 0.001713903183817592, 'loss': 0.02271687794794851, 'time_step': 0.001888768004936618, 'init_value': -1.4185038805007935, 'ave_value': -1.4192657432743199, 'soft_opc': nan} step=24584




2022-04-07 20:22.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_24584.pt


Epoch 29/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.46 [info     ] FQE_20220407202143: epoch=29 step=25462 epoch=29 metrics={'time_sample_batch': 0.00011985747309100383, 'time_algorithm_update': 0.0016131306020436907, 'loss': 0.023355432580762697, 'time_step': 0.0017880861199798237, 'init_value': -1.3860498666763306, 'ave_value': -1.386815170950554, 'soft_opc': nan} step=25462




2022-04-07 20:22.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_25462.pt


Epoch 30/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.48 [info     ] FQE_20220407202143: epoch=30 step=26340 epoch=30 metrics={'time_sample_batch': 0.00011847801251943671, 'time_algorithm_update': 0.0016306570830812216, 'loss': 0.022663588293408983, 'time_step': 0.001802868343431477, 'init_value': -1.4195955991744995, 'ave_value': -1.4204506898774625, 'soft_opc': nan} step=26340




2022-04-07 20:22.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_26340.pt


Epoch 31/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.50 [info     ] FQE_20220407202143: epoch=31 step=27218 epoch=31 metrics={'time_sample_batch': 0.000113926607275335, 'time_algorithm_update': 0.0015912517599744513, 'loss': 0.02299091394578058, 'time_step': 0.0017581056625261937, 'init_value': -1.4578006267547607, 'ave_value': -1.458492504128642, 'soft_opc': nan} step=27218




2022-04-07 20:22.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_27218.pt


Epoch 32/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.52 [info     ] FQE_20220407202143: epoch=32 step=28096 epoch=32 metrics={'time_sample_batch': 0.00011643760568188644, 'time_algorithm_update': 0.0015426428823101764, 'loss': 0.024228881138705322, 'time_step': 0.0017163737216680088, 'init_value': -1.4894893169403076, 'ave_value': -1.4902431947514216, 'soft_opc': nan} step=28096




2022-04-07 20:22.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_28096.pt


Epoch 33/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.54 [info     ] FQE_20220407202143: epoch=33 step=28974 epoch=33 metrics={'time_sample_batch': 0.00011969888943474494, 'time_algorithm_update': 0.0016259582276224819, 'loss': 0.025144303568305163, 'time_step': 0.0018016643024522786, 'init_value': -1.5271714925765991, 'ave_value': -1.5278841139462762, 'soft_opc': nan} step=28974




2022-04-07 20:22.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_28974.pt


Epoch 34/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.55 [info     ] FQE_20220407202143: epoch=34 step=29852 epoch=34 metrics={'time_sample_batch': 0.0001166339344207138, 'time_algorithm_update': 0.001589522274860217, 'loss': 0.026158409211348044, 'time_step': 0.0017623665120987252, 'init_value': -1.5562676191329956, 'ave_value': -1.5569084715628831, 'soft_opc': nan} step=29852




2022-04-07 20:22.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_29852.pt


Epoch 35/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.57 [info     ] FQE_20220407202143: epoch=35 step=30730 epoch=35 metrics={'time_sample_batch': 0.00012067564527646284, 'time_algorithm_update': 0.001698347321946963, 'loss': 0.027944458390957896, 'time_step': 0.0018776310332000662, 'init_value': -1.5958967208862305, 'ave_value': -1.5965939768861157, 'soft_opc': nan} step=30730




2022-04-07 20:22.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_30730.pt


Epoch 36/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:22.59 [info     ] FQE_20220407202143: epoch=36 step=31608 epoch=36 metrics={'time_sample_batch': 0.00012063681400446794, 'time_algorithm_update': 0.0016850564365777992, 'loss': 0.026948301647060854, 'time_step': 0.001862076257518864, 'init_value': -1.5318511724472046, 'ave_value': -1.5323746471297144, 'soft_opc': nan} step=31608




2022-04-07 20:22.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_31608.pt


Epoch 37/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.01 [info     ] FQE_20220407202143: epoch=37 step=32486 epoch=37 metrics={'time_sample_batch': 0.00012504973009669972, 'time_algorithm_update': 0.0017597702478221989, 'loss': 0.025269587248150777, 'time_step': 0.0019436317044129946, 'init_value': -1.4800795316696167, 'ave_value': -1.4804298304942345, 'soft_opc': nan} step=32486




2022-04-07 20:23.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_32486.pt


Epoch 38/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.03 [info     ] FQE_20220407202143: epoch=38 step=33364 epoch=38 metrics={'time_sample_batch': 0.0001223277393940641, 'time_algorithm_update': 0.0016936424924463781, 'loss': 0.024108329336484113, 'time_step': 0.0018743586160055868, 'init_value': -1.5078701972961426, 'ave_value': -1.5083468838930183, 'soft_opc': nan} step=33364




2022-04-07 20:23.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_33364.pt


Epoch 39/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.05 [info     ] FQE_20220407202143: epoch=39 step=34242 epoch=39 metrics={'time_sample_batch': 0.00012227587384895202, 'time_algorithm_update': 0.0016835227371076787, 'loss': 0.024134077420265194, 'time_step': 0.0018621930228822053, 'init_value': -1.4633879661560059, 'ave_value': -1.463814005205077, 'soft_opc': nan} step=34242




2022-04-07 20:23.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_34242.pt


Epoch 40/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.07 [info     ] FQE_20220407202143: epoch=40 step=35120 epoch=40 metrics={'time_sample_batch': 0.00012385655101176547, 'time_algorithm_update': 0.0017253380430043424, 'loss': 0.021700776310258532, 'time_step': 0.0019052273048626807, 'init_value': -1.339998722076416, 'ave_value': -1.3402761200338882, 'soft_opc': nan} step=35120




2022-04-07 20:23.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_35120.pt


Epoch 41/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.09 [info     ] FQE_20220407202143: epoch=41 step=35998 epoch=41 metrics={'time_sample_batch': 0.00013093443286174523, 'time_algorithm_update': 0.0018436992901604376, 'loss': 0.02001858971507815, 'time_step': 0.002035931737113379, 'init_value': -1.3391892910003662, 'ave_value': -1.33941630011058, 'soft_opc': nan} step=35998




2022-04-07 20:23.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_35998.pt


Epoch 42/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.11 [info     ] FQE_20220407202143: epoch=42 step=36876 epoch=42 metrics={'time_sample_batch': 0.00011894127230980945, 'time_algorithm_update': 0.0016497430605877505, 'loss': 0.019558913927318157, 'time_step': 0.001823700098893637, 'init_value': -1.317215085029602, 'ave_value': -1.3174541309437426, 'soft_opc': nan} step=36876




2022-04-07 20:23.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_36876.pt


Epoch 43/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.13 [info     ] FQE_20220407202143: epoch=43 step=37754 epoch=43 metrics={'time_sample_batch': 0.00011696739457462806, 'time_algorithm_update': 0.0015756608684948201, 'loss': 0.01925158019694333, 'time_step': 0.0017465162928935335, 'init_value': -1.2911431789398193, 'ave_value': -1.2916233363152685, 'soft_opc': nan} step=37754




2022-04-07 20:23.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_37754.pt


Epoch 44/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.15 [info     ] FQE_20220407202143: epoch=44 step=38632 epoch=44 metrics={'time_sample_batch': 0.00012185334616207044, 'time_algorithm_update': 0.0016409669215695461, 'loss': 0.01969397413999817, 'time_step': 0.0018172112032601394, 'init_value': -1.3284529447555542, 'ave_value': -1.3288619501351955, 'soft_opc': nan} step=38632




2022-04-07 20:23.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_38632.pt


Epoch 45/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.17 [info     ] FQE_20220407202143: epoch=45 step=39510 epoch=45 metrics={'time_sample_batch': 0.00013092004085184502, 'time_algorithm_update': 0.0018788904698400128, 'loss': 0.019638121347003244, 'time_step': 0.0020715055270184145, 'init_value': -1.3389097452163696, 'ave_value': -1.3393009383377026, 'soft_opc': nan} step=39510




2022-04-07 20:23.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_39510.pt


Epoch 46/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.18 [info     ] FQE_20220407202143: epoch=46 step=40388 epoch=46 metrics={'time_sample_batch': 0.00012207167023678303, 'time_algorithm_update': 0.0016231468978395224, 'loss': 0.019090877418996805, 'time_step': 0.0018008553628769443, 'init_value': -1.3052579164505005, 'ave_value': -1.3056257026891926, 'soft_opc': nan} step=40388




2022-04-07 20:23.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_40388.pt


Epoch 47/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.21 [info     ] FQE_20220407202143: epoch=47 step=41266 epoch=47 metrics={'time_sample_batch': 0.00014098575826657932, 'time_algorithm_update': 0.0020226916310999007, 'loss': 0.01902379925068421, 'time_step': 0.0022307821721313753, 'init_value': -1.3328394889831543, 'ave_value': -1.3331363834580763, 'soft_opc': nan} step=41266




2022-04-07 20:23.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_41266.pt


Epoch 48/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.23 [info     ] FQE_20220407202143: epoch=48 step=42144 epoch=48 metrics={'time_sample_batch': 0.00013064604956902783, 'time_algorithm_update': 0.0018187169333525291, 'loss': 0.019777392128466863, 'time_step': 0.002009870522657668, 'init_value': -1.3341330289840698, 'ave_value': -1.3346023515150989, 'soft_opc': nan} step=42144




2022-04-07 20:23.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_42144.pt


Epoch 49/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.25 [info     ] FQE_20220407202143: epoch=49 step=43022 epoch=49 metrics={'time_sample_batch': 0.00012584074754649796, 'time_algorithm_update': 0.001701133397825758, 'loss': 0.020840050602511883, 'time_step': 0.0018859300635127108, 'init_value': -1.3221980333328247, 'ave_value': -1.3227189791997922, 'soft_opc': nan} step=43022




2022-04-07 20:23.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_43022.pt


Epoch 50/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 20:23.27 [info     ] FQE_20220407202143: epoch=50 step=43900 epoch=50 metrics={'time_sample_batch': 0.00012191362967523735, 'time_algorithm_update': 0.001705906114165495, 'loss': 0.019594529408642637, 'time_step': 0.00188317087082222, 'init_value': -1.3522834777832031, 'ave_value': -1.3528912111468174, 'soft_opc': nan} step=43900




2022-04-07 20:23.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407202143/model_43900.pt


[(1,
  {'time_sample_batch': 0.0001402658862242123,
   'time_algorithm_update': 0.0020971023954943265,
   'loss': 0.0005286302931466156,
   'time_step': 0.0023040975144892457,
   'init_value': -0.3165595531463623,
   'ave_value': -0.3164726957802313,
   'soft_opc': nan}),
 (2,
  {'time_sample_batch': 0.00013752896041696326,
   'time_algorithm_update': 0.00205795531392369,
   'loss': 0.0018772853800121275,
   'time_step': 0.002257068499345714,
   'init_value': -0.52862948179245,
   'ave_value': -0.5286661683669388,
   'soft_opc': nan}),
 (3,
  {'time_sample_batch': 0.00014293519673966602,
   'time_algorithm_update': 0.0019880682569429924,
   'loss': 0.003699947557589628,
   'time_step': 0.002199543092679869,
   'init_value': -0.6613953113555908,
   'ave_value': -0.6614020432158646,
   'soft_opc': nan}),
 (4,
  {'time_sample_batch': 0.00014332839731203396,
   'time_algorithm_update': 0.0019716092285642865,
   'loss': 0.00557659785202414,
   'time_step': 0.0021811110012080514,
   'init_va