# Sample Workflow for d3rlpy Experiments

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
plt.style.use('matplotlibrc')

from Python.data_sampler import *

## Building an MDPDataset

We first read in a large batch of samples from the file. As `d3rlpy` wants it in the form (observations, actions, rewards, terminal flags), we go ahead and do that. Here's a helper function to get a dataset from a list of chunks of your choosing.

In [2]:
def get_dataset(chunks : list, batch_size=30000, 
                path="collected_data/rl_det_small.txt") -> d3rlpy.dataset.MDPDataset :
    random.seed(0)
    samples = DataSampler(path_to_data=path)
    states = []
    actions = []
    rewards = []
    next_states = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        [statesChunk, actionsChunk, rewardsChunk, nextStatesChunk] = samples.get_batch(batch_size)
        states.append(statesChunk)
        actions.append(actionsChunk)
        rewards.append(rewardsChunk)
        next_states.append(nextStatesChunk)
    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)
    next_states = torch.cat(next_states)
    terminals = np.zeros(len(states))
    terminals[::100] = 1 #episode length 100, change if necessary
    print(states.shape)
    dataset = d3rlpy.dataset.MDPDataset(states.numpy(), 
                                        actions.numpy(), 
                                        rewards.numpy(), terminals)
    return dataset

We can build the dataset from there, just like this, and split into train and test sets.

In [3]:
dataset = get_dataset([3,5,7,9], path="collected_data/rl_det_small.txt")

[ 0.00000000e+00  7.95731469e+08 -8.17891077e-02 -1.19999531e-03
  7.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.09713430e-01 -2.63658359e-01  6.00000000e-01]
Read chunk # 4 out of 10000
[ 0.00000000e+00  7.95731469e+08  1.24610892e-01  2.40000469e-03
 -7.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.20016566e-01  3.79282423e-01 -6.00000000e-01]
Read chunk # 6 out of 10000
[ 0.00000000e+00  7.95731469e+08 -9.01891077e-02  1.08000047e-02
  3.99986580e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.17973088e-02 -2.40776052e-01  6.00000000e-01]
Read chunk # 8 out of 10000
[ 0.00000000e+00  7.95731469e+08  6.91108923e-02 -5.99999531e-03
 -6.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.42355434e-01  2.22081792e-01 -6.00000000e-01]
Read chunk # 10 out of 10000
torch.Size([111080, 6])


In [4]:
print("The behavior policy value statistics are:")
dataset.compute_stats()['return']

The behavior policy value statistics are:


{'mean': -4.1227446,
 'std': 2.4676569,
 'min': -12.578855,
 'max': 0.0,
 'histogram': (array([ 26,   9,   7,   7,   8,   7,  10,  13,  27,  54,  56,  73, 109,
          84, 186, 148, 124,  83,  67,  13]),
  array([-12.578855 , -11.949912 , -11.320969 , -10.692026 , -10.063084 ,
          -9.434141 ,  -8.805199 ,  -8.176255 ,  -7.5473127,  -6.9183702,
          -6.2894273,  -5.6604843,  -5.031542 ,  -4.4025993,  -3.7736564,
          -3.1447136,  -2.515771 ,  -1.8868282,  -1.2578855,  -0.6289427,
           0.       ], dtype=float32))}

In [5]:
from sklearn.model_selection import train_test_split
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

## Setting up an Algorithm

In [12]:
from d3rlpy.algos import TD3PlusBC

from d3rlpy.preprocessing import MinMaxActionScaler
action_scaler = MinMaxActionScaler(minimum=-0.6, maximum=0.6)
#cql = CQL(action_scaler=action_scaler)

model = TD3PlusBC(q_func_factory='mean', #qr -> quantile regression q function, but you don't have to use this
            reward_scaler='standard',
            action_scaler=action_scaler,
          actor_learning_rate=1e-5, 
          critic_learning_rate=0.0003, 
            use_gpu=False) #change it to true if you have one
model.build_with_dataset(dataset)

In [7]:
from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from d3rlpy.metrics.scorer import initial_state_value_estimation_scorer

# calculate metrics with test dataset
ave_error_init = average_value_estimation_scorer(model, test_episodes)
print(ave_error_init)

-0.08131087201637716


In [8]:
%load_ext tensorboard
%tensorboard --logdir runs

In [9]:
model.fit(train_episodes,
        eval_episodes=test_episodes,
        n_epochs=5, 
        tensorboard_dir='runs',
        scorers={
            'td_error': td_error_scorer,
            'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer
        })

2022-04-07 19:43.22 [debug    ] RoundIterator is selected.
2022-04-07 19:43.22 [info     ] Directory is created at d3rlpy_logs/CQL_20220407194322
2022-04-07 19:43.22 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-07 19:43.22 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-07 19:43.22 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220407194322/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 1e-05, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_rate': 0.0001, 'alpha_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_threshold': 10.0, 'batch_size': 256, 'conser

Epoch 1/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 19:43.45 [info     ] CQL_20220407194322: epoch=1 step=343 epoch=1 metrics={'time_sample_batch': 0.000332894200138726, 'time_algorithm_update': 0.06332637200202608, 'temp_loss': 4.549272245290328, 'temp': 0.9833125291343333, 'alpha_loss': -9.526565534901689, 'alpha': 1.014112019677899, 'critic_loss': 10.390127244913195, 'actor_loss': 0.17096432040475373, 'time_step': 0.06374213021281162, 'td_error': 1.640563069486044, 'init_value': -0.2725396454334259, 'ave_value': -0.2704807368589359} step=343
2022-04-07 19:43.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407194322/model_343.pt


Epoch 2/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 19:44.10 [info     ] CQL_20220407194322: epoch=2 step=686 epoch=2 metrics={'time_sample_batch': 0.000326751620011844, 'time_algorithm_update': 0.07104923704275237, 'temp_loss': 2.526219411077027, 'temp': 0.956864971113622, 'alpha_loss': 4.13621229787273, 'alpha': 1.0176574339324462, 'critic_loss': -3.2920629723600325, 'actor_loss': 3.1353582501758988, 'time_step': 0.07146202371002286, 'td_error': 4.960642614457846, 'init_value': -1.62131667137146, 'ave_value': -1.6176998761369101} step=686
2022-04-07 19:44.10 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407194322/model_686.pt


Epoch 3/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 19:44.34 [info     ] CQL_20220407194322: epoch=3 step=1029 epoch=3 metrics={'time_sample_batch': 0.000331777872914128, 'time_algorithm_update': 0.06792566449579622, 'temp_loss': 1.2130801743738158, 'temp': 0.9400827478041802, 'alpha_loss': 10.705599565200139, 'alpha': 0.9852839272154316, 'critic_loss': -9.13451760850912, 'actor_loss': 5.710984918883521, 'time_step': 0.06834246396323335, 'td_error': 6.480809372071453, 'init_value': -3.210794687271118, 'ave_value': -3.204889975710586} step=1029
2022-04-07 19:44.34 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407194322/model_1029.pt


Epoch 4/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 19:45.02 [info     ] CQL_20220407194322: epoch=4 step=1372 epoch=4 metrics={'time_sample_batch': 0.00034571597596994296, 'time_algorithm_update': 0.07863425443888405, 'temp_loss': 0.44537788994458255, 'temp': 0.9308691212456706, 'alpha_loss': 13.288155922736788, 'alpha': 0.9422573655756847, 'critic_loss': -10.698442213041789, 'actor_loss': 7.801613221015597, 'time_step': 0.07906752425102034, 'td_error': 7.775450190520213, 'init_value': -4.5973310470581055, 'ave_value': -4.594196638820768} step=1372
2022-04-07 19:45.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407194322/model_1372.pt


Epoch 5/5:   0%|          | 0/343 [00:00<?, ?it/s]

2022-04-07 19:45.33 [info     ] CQL_20220407194322: epoch=5 step=1715 epoch=5 metrics={'time_sample_batch': 0.00034187695027787903, 'time_algorithm_update': 0.09032315276454558, 'temp_loss': 0.03165825267265342, 'temp': 0.9276943024323911, 'alpha_loss': 14.31788788453483, 'alpha': 0.9025450887554937, 'critic_loss': -10.340598141139171, 'actor_loss': 9.92209205126971, 'time_step': 0.09075341836356561, 'td_error': 9.699289034171423, 'init_value': -7.284750938415527, 'ave_value': -7.281605792852697} step=1715
2022-04-07 19:45.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220407194322/model_1715.pt


[(1,
  {'time_sample_batch': 0.000332894200138726,
   'time_algorithm_update': 0.06332637200202608,
   'temp_loss': 4.549272245290328,
   'temp': 0.9833125291343333,
   'alpha_loss': -9.526565534901689,
   'alpha': 1.014112019677899,
   'critic_loss': 10.390127244913195,
   'actor_loss': 0.17096432040475373,
   'time_step': 0.06374213021281162,
   'td_error': 1.640563069486044,
   'init_value': -0.2725396454334259,
   'ave_value': -0.2704807368589359}),
 (2,
  {'time_sample_batch': 0.000326751620011844,
   'time_algorithm_update': 0.07104923704275237,
   'temp_loss': 2.526219411077027,
   'temp': 0.956864971113622,
   'alpha_loss': 4.13621229787273,
   'alpha': 1.0176574339324462,
   'critic_loss': -3.2920629723600325,
   'actor_loss': 3.1353582501758988,
   'time_step': 0.07146202371002286,
   'td_error': 4.960642614457846,
   'init_value': -1.62131667137146,
   'ave_value': -1.6176998761369101}),
 (3,
  {'time_sample_batch': 0.000331777872914128,
   'time_algorithm_update': 0.0679256

## Off-Policy Evaluation

We do get some metrics on a test set of initial state value and average value. However, these estimates (using the critic's Q-function) of model performance are biased. They're useful for validation during training, but not much else. Instead, we fit a Q-function to the data (or a separate dataset, as I've done here) separately and evaluate the model's performance on it.

Feel free to change the chunks and number of steps.

In [10]:
from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer


ope_dataset = get_dataset([2,4,6,8], path="collected_data/rl_det_small.txt") #change if you'd prefer different chunks
ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })

[ 0.00000000e+00  7.95731469e+08 -1.03891077e-02 -1.41999953e-02
 -2.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.78778459e-03 -1.34615461e-02  4.84073546e-02]
Read chunk # 3 out of 10000
[ 0.00000000e+00  7.95731469e+08 -7.24891077e-02 -1.35999953e-02
 -4.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.23311010e-02 -1.64283998e-01  6.00000000e-01]
Read chunk # 5 out of 10000
[ 0.00000000e+00  7.95731469e+08  7.01089229e-03 -4.19999531e-03
  7.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.21623335e-01 -2.86362315e-02 -8.00043364e-02]
Read chunk # 7 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.03989108e-01 -1.37999953e-02
  7.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.76352555e-01 -3.26280816e-01  6.00000000e-01]
Read chunk # 9 out of 10000
torch.Size([111080, 6])
2022-04-07 19:45.34 [debug    ] RoundIterator is selected.
2022-04-07 19:45.34 [info     ] Directory is created at d3rlpy_logs/FQE_2022040719453

Epoch 1/50:   0%|          | 0/878 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-07 19:45.37 [info     ] FQE_20220407194534: epoch=1 step=878 epoch=1 metrics={'time_sample_batch': 0.00015500764911973122, 'time_algorithm_update': 0.0021512693044536475, 'loss': 0.0008598907446906451, 'time_step': 0.0023798494512779567, 'init_value': -0.3687102198600769, 'ave_value': -0.36885178882223535, 'soft_opc': nan} step=878




2022-04-07 19:45.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_878.pt


Epoch 2/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.39 [info     ] FQE_20220407194534: epoch=2 step=1756 epoch=2 metrics={'time_sample_batch': 0.0001387566260161867, 'time_algorithm_update': 0.0018661529979836153, 'loss': 0.0017972966269422025, 'time_step': 0.0020636958250423772, 'init_value': -0.4670436978340149, 'ave_value': -0.46696345135451456, 'soft_opc': nan} step=1756




2022-04-07 19:45.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_1756.pt


Epoch 3/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.41 [info     ] FQE_20220407194534: epoch=3 step=2634 epoch=3 metrics={'time_sample_batch': 0.00015332378396140688, 'time_algorithm_update': 0.0021614496148528706, 'loss': 0.0028082218235471962, 'time_step': 0.0023881615158637183, 'init_value': -0.6038172245025635, 'ave_value': -0.603597512889957, 'soft_opc': nan} step=2634




2022-04-07 19:45.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_2634.pt


Epoch 4/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.43 [info     ] FQE_20220407194534: epoch=4 step=3512 epoch=4 metrics={'time_sample_batch': 0.00014146884101398443, 'time_algorithm_update': 0.001892091201486783, 'loss': 0.004026786944634962, 'time_step': 0.0020962252975324836, 'init_value': -0.6947826147079468, 'ave_value': -0.6943413370377117, 'soft_opc': nan} step=3512




2022-04-07 19:45.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_3512.pt


Epoch 5/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.45 [info     ] FQE_20220407194534: epoch=5 step=4390 epoch=5 metrics={'time_sample_batch': 0.00013859071058129938, 'time_algorithm_update': 0.0019121669691081471, 'loss': 0.005384607592049404, 'time_step': 0.0021173644446023233, 'init_value': -0.799199104309082, 'ave_value': -0.7987060819290123, 'soft_opc': nan} step=4390




2022-04-07 19:45.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_4390.pt


Epoch 6/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.48 [info     ] FQE_20220407194534: epoch=6 step=5268 epoch=6 metrics={'time_sample_batch': 0.00014471926287257863, 'time_algorithm_update': 0.0019226383782736532, 'loss': 0.00646480936772785, 'time_step': 0.002136452051392992, 'init_value': -0.8538197875022888, 'ave_value': -0.8532566686317412, 'soft_opc': nan} step=5268




2022-04-07 19:45.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_5268.pt


Epoch 7/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.50 [info     ] FQE_20220407194534: epoch=7 step=6146 epoch=7 metrics={'time_sample_batch': 0.00013866321372551363, 'time_algorithm_update': 0.0018176310154734549, 'loss': 0.007539895954599643, 'time_step': 0.0020237286704547857, 'init_value': -0.9249391555786133, 'ave_value': -0.9244862327936599, 'soft_opc': nan} step=6146




2022-04-07 19:45.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_6146.pt


Epoch 8/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.52 [info     ] FQE_20220407194534: epoch=8 step=7024 epoch=8 metrics={'time_sample_batch': 0.0001402338436361326, 'time_algorithm_update': 0.0018686466173593438, 'loss': 0.008464527814796636, 'time_step': 0.0020743307057165612, 'init_value': -0.9299455285072327, 'ave_value': -0.9293143372175653, 'soft_opc': nan} step=7024




2022-04-07 19:45.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_7024.pt


Epoch 9/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.54 [info     ] FQE_20220407194534: epoch=9 step=7902 epoch=9 metrics={'time_sample_batch': 0.0001518158814901643, 'time_algorithm_update': 0.0021846367720862454, 'loss': 0.008821962090347983, 'time_step': 0.0024111471849584906, 'init_value': -0.9726646542549133, 'ave_value': -0.9719792439458421, 'soft_opc': nan} step=7902




2022-04-07 19:45.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_7902.pt


Epoch 10/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.56 [info     ] FQE_20220407194534: epoch=10 step=8780 epoch=10 metrics={'time_sample_batch': 0.00014454330018549684, 'time_algorithm_update': 0.0018852006873128625, 'loss': 0.009951402157921434, 'time_step': 0.0020987797435160923, 'init_value': -1.0245360136032104, 'ave_value': -1.02387508308131, 'soft_opc': nan} step=8780




2022-04-07 19:45.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_8780.pt


Epoch 11/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:45.58 [info     ] FQE_20220407194534: epoch=11 step=9658 epoch=11 metrics={'time_sample_batch': 0.0001353967705728796, 'time_algorithm_update': 0.0018439740960853246, 'loss': 0.011305126708308777, 'time_step': 0.0020352946870147775, 'init_value': -1.0842629671096802, 'ave_value': -1.0835329852210238, 'soft_opc': nan} step=9658




2022-04-07 19:45.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_9658.pt


Epoch 12/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.01 [info     ] FQE_20220407194534: epoch=12 step=10536 epoch=12 metrics={'time_sample_batch': 0.00013791944551576514, 'time_algorithm_update': 0.0018667039675701724, 'loss': 0.011853593341788267, 'time_step': 0.002060322935325953, 'init_value': -1.114078402519226, 'ave_value': -1.1132142047829812, 'soft_opc': nan} step=10536




2022-04-07 19:46.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_10536.pt


Epoch 13/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.03 [info     ] FQE_20220407194534: epoch=13 step=11414 epoch=13 metrics={'time_sample_batch': 0.000142137119058596, 'time_algorithm_update': 0.0018660468229671817, 'loss': 0.01198998790077542, 'time_step': 0.002067867878629298, 'init_value': -1.1067391633987427, 'ave_value': -1.1058548577114655, 'soft_opc': nan} step=11414




2022-04-07 19:46.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_11414.pt


Epoch 14/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.05 [info     ] FQE_20220407194534: epoch=14 step=12292 epoch=14 metrics={'time_sample_batch': 0.00014107455425219003, 'time_algorithm_update': 0.0019037476433165252, 'loss': 0.012680178666295451, 'time_step': 0.0021051497014073954, 'init_value': -1.1482951641082764, 'ave_value': -1.1472832891082194, 'soft_opc': nan} step=12292




2022-04-07 19:46.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_12292.pt


Epoch 15/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.07 [info     ] FQE_20220407194534: epoch=15 step=13170 epoch=15 metrics={'time_sample_batch': 0.00013761721330786078, 'time_algorithm_update': 0.0019023771438077262, 'loss': 0.013298806494440173, 'time_step': 0.0020974274376801857, 'init_value': -1.158900499343872, 'ave_value': -1.1579638829388719, 'soft_opc': nan} step=13170




2022-04-07 19:46.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_13170.pt


Epoch 16/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.09 [info     ] FQE_20220407194534: epoch=16 step=14048 epoch=16 metrics={'time_sample_batch': 0.00013663312568751445, 'time_algorithm_update': 0.001859648624150791, 'loss': 0.013820636291186522, 'time_step': 0.002055541801018161, 'init_value': -1.133089542388916, 'ave_value': -1.131977541294306, 'soft_opc': nan} step=14048




2022-04-07 19:46.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_14048.pt


Epoch 17/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.11 [info     ] FQE_20220407194534: epoch=17 step=14926 epoch=17 metrics={'time_sample_batch': 0.00012852336388242543, 'time_algorithm_update': 0.0016702867043045496, 'loss': 0.013923142377594778, 'time_step': 0.0018544083032629756, 'init_value': -1.1906569004058838, 'ave_value': -1.1895094598857487, 'soft_opc': nan} step=14926




2022-04-07 19:46.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_14926.pt


Epoch 18/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.13 [info     ] FQE_20220407194534: epoch=18 step=15804 epoch=18 metrics={'time_sample_batch': 0.00012638710082799265, 'time_algorithm_update': 0.0016507407255759272, 'loss': 0.014625608667886234, 'time_step': 0.0018286148345008797, 'init_value': -1.2275172472000122, 'ave_value': -1.2263318071812668, 'soft_opc': nan} step=15804




2022-04-07 19:46.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_15804.pt


Epoch 19/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.15 [info     ] FQE_20220407194534: epoch=19 step=16682 epoch=19 metrics={'time_sample_batch': 0.0001334057853542319, 'time_algorithm_update': 0.001778106483077134, 'loss': 0.015376418721865535, 'time_step': 0.0019727453827043328, 'init_value': -1.2541077136993408, 'ave_value': -1.2529907390767532, 'soft_opc': nan} step=16682




2022-04-07 19:46.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_16682.pt


Epoch 20/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.17 [info     ] FQE_20220407194534: epoch=20 step=17560 epoch=20 metrics={'time_sample_batch': 0.00012731090493517204, 'time_algorithm_update': 0.0016822225683642412, 'loss': 0.015405189241482938, 'time_step': 0.0018648517430507512, 'init_value': -1.2258505821228027, 'ave_value': -1.224869961066755, 'soft_opc': nan} step=17560




2022-04-07 19:46.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_17560.pt


Epoch 21/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.19 [info     ] FQE_20220407194534: epoch=21 step=18438 epoch=21 metrics={'time_sample_batch': 0.0001292204259468374, 'time_algorithm_update': 0.0016824916717946394, 'loss': 0.01518524447646012, 'time_step': 0.0018689322851784952, 'init_value': -1.2478523254394531, 'ave_value': -1.2468143310938677, 'soft_opc': nan} step=18438




2022-04-07 19:46.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_18438.pt


Epoch 22/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.21 [info     ] FQE_20220407194534: epoch=22 step=19316 epoch=22 metrics={'time_sample_batch': 0.00013190412847219132, 'time_algorithm_update': 0.001704399840978392, 'loss': 0.015791774910989455, 'time_step': 0.0018919337040199506, 'init_value': -1.2798423767089844, 'ave_value': -1.2787114192152518, 'soft_opc': nan} step=19316




2022-04-07 19:46.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_19316.pt


Epoch 23/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.23 [info     ] FQE_20220407194534: epoch=23 step=20194 epoch=23 metrics={'time_sample_batch': 0.00012256534333109583, 'time_algorithm_update': 0.0015752551767440482, 'loss': 0.016143764134213808, 'time_step': 0.0017542798318189478, 'init_value': -1.3036279678344727, 'ave_value': -1.3024494032197367, 'soft_opc': nan} step=20194




2022-04-07 19:46.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_20194.pt


Epoch 24/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.25 [info     ] FQE_20220407194534: epoch=24 step=21072 epoch=24 metrics={'time_sample_batch': 0.0001315926636541623, 'time_algorithm_update': 0.0017463997990775487, 'loss': 0.015413191768256554, 'time_step': 0.001933620839564295, 'init_value': -1.2606450319290161, 'ave_value': -1.259656531196706, 'soft_opc': nan} step=21072




2022-04-07 19:46.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_21072.pt


Epoch 25/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.26 [info     ] FQE_20220407194534: epoch=25 step=21950 epoch=25 metrics={'time_sample_batch': 0.00013158071557047154, 'time_algorithm_update': 0.0017041752713144777, 'loss': 0.01612325943138953, 'time_step': 0.001893798962812489, 'init_value': -1.302064299583435, 'ave_value': -1.3011092287358985, 'soft_opc': nan} step=21950




2022-04-07 19:46.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_21950.pt


Epoch 26/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.29 [info     ] FQE_20220407194534: epoch=26 step=22828 epoch=26 metrics={'time_sample_batch': 0.00012892987027526717, 'time_algorithm_update': 0.0017948734461582332, 'loss': 0.016046842463650145, 'time_step': 0.001980816584784784, 'init_value': -1.2567006349563599, 'ave_value': -1.255636820406209, 'soft_opc': nan} step=22828




2022-04-07 19:46.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_22828.pt


Epoch 27/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.30 [info     ] FQE_20220407194534: epoch=27 step=23706 epoch=27 metrics={'time_sample_batch': 0.00013349838300283514, 'time_algorithm_update': 0.0017253730726133447, 'loss': 0.016259032489934374, 'time_step': 0.0019187248377702231, 'init_value': -1.3349992036819458, 'ave_value': -1.333977972623657, 'soft_opc': nan} step=23706




2022-04-07 19:46.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_23706.pt


Epoch 28/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.32 [info     ] FQE_20220407194534: epoch=28 step=24584 epoch=28 metrics={'time_sample_batch': 0.00013244206378563119, 'time_algorithm_update': 0.0017551056073303918, 'loss': 0.018103349936926423, 'time_step': 0.0019431516086865124, 'init_value': -1.3442445993423462, 'ave_value': -1.3433268169683514, 'soft_opc': nan} step=24584




2022-04-07 19:46.32 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_24584.pt


Epoch 29/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.34 [info     ] FQE_20220407194534: epoch=29 step=25462 epoch=29 metrics={'time_sample_batch': 0.00013330802830585314, 'time_algorithm_update': 0.0017351447042408727, 'loss': 0.017786253522612705, 'time_step': 0.0019271452497512712, 'init_value': -1.3210583925247192, 'ave_value': -1.320212381343541, 'soft_opc': nan} step=25462




2022-04-07 19:46.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_25462.pt


Epoch 30/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.36 [info     ] FQE_20220407194534: epoch=30 step=26340 epoch=30 metrics={'time_sample_batch': 0.00013336885491373326, 'time_algorithm_update': 0.0017190215799422904, 'loss': 0.017222864022904245, 'time_step': 0.0019107315697811187, 'init_value': -1.3382995128631592, 'ave_value': -1.3375704140206763, 'soft_opc': nan} step=26340




2022-04-07 19:46.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_26340.pt


Epoch 31/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.38 [info     ] FQE_20220407194534: epoch=31 step=27218 epoch=31 metrics={'time_sample_batch': 0.00012889728459247423, 'time_algorithm_update': 0.0016197552713554922, 'loss': 0.016068026283147636, 'time_step': 0.0018046562112273823, 'init_value': -1.2550493478775024, 'ave_value': -1.2542832214644288, 'soft_opc': nan} step=27218




2022-04-07 19:46.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_27218.pt


Epoch 32/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.40 [info     ] FQE_20220407194534: epoch=32 step=28096 epoch=32 metrics={'time_sample_batch': 0.0001344732380130568, 'time_algorithm_update': 0.0018349646978877943, 'loss': 0.01561450435236459, 'time_step': 0.002026320861520963, 'init_value': -1.2487380504608154, 'ave_value': -1.2478636050967304, 'soft_opc': nan} step=28096




2022-04-07 19:46.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_28096.pt


Epoch 33/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.42 [info     ] FQE_20220407194534: epoch=33 step=28974 epoch=33 metrics={'time_sample_batch': 0.00012557653196851865, 'time_algorithm_update': 0.00163176906950653, 'loss': 0.01632706322407838, 'time_step': 0.0018116610468384346, 'init_value': -1.2552908658981323, 'ave_value': -1.254358465401986, 'soft_opc': nan} step=28974




2022-04-07 19:46.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_28974.pt


Epoch 34/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.44 [info     ] FQE_20220407194534: epoch=34 step=29852 epoch=34 metrics={'time_sample_batch': 0.0001216999219055871, 'time_algorithm_update': 0.0015488127097996601, 'loss': 0.015654533272900277, 'time_step': 0.0017241174376363907, 'init_value': -1.2443311214447021, 'ave_value': -1.2434401816878353, 'soft_opc': nan} step=29852




2022-04-07 19:46.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_29852.pt


Epoch 35/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.46 [info     ] FQE_20220407194534: epoch=35 step=30730 epoch=35 metrics={'time_sample_batch': 0.00012479311784470543, 'time_algorithm_update': 0.0016010923646305579, 'loss': 0.015312866015812028, 'time_step': 0.001781922538079542, 'init_value': -1.2515242099761963, 'ave_value': -1.2504565081918413, 'soft_opc': nan} step=30730




2022-04-07 19:46.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_30730.pt


Epoch 36/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.48 [info     ] FQE_20220407194534: epoch=36 step=31608 epoch=36 metrics={'time_sample_batch': 0.00012445422674365903, 'time_algorithm_update': 0.0017029682433143563, 'loss': 0.01456209137092477, 'time_step': 0.001881215458307288, 'init_value': -1.2282742261886597, 'ave_value': -1.22722280742833, 'soft_opc': nan} step=31608




2022-04-07 19:46.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_31608.pt


Epoch 37/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.49 [info     ] FQE_20220407194534: epoch=37 step=32486 epoch=37 metrics={'time_sample_batch': 0.00011863578153362578, 'time_algorithm_update': 0.0015296623755696151, 'loss': 0.015400217440799613, 'time_step': 0.0017016501524453826, 'init_value': -1.28080415725708, 'ave_value': -1.2796953822783803, 'soft_opc': nan} step=32486




2022-04-07 19:46.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_32486.pt


Epoch 38/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.51 [info     ] FQE_20220407194534: epoch=38 step=33364 epoch=38 metrics={'time_sample_batch': 0.00012269106975720522, 'time_algorithm_update': 0.001561924373913463, 'loss': 0.016150363038562075, 'time_step': 0.0017368359011778257, 'init_value': -1.2737716436386108, 'ave_value': -1.2726497722394539, 'soft_opc': nan} step=33364




2022-04-07 19:46.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_33364.pt


Epoch 39/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.53 [info     ] FQE_20220407194534: epoch=39 step=34242 epoch=39 metrics={'time_sample_batch': 0.00012484661267395716, 'time_algorithm_update': 0.0016125674128260863, 'loss': 0.016114571493919795, 'time_step': 0.0017927939365013313, 'init_value': -1.2449063062667847, 'ave_value': -1.2439748982283367, 'soft_opc': nan} step=34242




2022-04-07 19:46.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_34242.pt


Epoch 40/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.55 [info     ] FQE_20220407194534: epoch=40 step=35120 epoch=40 metrics={'time_sample_batch': 0.00012045324799140113, 'time_algorithm_update': 0.0015667215295152948, 'loss': 0.01525798887912945, 'time_step': 0.0017397360269463958, 'init_value': -1.213791012763977, 'ave_value': -1.2126224225628024, 'soft_opc': nan} step=35120




2022-04-07 19:46.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_35120.pt


Epoch 41/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.57 [info     ] FQE_20220407194534: epoch=41 step=35998 epoch=41 metrics={'time_sample_batch': 0.00012371941959667858, 'time_algorithm_update': 0.0015873181249666322, 'loss': 0.015171062889312909, 'time_step': 0.0017665434535380647, 'init_value': -1.2403626441955566, 'ave_value': -1.2392172092608893, 'soft_opc': nan} step=35998




2022-04-07 19:46.57 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_35998.pt


Epoch 42/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:46.59 [info     ] FQE_20220407194534: epoch=42 step=36876 epoch=42 metrics={'time_sample_batch': 0.00012543451270101284, 'time_algorithm_update': 0.0017313101840182154, 'loss': 0.014984252791057216, 'time_step': 0.0019082550578888564, 'init_value': -1.1852749586105347, 'ave_value': -1.184285222115214, 'soft_opc': nan} step=36876




2022-04-07 19:46.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_36876.pt


Epoch 43/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.00 [info     ] FQE_20220407194534: epoch=43 step=37754 epoch=43 metrics={'time_sample_batch': 0.00012244966415718097, 'time_algorithm_update': 0.0016038765396778568, 'loss': 0.014161791607441236, 'time_step': 0.0017791763796621684, 'init_value': -1.2249411344528198, 'ave_value': -1.2239944430080538, 'soft_opc': nan} step=37754




2022-04-07 19:47.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_37754.pt


Epoch 44/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.02 [info     ] FQE_20220407194534: epoch=44 step=38632 epoch=44 metrics={'time_sample_batch': 0.00011993893729798615, 'time_algorithm_update': 0.001603225097569355, 'loss': 0.014332774985007938, 'time_step': 0.0017732609920458262, 'init_value': -1.222414493560791, 'ave_value': -1.221536228323823, 'soft_opc': nan} step=38632




2022-04-07 19:47.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_38632.pt


Epoch 45/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.04 [info     ] FQE_20220407194534: epoch=45 step=39510 epoch=45 metrics={'time_sample_batch': 0.00011992237290923307, 'time_algorithm_update': 0.0015474557876586914, 'loss': 0.014686701407864102, 'time_step': 0.001721575482831186, 'init_value': -1.1837443113327026, 'ave_value': -1.1829041288982953, 'soft_opc': nan} step=39510




2022-04-07 19:47.04 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_39510.pt


Epoch 46/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.06 [info     ] FQE_20220407194534: epoch=46 step=40388 epoch=46 metrics={'time_sample_batch': 0.0001267593922539018, 'time_algorithm_update': 0.0016920756641987517, 'loss': 0.01385601655830548, 'time_step': 0.001873012284211526, 'init_value': -1.1505485773086548, 'ave_value': -1.1497782524628728, 'soft_opc': nan} step=40388




2022-04-07 19:47.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_40388.pt


Epoch 47/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.08 [info     ] FQE_20220407194534: epoch=47 step=41266 epoch=47 metrics={'time_sample_batch': 0.00013932823320184591, 'time_algorithm_update': 0.0018765453868683486, 'loss': 0.013917773804940366, 'time_step': 0.0020758372504510207, 'init_value': -1.1997215747833252, 'ave_value': -1.198894922365695, 'soft_opc': nan} step=41266




2022-04-07 19:47.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_41266.pt


Epoch 48/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.10 [info     ] FQE_20220407194534: epoch=48 step=42144 epoch=48 metrics={'time_sample_batch': 0.00013452428891609904, 'time_algorithm_update': 0.001918101093492095, 'loss': 0.014974607847378673, 'time_step': 0.002109478980913792, 'init_value': -1.2517588138580322, 'ave_value': -1.2508943391655127, 'soft_opc': nan} step=42144




2022-04-07 19:47.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_42144.pt


Epoch 49/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.12 [info     ] FQE_20220407194534: epoch=49 step=43022 epoch=49 metrics={'time_sample_batch': 0.0001245566000971001, 'time_algorithm_update': 0.0016602313056893665, 'loss': 0.015639378331149097, 'time_step': 0.0018412537346668286, 'init_value': -1.2574328184127808, 'ave_value': -1.256622122191225, 'soft_opc': nan} step=43022




2022-04-07 19:47.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_43022.pt


Epoch 50/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.14 [info     ] FQE_20220407194534: epoch=50 step=43900 epoch=50 metrics={'time_sample_batch': 0.00012273913363932478, 'time_algorithm_update': 0.001632396886995007, 'loss': 0.015797932613698454, 'time_step': 0.0018077195369572737, 'init_value': -1.2501708269119263, 'ave_value': -1.2492717514301808, 'soft_opc': nan} step=43900




2022-04-07 19:47.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194534/model_43900.pt


[(1,
  {'time_sample_batch': 0.00015500764911973122,
   'time_algorithm_update': 0.0021512693044536475,
   'loss': 0.0008598907446906451,
   'time_step': 0.0023798494512779567,
   'init_value': -0.3687102198600769,
   'ave_value': -0.36885178882223535,
   'soft_opc': nan}),
 (2,
  {'time_sample_batch': 0.0001387566260161867,
   'time_algorithm_update': 0.0018661529979836153,
   'loss': 0.0017972966269422025,
   'time_step': 0.0020636958250423772,
   'init_value': -0.4670436978340149,
   'ave_value': -0.46696345135451456,
   'soft_opc': nan}),
 (3,
  {'time_sample_batch': 0.00015332378396140688,
   'time_algorithm_update': 0.0021614496148528706,
   'loss': 0.0028082218235471962,
   'time_step': 0.0023881615158637183,
   'init_value': -0.6038172245025635,
   'ave_value': -0.603597512889957,
   'soft_opc': nan}),
 (4,
  {'time_sample_batch': 0.00014146884101398443,
   'time_algorithm_update': 0.001892091201486783,
   'loss': 0.004026786944634962,
   'time_step': 0.0020962252975324836,
   

In [11]:
from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer


ope_dataset = get_dataset([2,4,6,8], path="collected_data/rl_stoch_small.txt") #change if you'd prefer different chunks
ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
        tensorboard_dir='runs',
        n_epochs=50, n_steps_per_epoch=1000, #change if overfitting/underfitting
        scorers={
           'init_value': initial_state_value_estimation_scorer,
            'ave_value': average_value_estimation_scorer,
           'soft_opc': soft_opc_scorer(return_threshold=0)
        })

[ 0.00000000e+00  7.95731469e+08  1.39310892e-01  1.82000047e-02
 -1.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -7.50230117e-02  3.69851546e-01 -6.00000000e-01]
Read chunk # 3 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.15389108e-01  1.64000047e-02
 -8.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.08831172e-01 -2.48178665e-01  6.00000000e-01]
Read chunk # 5 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.28589108e-01  1.20000047e-02
  1.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.75212759e-03 -3.52719043e-01  6.00000000e-01]
Read chunk # 7 out of 10000
[ 0.00000000e+00  7.95731469e+08 -1.03989108e-01  7.00000469e-03
 -8.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.65974295e-01 -2.19295880e-01  6.00000000e-01]
Read chunk # 9 out of 10000
torch.Size([111080, 6])
2022-04-07 19:47.15 [debug    ] RoundIterator is selected.
2022-04-07 19:47.15 [info     ] Directory is created at d3rlpy_logs/FQE_2022040719471

Epoch 1/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.17 [info     ] FQE_20220407194715: epoch=1 step=878 epoch=1 metrics={'time_sample_batch': 0.00012011843010070384, 'time_algorithm_update': 0.0016416873367066263, 'loss': 0.0005790919066507191, 'time_step': 0.0018213905886256885, 'init_value': -0.2699924111366272, 'ave_value': -0.2707972494631286, 'soft_opc': nan} step=878




2022-04-07 19:47.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_878.pt


Epoch 2/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.19 [info     ] FQE_20220407194715: epoch=2 step=1756 epoch=2 metrics={'time_sample_batch': 0.00012525257597208566, 'time_algorithm_update': 0.0017642578394374978, 'loss': 0.0016421610482464442, 'time_step': 0.0019463102475385731, 'init_value': -0.4241451919078827, 'ave_value': -0.42590119195731213, 'soft_opc': nan} step=1756




2022-04-07 19:47.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_1756.pt


Epoch 3/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.21 [info     ] FQE_20220407194715: epoch=3 step=2634 epoch=3 metrics={'time_sample_batch': 0.00012309458912912425, 'time_algorithm_update': 0.0017062292555198582, 'loss': 0.0032858520809319765, 'time_step': 0.0018910036543235691, 'init_value': -0.5432124137878418, 'ave_value': -0.5457452573710693, 'soft_opc': nan} step=2634




2022-04-07 19:47.21 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_2634.pt


Epoch 4/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.23 [info     ] FQE_20220407194715: epoch=4 step=3512 epoch=4 metrics={'time_sample_batch': 0.00012767043363532063, 'time_algorithm_update': 0.0018168163734036318, 'loss': 0.005045686625876784, 'time_step': 0.0020041721013792555, 'init_value': -0.645917534828186, 'ave_value': -0.6488111254041943, 'soft_opc': nan} step=3512




2022-04-07 19:47.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_3512.pt


Epoch 5/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.26 [info     ] FQE_20220407194715: epoch=5 step=4390 epoch=5 metrics={'time_sample_batch': 0.00015785726707997247, 'time_algorithm_update': 0.002833164634357009, 'loss': 0.007545049979664973, 'time_step': 0.0030690016127393025, 'init_value': -0.8148442506790161, 'ave_value': -0.8183561642084529, 'soft_opc': nan} step=4390




2022-04-07 19:47.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_4390.pt


Epoch 6/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.28 [info     ] FQE_20220407194715: epoch=6 step=5268 epoch=6 metrics={'time_sample_batch': 0.00015903170939730077, 'time_algorithm_update': 0.0024048516310428976, 'loss': 0.009360201107241195, 'time_step': 0.002639558972421703, 'init_value': -0.8830693960189819, 'ave_value': -0.8871849077280953, 'soft_opc': nan} step=5268




2022-04-07 19:47.28 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_5268.pt


Epoch 7/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.31 [info     ] FQE_20220407194715: epoch=7 step=6146 epoch=7 metrics={'time_sample_batch': 0.0001454030191165168, 'time_algorithm_update': 0.00228766727013034, 'loss': 0.010717078875546196, 'time_step': 0.002498147156352606, 'init_value': -0.9017384648323059, 'ave_value': -0.9060965530297924, 'soft_opc': nan} step=6146




2022-04-07 19:47.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_6146.pt


Epoch 8/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.33 [info     ] FQE_20220407194715: epoch=8 step=7024 epoch=8 metrics={'time_sample_batch': 0.0001450326285221039, 'time_algorithm_update': 0.002096773823192831, 'loss': 0.011176214950371886, 'time_step': 0.002306412455704326, 'init_value': -0.8924575448036194, 'ave_value': -0.897269057460511, 'soft_opc': nan} step=7024




2022-04-07 19:47.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_7024.pt


Epoch 9/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.36 [info     ] FQE_20220407194715: epoch=9 step=7902 epoch=9 metrics={'time_sample_batch': 0.00013731986895237534, 'time_algorithm_update': 0.001997587621076232, 'loss': 0.011962102437758908, 'time_step': 0.0021917228003569236, 'init_value': -0.9497050046920776, 'ave_value': -0.9546935201088101, 'soft_opc': nan} step=7902




2022-04-07 19:47.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_7902.pt


Epoch 10/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.38 [info     ] FQE_20220407194715: epoch=10 step=8780 epoch=10 metrics={'time_sample_batch': 0.0001455675768146211, 'time_algorithm_update': 0.0021242459977134755, 'loss': 0.01332029815141787, 'time_step': 0.0023323150865037783, 'init_value': -1.008182406425476, 'ave_value': -1.0130127867791847, 'soft_opc': nan} step=8780




2022-04-07 19:47.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_8780.pt


Epoch 11/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.40 [info     ] FQE_20220407194715: epoch=11 step=9658 epoch=11 metrics={'time_sample_batch': 0.00014378812698677082, 'time_algorithm_update': 0.002144754340273916, 'loss': 0.014207990951143484, 'time_step': 0.0023525714331172863, 'init_value': -1.0584899187088013, 'ave_value': -1.0635352596730356, 'soft_opc': nan} step=9658




2022-04-07 19:47.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_9658.pt


Epoch 12/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.43 [info     ] FQE_20220407194715: epoch=12 step=10536 epoch=12 metrics={'time_sample_batch': 0.00014734974211603742, 'time_algorithm_update': 0.002325884302004595, 'loss': 0.015208486926656443, 'time_step': 0.0025395323312364026, 'init_value': -1.0359933376312256, 'ave_value': -1.0409320683622294, 'soft_opc': nan} step=10536




2022-04-07 19:47.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_10536.pt


Epoch 13/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.45 [info     ] FQE_20220407194715: epoch=13 step=11414 epoch=13 metrics={'time_sample_batch': 0.00014770058130077456, 'time_algorithm_update': 0.0021604508636752675, 'loss': 0.01443603255323075, 'time_step': 0.002377889422457962, 'init_value': -1.020995855331421, 'ave_value': -1.0258524456917064, 'soft_opc': nan} step=11414




2022-04-07 19:47.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_11414.pt


Epoch 14/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.48 [info     ] FQE_20220407194715: epoch=14 step=12292 epoch=14 metrics={'time_sample_batch': 0.00014785509174668435, 'time_algorithm_update': 0.002195029160970979, 'loss': 0.014443122110794583, 'time_step': 0.002407412594163065, 'init_value': -1.0594441890716553, 'ave_value': -1.064514109598811, 'soft_opc': nan} step=12292




2022-04-07 19:47.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_12292.pt


Epoch 15/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.50 [info     ] FQE_20220407194715: epoch=15 step=13170 epoch=15 metrics={'time_sample_batch': 0.00014573484998629143, 'time_algorithm_update': 0.0022322629198669574, 'loss': 0.015305571249153379, 'time_step': 0.00244148146861778, 'init_value': -1.0409061908721924, 'ave_value': -1.0464355908828666, 'soft_opc': nan} step=13170




2022-04-07 19:47.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_13170.pt


Epoch 16/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.53 [info     ] FQE_20220407194715: epoch=16 step=14048 epoch=16 metrics={'time_sample_batch': 0.0001493589210075778, 'time_algorithm_update': 0.0023780672859765404, 'loss': 0.01681168312680299, 'time_step': 0.002594751486202579, 'init_value': -1.12869393825531, 'ave_value': -1.1342145079527395, 'soft_opc': nan} step=14048




2022-04-07 19:47.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_14048.pt


Epoch 17/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.55 [info     ] FQE_20220407194715: epoch=17 step=14926 epoch=17 metrics={'time_sample_batch': 0.00014827327467586022, 'time_algorithm_update': 0.0022731500768987355, 'loss': 0.016017960015347204, 'time_step': 0.002488114296168021, 'init_value': -1.0459181070327759, 'ave_value': -1.0508810738109617, 'soft_opc': nan} step=14926




2022-04-07 19:47.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_14926.pt


Epoch 18/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:47.58 [info     ] FQE_20220407194715: epoch=18 step=15804 epoch=18 metrics={'time_sample_batch': 0.00015380985372973467, 'time_algorithm_update': 0.002545580505510126, 'loss': 0.014640103658765214, 'time_step': 0.0027648460349080774, 'init_value': -1.007827639579773, 'ave_value': -1.0130277787755677, 'soft_opc': nan} step=15804




2022-04-07 19:47.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_15804.pt


Epoch 19/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.01 [info     ] FQE_20220407194715: epoch=19 step=16682 epoch=19 metrics={'time_sample_batch': 0.000151643720466075, 'time_algorithm_update': 0.0022945355174210187, 'loss': 0.014279734284115164, 'time_step': 0.0025149388302431563, 'init_value': -1.009173035621643, 'ave_value': -1.0146043368637947, 'soft_opc': nan} step=16682




2022-04-07 19:48.01 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_16682.pt


Epoch 20/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.03 [info     ] FQE_20220407194715: epoch=20 step=17560 epoch=20 metrics={'time_sample_batch': 0.00015054096665089115, 'time_algorithm_update': 0.002382355561832089, 'loss': 0.014088582257674119, 'time_step': 0.002599793577520071, 'init_value': -0.9771550893783569, 'ave_value': -0.9827193153878078, 'soft_opc': nan} step=17560




2022-04-07 19:48.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_17560.pt


Epoch 21/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.06 [info     ] FQE_20220407194715: epoch=21 step=18438 epoch=21 metrics={'time_sample_batch': 0.00014404745471233117, 'time_algorithm_update': 0.0020987789288740223, 'loss': 0.014034033349062757, 'time_step': 0.002307251537036244, 'init_value': -0.9603726267814636, 'ave_value': -0.9662785915602278, 'soft_opc': nan} step=18438




2022-04-07 19:48.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_18438.pt


Epoch 22/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.08 [info     ] FQE_20220407194715: epoch=22 step=19316 epoch=22 metrics={'time_sample_batch': 0.00013832866738217294, 'time_algorithm_update': 0.002045743014654972, 'loss': 0.013661034672129844, 'time_step': 0.0022463421072123533, 'init_value': -0.9445369839668274, 'ave_value': -0.9499523526825119, 'soft_opc': nan} step=19316




2022-04-07 19:48.08 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_19316.pt


Epoch 23/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.10 [info     ] FQE_20220407194715: epoch=23 step=20194 epoch=23 metrics={'time_sample_batch': 0.00014723840769982827, 'time_algorithm_update': 0.002157996075571534, 'loss': 0.013897523014843304, 'time_step': 0.002371572687848554, 'init_value': -1.0299580097198486, 'ave_value': -1.035299172183604, 'soft_opc': nan} step=20194




2022-04-07 19:48.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_20194.pt


Epoch 24/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.13 [info     ] FQE_20220407194715: epoch=24 step=21072 epoch=24 metrics={'time_sample_batch': 0.00014270438148654947, 'time_algorithm_update': 0.002012543363288758, 'loss': 0.01388622367626951, 'time_step': 0.0022169541660908413, 'init_value': -1.0207420587539673, 'ave_value': -1.0257451268727242, 'soft_opc': nan} step=21072




2022-04-07 19:48.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_21072.pt


Epoch 25/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.15 [info     ] FQE_20220407194715: epoch=25 step=21950 epoch=25 metrics={'time_sample_batch': 0.0001462627380475368, 'time_algorithm_update': 0.002234361709386178, 'loss': 0.013774900815060019, 'time_step': 0.0024449097539949527, 'init_value': -0.9885272979736328, 'ave_value': -0.9936396478837171, 'soft_opc': nan} step=21950




2022-04-07 19:48.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_21950.pt


Epoch 26/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.17 [info     ] FQE_20220407194715: epoch=26 step=22828 epoch=26 metrics={'time_sample_batch': 0.0001472862000345912, 'time_algorithm_update': 0.0021351676323962373, 'loss': 0.013538869348473382, 'time_step': 0.0023456782034698, 'init_value': -0.9681068062782288, 'ave_value': -0.9731957886553483, 'soft_opc': nan} step=22828




2022-04-07 19:48.17 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_22828.pt


Epoch 27/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.20 [info     ] FQE_20220407194715: epoch=27 step=23706 epoch=27 metrics={'time_sample_batch': 0.00014263323607911825, 'time_algorithm_update': 0.002042268837774533, 'loss': 0.01308035251712762, 'time_step': 0.002242724281780269, 'init_value': -0.9832358360290527, 'ave_value': -0.9880763689703919, 'soft_opc': nan} step=23706




2022-04-07 19:48.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_23706.pt


Epoch 28/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.22 [info     ] FQE_20220407194715: epoch=28 step=24584 epoch=28 metrics={'time_sample_batch': 0.00013972061913214404, 'time_algorithm_update': 0.0019833751039787413, 'loss': 0.013558182040780308, 'time_step': 0.002184311186805673, 'init_value': -0.9646814465522766, 'ave_value': -0.9696414606286201, 'soft_opc': nan} step=24584




2022-04-07 19:48.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_24584.pt


Epoch 29/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.24 [info     ] FQE_20220407194715: epoch=29 step=25462 epoch=29 metrics={'time_sample_batch': 0.00014674554924758528, 'time_algorithm_update': 0.0020773361918594952, 'loss': 0.01340400861708012, 'time_step': 0.0022885424672606865, 'init_value': -0.9761149883270264, 'ave_value': -0.9809474990952354, 'soft_opc': nan} step=25462




2022-04-07 19:48.24 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_25462.pt


Epoch 30/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.27 [info     ] FQE_20220407194715: epoch=30 step=26340 epoch=30 metrics={'time_sample_batch': 0.00014278883271445447, 'time_algorithm_update': 0.002160825870574743, 'loss': 0.013897476628015548, 'time_step': 0.0023653097196157536, 'init_value': -1.0243659019470215, 'ave_value': -1.029184744590196, 'soft_opc': nan} step=26340




2022-04-07 19:48.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_26340.pt


Epoch 31/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.29 [info     ] FQE_20220407194715: epoch=31 step=27218 epoch=31 metrics={'time_sample_batch': 0.00014114760049111752, 'time_algorithm_update': 0.001999056149180766, 'loss': 0.014870946392291109, 'time_step': 0.002201563676560387, 'init_value': -1.0624487400054932, 'ave_value': -1.067451506661414, 'soft_opc': nan} step=27218




2022-04-07 19:48.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_27218.pt


Epoch 32/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.31 [info     ] FQE_20220407194715: epoch=32 step=28096 epoch=32 metrics={'time_sample_batch': 0.00015031178068191425, 'time_algorithm_update': 0.0022094176407555515, 'loss': 0.01580397056578727, 'time_step': 0.0024240511153444887, 'init_value': -1.107540488243103, 'ave_value': -1.1122769307613956, 'soft_opc': nan} step=28096




2022-04-07 19:48.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_28096.pt


Epoch 33/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.34 [info     ] FQE_20220407194715: epoch=33 step=28974 epoch=33 metrics={'time_sample_batch': 0.00014676129899426853, 'time_algorithm_update': 0.0021474847489446063, 'loss': 0.01674616938273635, 'time_step': 0.0023605125640139223, 'init_value': -1.2068020105361938, 'ave_value': -1.2112543260661859, 'soft_opc': nan} step=28974




2022-04-07 19:48.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_28974.pt


Epoch 34/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.36 [info     ] FQE_20220407194715: epoch=34 step=29852 epoch=34 metrics={'time_sample_batch': 0.00014734295343212222, 'time_algorithm_update': 0.0020985353508951453, 'loss': 0.017187848355940803, 'time_step': 0.002313583206750266, 'init_value': -1.139150619506836, 'ave_value': -1.1436132349446129, 'soft_opc': nan} step=29852




2022-04-07 19:48.36 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_29852.pt


Epoch 35/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.39 [info     ] FQE_20220407194715: epoch=35 step=30730 epoch=35 metrics={'time_sample_batch': 0.00015817524903456007, 'time_algorithm_update': 0.002349426914727769, 'loss': 0.016133714851908024, 'time_step': 0.0025794544089627973, 'init_value': -1.1254295110702515, 'ave_value': -1.1298894863505669, 'soft_opc': nan} step=30730




2022-04-07 19:48.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_30730.pt


Epoch 36/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.41 [info     ] FQE_20220407194715: epoch=36 step=31608 epoch=36 metrics={'time_sample_batch': 0.0001498596543331624, 'time_algorithm_update': 0.002215377019043664, 'loss': 0.01630922499294862, 'time_step': 0.0024282459789093645, 'init_value': -1.1483098268508911, 'ave_value': -1.1527254340624569, 'soft_opc': nan} step=31608




2022-04-07 19:48.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_31608.pt


Epoch 37/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.44 [info     ] FQE_20220407194715: epoch=37 step=32486 epoch=37 metrics={'time_sample_batch': 0.0001615644315923808, 'time_algorithm_update': 0.0024874536214493947, 'loss': 0.01616758457989745, 'time_step': 0.002720411383209576, 'init_value': -1.0980688333511353, 'ave_value': -1.1025539918733032, 'soft_opc': nan} step=32486




2022-04-07 19:48.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_32486.pt


Epoch 38/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.46 [info     ] FQE_20220407194715: epoch=38 step=33364 epoch=38 metrics={'time_sample_batch': 0.0001499432909189976, 'time_algorithm_update': 0.0021186651564404743, 'loss': 0.015986064538549297, 'time_step': 0.0023350237713859402, 'init_value': -1.138708472251892, 'ave_value': -1.1432574723014939, 'soft_opc': nan} step=33364




2022-04-07 19:48.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_33364.pt


Epoch 39/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.49 [info     ] FQE_20220407194715: epoch=39 step=34242 epoch=39 metrics={'time_sample_batch': 0.0001512765884399414, 'time_algorithm_update': 0.0022601016833190225, 'loss': 0.01566080172659085, 'time_step': 0.002478145249612239, 'init_value': -1.0939439535140991, 'ave_value': -1.09864159108407, 'soft_opc': nan} step=34242




2022-04-07 19:48.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_34242.pt


Epoch 40/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.51 [info     ] FQE_20220407194715: epoch=40 step=35120 epoch=40 metrics={'time_sample_batch': 0.0001363279064586874, 'time_algorithm_update': 0.0020515869853165265, 'loss': 0.015017621576824734, 'time_step': 0.0022481745087747423, 'init_value': -1.0186997652053833, 'ave_value': -1.0235288714662747, 'soft_opc': nan} step=35120




2022-04-07 19:48.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_35120.pt


Epoch 41/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.53 [info     ] FQE_20220407194715: epoch=41 step=35998 epoch=41 metrics={'time_sample_batch': 0.00013589505997225473, 'time_algorithm_update': 0.001881463924138584, 'loss': 0.014460833854733367, 'time_step': 0.0020781087440890442, 'init_value': -1.035244345664978, 'ave_value': -1.0397052007489442, 'soft_opc': nan} step=35998




2022-04-07 19:48.53 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_35998.pt


Epoch 42/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.56 [info     ] FQE_20220407194715: epoch=42 step=36876 epoch=42 metrics={'time_sample_batch': 0.00014376911867180827, 'time_algorithm_update': 0.0020905719531152675, 'loss': 0.01456390390334582, 'time_step': 0.002298118041696744, 'init_value': -1.0830506086349487, 'ave_value': -1.0873627410397968, 'soft_opc': nan} step=36876




2022-04-07 19:48.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_36876.pt


Epoch 43/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:48.58 [info     ] FQE_20220407194715: epoch=43 step=37754 epoch=43 metrics={'time_sample_batch': 0.00014270628231804572, 'time_algorithm_update': 0.0020296152740513274, 'loss': 0.014237122507621821, 'time_step': 0.0022344187343310657, 'init_value': -1.008405089378357, 'ave_value': -1.0127839076862974, 'soft_opc': nan} step=37754




2022-04-07 19:48.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_37754.pt


Epoch 44/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:49.00 [info     ] FQE_20220407194715: epoch=44 step=38632 epoch=44 metrics={'time_sample_batch': 0.00014174717705450732, 'time_algorithm_update': 0.0020422386960179495, 'loss': 0.01262598041832818, 'time_step': 0.0022453819157593887, 'init_value': -0.9811661243438721, 'ave_value': -0.9854814348545023, 'soft_opc': nan} step=38632




2022-04-07 19:49.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_38632.pt


Epoch 45/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:49.02 [info     ] FQE_20220407194715: epoch=45 step=39510 epoch=45 metrics={'time_sample_batch': 0.00013736847592920812, 'time_algorithm_update': 0.0019757033480598605, 'loss': 0.013166741835684372, 'time_step': 0.0021715215778568067, 'init_value': -1.0112711191177368, 'ave_value': -1.015539491914205, 'soft_opc': nan} step=39510




2022-04-07 19:49.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_39510.pt


Epoch 46/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:49.05 [info     ] FQE_20220407194715: epoch=46 step=40388 epoch=46 metrics={'time_sample_batch': 0.000139515057783192, 'time_algorithm_update': 0.002239037754866963, 'loss': 0.013041086338034802, 'time_step': 0.002436758173896946, 'init_value': -1.0293153524398804, 'ave_value': -1.0333668895458274, 'soft_opc': nan} step=40388




2022-04-07 19:49.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_40388.pt


Epoch 47/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:49.07 [info     ] FQE_20220407194715: epoch=47 step=41266 epoch=47 metrics={'time_sample_batch': 0.00014684059082239797, 'time_algorithm_update': 0.0022203976581199837, 'loss': 0.0134339064295934, 'time_step': 0.0024336359223906708, 'init_value': -1.0487778186798096, 'ave_value': -1.0526104984793936, 'soft_opc': nan} step=41266




2022-04-07 19:49.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_41266.pt


Epoch 48/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:49.10 [info     ] FQE_20220407194715: epoch=48 step=42144 epoch=48 metrics={'time_sample_batch': 0.0001383976404107513, 'time_algorithm_update': 0.002004898490558181, 'loss': 0.01391516288942516, 'time_step': 0.0022016440545779424, 'init_value': -1.0611566305160522, 'ave_value': -1.064918287818931, 'soft_opc': nan} step=42144




2022-04-07 19:49.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_42144.pt


Epoch 49/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:49.12 [info     ] FQE_20220407194715: epoch=49 step=43022 epoch=49 metrics={'time_sample_batch': 0.00014159348125066735, 'time_algorithm_update': 0.002073873148420677, 'loss': 0.014711927342006575, 'time_step': 0.002279046727747472, 'init_value': -1.0797172784805298, 'ave_value': -1.0836601058669069, 'soft_opc': nan} step=43022




2022-04-07 19:49.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_43022.pt


Epoch 50/50:   0%|          | 0/878 [00:00<?, ?it/s]



2022-04-07 19:49.14 [info     ] FQE_20220407194715: epoch=50 step=43900 epoch=50 metrics={'time_sample_batch': 0.00013469672148754493, 'time_algorithm_update': 0.0020765655404614424, 'loss': 0.014310937399106917, 'time_step': 0.0022711433419334045, 'init_value': -1.0202535390853882, 'ave_value': -1.0242416432717358, 'soft_opc': nan} step=43900




2022-04-07 19:49.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220407194715/model_43900.pt


[(1,
  {'time_sample_batch': 0.00012011843010070384,
   'time_algorithm_update': 0.0016416873367066263,
   'loss': 0.0005790919066507191,
   'time_step': 0.0018213905886256885,
   'init_value': -0.2699924111366272,
   'ave_value': -0.2707972494631286,
   'soft_opc': nan}),
 (2,
  {'time_sample_batch': 0.00012525257597208566,
   'time_algorithm_update': 0.0017642578394374978,
   'loss': 0.0016421610482464442,
   'time_step': 0.0019463102475385731,
   'init_value': -0.4241451919078827,
   'ave_value': -0.42590119195731213,
   'soft_opc': nan}),
 (3,
  {'time_sample_batch': 0.00012309458912912425,
   'time_algorithm_update': 0.0017062292555198582,
   'loss': 0.0032858520809319765,
   'time_step': 0.0018910036543235691,
   'init_value': -0.5432124137878418,
   'ave_value': -0.5457452573710693,
   'soft_opc': nan}),
 (4,
  {'time_sample_batch': 0.00012767043363532063,
   'time_algorithm_update': 0.0018168163734036318,
   'loss': 0.005045686625876784,
   'time_step': 0.0020041721013792555,
 