# Sample Workflow for d3rlpy Experiments

In [1]:
# Dependencies used throughout the notebook.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
# Apply the matplotlib style called 'matplotlibrc' (presumably a style file
# resolved relative to the working directory -- TODO confirm).
plt.style.use('matplotlibrc')

# Wildcard import from the project's sampler module. Later cells use
# `DataSampler`, `random` and `torch` without importing them explicitly, so
# those names presumably enter the namespace through this line -- verify
# before replacing it with explicit imports.
from Python.data_sampler import *

## Building an MDPDataset

We first read in a large batch of samples from the file. `d3rlpy` expects the data in the form (observations, actions, rewards, terminal flags), so we assemble it that way. The helper function below builds a dataset from a list of chunks of your choosing.

In [2]:
def get_dataset(chunks : list, batch_size=30000,
                path="collected_data/rl_stochpid.txt",
                episode_length=1111) -> d3rlpy.dataset.MDPDataset :
    """Build a d3rlpy ``MDPDataset`` from selected chunks of a sample file.

    For every entry of ``chunks`` the sampler is pointed at that chunk, the
    chunk is read, and one batch of ``batch_size`` samples is drawn. The
    per-chunk batches are concatenated in the order given and wrapped in an
    ``MDPDataset`` together with a synthetic terminal-flag vector.

    Parameters
    ----------
    chunks : list
        Chunk identifiers handed one by one to ``DataSampler.use_chunk``.
    batch_size : int
        Number of samples requested from each chunk via ``get_batch``.
    path : str
        Data file given to ``DataSampler``. This argument used to be ignored
        (the body always opened ``collected_data/rl_stochpid.txt``); it is now
        honoured, and the default points at that same file so that calls which
        rely on the default keep reading the same data.
    episode_length : int
        Stride between terminal flags in the concatenated sample array
        (previously the hard-coded value 1111).

    Returns
    -------
    d3rlpy.dataset.MDPDataset
        Dataset built from the concatenated states, actions, rewards and the
        terminal flags.

    Raises
    ------
    ValueError
        If ``episode_length`` is smaller than 1, or if ``chunks`` is empty.
    """
    if episode_length < 1:
        raise ValueError("episode_length must be a positive integer")

    # Fixed seed so repeated calls are reproducible (presumably the sampler
    # draws its batches with `random` -- TODO confirm).
    random.seed(0)
    samples = DataSampler(path_to_data=path)
    samples.setting("coarse")

    states = []
    actions = []
    rewards = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        # get_batch yields four items; the last one (next states) is not
        # passed to MDPDataset below, so it is discarded right away instead
        # of being accumulated and concatenated for nothing.
        statesChunk, actionsChunk, rewardsChunk, _ = samples.get_batch(batch_size)
        states.append(statesChunk)
        actions.append(actionsChunk)
        rewards.append(rewardsChunk)

    if not states:
        # torch.cat on an empty list fails with an unhelpful message.
        raise ValueError("chunks must contain at least one chunk")

    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)

    terminals = np.zeros(len(states))
    # NOTE(review): this flags indices 0, episode_length, 2*episode_length, ...
    # i.e. the FIRST sample of every window. If the flag is meant to mark the
    # last step of each episode it should be
    # `terminals[episode_length - 1::episode_length] = 1`. Left unchanged
    # because it alters the resulting dataset -- confirm before switching.
    terminals[::episode_length] = 1

    print(states.shape)
    dataset = d3rlpy.dataset.MDPDataset(states.numpy(),
                                        actions.numpy(),
                                        rewards.numpy(), terminals)
    return dataset

We can build the dataset from there, just like this, and split into train and test sets.

In [3]:
# Build the dataset from chunks 0 through 1999.
dataset = get_dataset(list(range(2000)))

start
[ 0.00000000e+00  7.95731469e+08 -4.75891077e-02 -3.69999953e-02
  2.00999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.50429671e-01 -4.92727243e-01 -5.31666025e-03]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.25610892e-01 -3.35999953e-02
 -2.42000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.08749986e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.90489108e-01 -5.87999953e-02
 -1.01000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.76979602e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.48010892e-01  4.64000047e-02
 -1.04000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.83151637e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.08389108e-01  3.32000047e-02
 -2.02000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.25137655

[ 0.00000000e+00  7.95731469e+08  4.45610892e-01 -4.15999953e-02
 -1.45000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.82854667e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 51 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.36410892e-01 -3.37999953e-02
  8.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.45025378e-01  1.67203680e-01 -6.00000000e-01]
Read chunk # 52 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.02189108e-01 -2.81999953e-02
  1.33999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.85653844e-01 -2.69594451e-01  6.00000000e-01]
Read chunk # 53 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.06389108e-01 -5.79999531e-03
 -2.02000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.87850514e-02  6.00000000e-01]
Read chunk # 54 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -6.79891077e-02 -1.81999953e-02
  2.77999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-

Read chunk # 102 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.28189108e-01  9.80000469e-03
  2.00999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 103 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.24010892e-01  2.32000047e-02
 -2.93000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 104 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.26989108e-01 -5.97999953e-02
 -1.10000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.62172095e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 105 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.14210892e-01  1.12000047e-02
  1.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  9.04351236e-02  5.92342699e-01 -6.00000000e-01]
Read chunk # 106 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.58110892e-01  3.32000047e-02
 -2.87000134e-02  0.00000000e+00 -5.3342

[ 0.00000000e+00  7.95731469e+08 -2.03889108e-01 -1.59999531e-03
  7.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.41230860e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 153 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.02108923e-02 -5.75999953e-02
 -1.60000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.55303446e-01  2.60448458e-01 -6.00000000e-01]
Read chunk # 154 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.38789108e-01 -2.49999953e-02
 -2.11000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -2.01289443e-01  6.00000000e-01]
Read chunk # 155 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.14289108e-01 -2.27999953e-02
  1.34999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.73931443e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 156 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.13710892e-01  1.34000047e-02
 -4.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.364527

[ 0.00000000e+00  7.95731469e+08 -2.21289108e-01 -7.99995308e-04
 -2.20000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.75665250e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 206 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.48108923e-02  1.46000047e-02
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.28364642e-01  5.50456532e-02 -2.49661252e-01]
Read chunk # 207 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.00910892e-01  3.00000047e-02
 -1.52000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  4.65266662e-01 -6.00000000e-01]
Read chunk # 208 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.57689108e-01  2.42000047e-02
  2.37999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.83548139e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 209 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.12389108e-01 -7.19999531e-03
 -2.77000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.000000

[ 0.00000000e+00  7.95731469e+08 -1.23789108e-01  3.50000047e-02
 -2.99000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -1.69292322e-01  6.00000000e-01]
Read chunk # 260 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.14289108e-01  5.40000469e-03
 -8.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.51162136e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 261 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.21589108e-01 -1.59999953e-02
 -1.66000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.52540637e-01 -1.83298456e-01  6.00000000e-01]
Read chunk # 262 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.13189108e-01 -5.13999953e-02
  1.89999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.66280664e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 263 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.22810892e-01  5.40000047e-02
  2.53999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.120985

start
[ 0.00000000e+00  7.95731469e+08  3.58108923e-02  1.12000047e-02
  3.29998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.85015768e-02  9.04288377e-03 -5.18814423e-01]
Read chunk # 313 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.36891077e-02  5.04000047e-02
  8.99986580e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.52208393e-01  6.92762005e-02  1.56656693e-01]
Read chunk # 314 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.71310892e-01  3.10000047e-02
  3.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.79497123e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 315 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -8.50891077e-02  3.82000047e-02
  2.62999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.37047519e-01 -6.22459244e-02  6.00000000e-01]
Read chunk # 316 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.10810892e-01 -5.97999953e-02
  1.98999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.

start
[ 0.00000000e+00  7.95731469e+08 -3.40089108e-01  4.00000469e-03
 -2.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -8.76858206e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 365 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.36389108e-01  2.04000047e-02
 -2.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 366 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.84108923e-02 -2.75999953e-02
 -2.23000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  4.39420718e-01 -6.00000000e-01]
Read chunk # 367 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.30389108e-01 -5.85999953e-02
  8.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.42118666e-01  2.63720877e-02  6.00000000e-01]
Read chunk # 368 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.96010892e-01  3.74000047e-02
  4.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.

[ 0.00000000e+00  7.95731469e+08 -3.90189108e-01  3.20000469e-03
  8.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.66133711e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 414 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.77889108e-01 -6.19999531e-03
  2.05999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 415 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.94689108e-01  1.92000047e-02
 -7.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.15756192e-01 -5.77515100e-01  6.00000000e-01]
Read chunk # 416 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.38489108e-01 -5.63999953e-02
 -2.74000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.21147849e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 417 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.09089108e-01 -4.19999953e-02
 -4.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  8.963099

Read chunk # 467 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.02889108e-01 -3.33999953e-02
  6.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.61621743e-04 -6.00000000e-01  6.00000000e-01]
Read chunk # 468 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  6.37108923e-02  5.16000047e-02
 -2.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.26272615e-01 -6.00000000e-01]
Read chunk # 469 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.19389108e-01 -5.61999953e-02
 -1.83000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.31365978e-01 -3.13119816e-01  6.00000000e-01]
Read chunk # 470 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.50110892e-01 -1.33999953e-02
 -1.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.11948579e-02 -5.52342020e-02 -3.93177770e-01]
Read chunk # 471 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.55108923e-02 -1.61999953e-02
 -2.26000134e-02  0.00000000e+00 -5.3342

[ 0.00000000e+00  7.95731469e+08  7.09108923e-02 -3.51999953e-02
 -8.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.44354934e-01  2.40725104e-01 -6.00000000e-01]
Read chunk # 520 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.81789108e-01 -2.33999953e-02
 -2.73000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 521 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.92210892e-01  4.69200001e-09
 -6.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.86748751e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 522 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.57108923e-02  6.60000469e-03
  2.28999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  7.07521537e-02 -6.00000000e-01]
Read chunk # 523 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.35610892e-01 -1.11999953e-02
 -2.99000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.819471

[ 0.00000000e+00  7.95731469e+08  2.70710892e-01  1.80000047e-02
 -5.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.20137414e-01  5.50894748e-01 -6.00000000e-01]
Read chunk # 567 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.60589108e-01 -4.37999953e-02
  2.45999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -7.70055079e-02  6.00000000e-01]
Read chunk # 568 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.18891077e-02 -2.57999953e-02
 -1.15000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.52135913e-01 -2.93324567e-02  2.87400114e-01]
Read chunk # 569 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.29110892e-01 -5.11999953e-02
 -1.50000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.64037458e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 570 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.15610892e-01 -3.51999953e-02
  7.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.000000

start
[ 0.00000000e+00  7.95731469e+08 -4.37289108e-01 -5.05999953e-02
 -2.01000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.51851377e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 620 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.72310892e-01 -4.05999953e-02
  1.77999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.61349369e-01 -6.00000000e-01]
Read chunk # 621 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.35789108e-01 -4.55999953e-02
 -1.69000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.08765494e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 622 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.68710892e-01 -6.19999531e-03
 -6.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.32999016e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 623 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.31289108e-01 -1.37999953e-02
 -2.07000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.

[ 0.00000000e+00  7.95731469e+08 -3.04989108e-01 -3.77999953e-02
  1.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.68214143e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 671 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.38489108e-01 -2.81999953e-02
  4.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.13255921e-01 -4.32566674e-01  6.00000000e-01]
Read chunk # 672 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.55389108e-01  6.80000469e-03
  1.46999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.32480300e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 673 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.64510892e-01  3.54000047e-02
 -2.15000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -9.66830564e-02  6.00000000e-01  8.48428897e-03]
Read chunk # 674 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.05189108e-01  1.86000047e-02
  2.56999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.727429

start
[ 0.00000000e+00  7.95731469e+08 -4.35489108e-01 -2.25999953e-02
 -2.27000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.01923484e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 723 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.44891077e-02 -3.37999953e-02
 -2.52000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  7.31893603e-02  2.12762309e-01]
Read chunk # 724 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.58610892e-01 -5.43999953e-02
  1.78999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.87836788e-01  3.97626436e-01 -6.00000000e-01]
Read chunk # 725 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.96108923e-02 -4.13999953e-02
 -2.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  1.47236679e-01 -4.68845664e-01]
Read chunk # 726 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -7.60891077e-02 -2.09999953e-02
 -1.91000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.

[ 0.00000000e+00  7.95731469e+08 -7.51891077e-02 -2.83999953e-02
  2.64999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.11742508e-01 -5.58024364e-01  3.00646130e-02]
Read chunk # 774 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.39010892e-01  2.94000047e-02
 -2.92000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  5.29377438e-01 -4.34227796e-01]
Read chunk # 775 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.84689108e-01  4.70000047e-02
  2.79999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.36931285e-02 -2.02243799e-01  6.00000000e-01]
Read chunk # 776 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.99989108e-01  5.84000047e-02
  2.80999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.67939926e-01 -2.09140783e-02  6.00000000e-01]
Read chunk # 777 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.92489108e-01  4.00000469e-03
  5.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.656764

start
[ 0.00000000e+00  7.95731469e+08  3.88010892e-01 -3.19999953e-02
  5.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.70605148e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 827 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.19889108e-01  2.88000047e-02
 -2.94000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.01462549e-01  6.00000000e-01]
Read chunk # 828 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.99189108e-01  5.32000047e-02
  1.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.64102964e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 829 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.49889108e-01 -3.85999953e-02
  2.32999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.39205518e-01  6.00000000e-01]
Read chunk # 830 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.09989108e-01 -3.17999953e-02
 -1.26000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.

start
[ 0.00000000e+00  7.95731469e+08  3.51110892e-01 -5.99995308e-04
  3.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.31890079e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 876 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.19610892e-01 -5.11999953e-02
 -2.39000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 877 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.79989108e-01  2.80000469e-03
 -1.71000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 878 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.30389108e-01  2.28000047e-02
 -1.34198363e-08  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.24801029e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 879 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.44189108e-01 -4.31999953e-02
  2.37999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.

[ 0.00000000e+00  7.95731469e+08  3.68510892e-01 -6.99999531e-03
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.90130903e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 926 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.27010892e-01 -3.21999953e-02
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.13665010e-01  1.22408720e-01 -6.00000000e-01]
Read chunk # 927 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.87989108e-01  3.92000047e-02
  2.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.27341824e-01 -5.53349873e-01  6.00000000e-01]
Read chunk # 928 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.15910892e-01 -5.13999953e-02
  6.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.57571474e-01  3.10731550e-01 -6.00000000e-01]
Read chunk # 929 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.96108923e-02  4.24000047e-02
 -2.76000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.000000

start
[ 0.00000000e+00  7.95731469e+08 -8.41891077e-02 -5.41999953e-02
  1.82999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -2.52725788e-01 -9.11130647e-02]
Read chunk # 979 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.62389108e-01 -1.57999953e-02
  6.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.47524840e-02  2.69616991e-01  6.00000000e-01]
Read chunk # 980 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.84889108e-01 -2.77999953e-02
  2.72999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.82185619e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 981 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.09010892e-01 -2.85999953e-02
  5.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.03342529e-02 -2.84968606e-01]
Read chunk # 982 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.79089108e-01 -2.91999953e-02
  1.81999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.

[ 0.00000000e+00  7.95731469e+08 -2.02989108e-01  1.54000047e-02
 -1.85000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1028 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.29789108e-01  9.20000469e-03
  7.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.10886093e-02 -8.99201645e-02  6.00000000e-01]
Read chunk # 1029 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  9.70108923e-02  1.88000047e-02
 -1.39000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.17599242e-01  4.49183106e-01 -4.06791516e-01]
Read chunk # 1030 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.80989108e-01 -3.69999953e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.82410094e-01 -5.30180399e-01  6.00000000e-01]
Read chunk # 1031 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.91489108e-01  2.94000047e-02
  2.91999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00

[ 0.00000000e+00  7.95731469e+08 -1.72089108e-01  1.82000047e-02
  5.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.30163612e-02 -3.02810951e-01  6.00000000e-01]
Read chunk # 1078 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.05589108e-01  2.00000047e-02
 -1.71000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -5.65169417e-01  6.00000000e-01]
Read chunk # 1079 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.08210892e-01 -1.23999953e-02
 -1.09000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.73712972e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1080 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.44910892e-01  2.02000047e-02
  2.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.00249960e-01 -6.00000000e-01]
Read chunk # 1081 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.27689108e-01  3.26000047e-02
 -5.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.49

[ 0.00000000e+00  7.95731469e+08  3.80810892e-01  3.42000047e-02
 -1.77000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  5.88804319e-01 -6.00000000e-01]
Read chunk # 1124 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.22810892e-01  2.90000047e-02
  1.16999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.91904334e-01  3.15182029e-02 -6.00000000e-01]
Read chunk # 1125 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.31089229e-03  3.16000047e-02
 -7.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.07877978e-02  7.43174936e-02 -9.41300200e-03]
Read chunk # 1126 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.72810892e-01 -4.59999531e-03
  2.91999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.74493726e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1127 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.93610892e-01  5.76000047e-02
  1.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.25

start
[ 0.00000000e+00  7.95731469e+08 -2.05891077e-02 -5.27999953e-02
 -1.67000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.90171747e-01  8.33739627e-02  1.18166002e-02]
Read chunk # 1174 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.61089229e-03 -7.99995308e-04
  1.40999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  9.02483090e-02  9.12377654e-02  5.06199830e-02]
Read chunk # 1175 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.82108923e-02 -4.61999953e-02
  1.74999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.27345434e-01 -7.71118878e-02  8.91861846e-02]
Read chunk # 1176 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.46310892e-01 -1.89999953e-02
  5.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.72738640e-02 -1.07251242e-01 -6.00000000e-01]
Read chunk # 1177 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.36789108e-01  2.04000047e-02
 -2.52000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -1.96891077e-02  5.04000047e-02
  1.14999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.69681687e-01 -3.36221353e-01  1.54925933e-01]
Read chunk # 1221 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.87108923e-02  3.72000047e-02
  2.88999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.01917101e-01  7.14564315e-02 -6.00000000e-01]
Read chunk # 1222 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.39989108e-01 -3.57999953e-02
 -1.41000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.96066795e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1223 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.72789108e-01  3.56000047e-02
  1.70999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1224 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.90789108e-01 -2.99999953e-02
  2.14999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00

start
[ 0.00000000e+00  7.95731469e+08 -2.88789108e-01  2.00000047e-02
  1.98999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.11362085e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1269 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.52891077e-02 -6.19999531e-03
  4.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.06581636e-02 -3.24454045e-01 -1.00408094e-01]
Read chunk # 1270 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.23410892e-01 -3.73999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.37594753e-01  2.14006131e-01 -6.00000000e-01]
Read chunk # 1271 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.51108923e-02  5.44000047e-02
 -8.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.90477657e-02  1.42917439e-01  5.20334799e-02]
Read chunk # 1272 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  5.68108923e-02 -4.77999953e-02
  7.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08  4.49510892e-01 -2.57999953e-02
 -1.61000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.07430253e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1320 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.98789108e-01 -3.77999953e-02
  2.74999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -4.98376806e-01  6.00000000e-01]
Read chunk # 1321 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.98510892e-01 -5.99995308e-04
 -2.33000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1322 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.81289108e-01  3.60000469e-03
  2.20999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1323 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.24108923e-02 -1.31999953e-02
 -8.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.63

start
[ 0.00000000e+00  7.95731469e+08  1.04210892e-01 -1.23999953e-02
  8.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.01430148e-01  4.71151342e-02 -6.00000000e-01]
Read chunk # 1370 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.50510892e-01  1.92000047e-02
 -1.35000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.30229529e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1371 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.37510892e-01 -3.01999953e-02
  1.79999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.41940734e-01  1.43452757e-01 -6.00000000e-01]
Read chunk # 1372 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.19289108e-01  2.76000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1373 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.32889108e-01  8.80000469e-03
 -2.99000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -1.89789108e-01 -2.05999953e-02
  6.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.19656957e-01 -4.24118140e-01  6.00000000e-01]
Read chunk # 1418 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.81310892e-01  4.18000047e-02
 -1.51000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1419 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.32889108e-01 -1.25999953e-02
  1.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.20260922e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1420 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.78010892e-01  3.18000047e-02
  5.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.60105165e-02  3.38657431e-01 -6.00000000e-01]
Read chunk # 1421 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -6.46891077e-02  2.54000047e-02
  9.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.14

[ 0.00000000e+00  7.95731469e+08  3.61010892e-01  5.94000047e-02
  2.80999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.05967092e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1469 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.36289108e-01  4.96000047e-02
  8.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  8.83705240e-02 -5.38762297e-01  6.00000000e-01]
Read chunk # 1470 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.65389108e-01 -3.17999953e-02
 -8.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -7.29136229e-02 -5.38426634e-01  6.00000000e-01]
Read chunk # 1471 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.42889108e-01 -3.19999953e-02
  2.58999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.69833796e-01  6.00000000e-01]
Read chunk # 1472 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.44710892e-01  1.42000047e-02
 -6.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  9.25

start
[ 0.00000000e+00  7.95731469e+08 -1.42089108e-01 -1.39999531e-03
 -2.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  8.74292842e-02 -1.47313000e-01  6.00000000e-01]
Read chunk # 1515 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.28110892e-01  4.88000047e-02
 -4.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.79613959e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1516 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.03010892e-01  4.62000047e-02
  5.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.81372080e-02  4.67746125e-01 -6.00000000e-01]
Read chunk # 1517 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.02289108e-01 -5.89999953e-02
  7.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.54428589e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1518 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.28210892e-01 -3.99995308e-04
 -1.86000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -3.23589108e-01  2.98000047e-02
 -3.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.12165568e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1565 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.23108923e-02 -3.57999953e-02
  2.90999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -1.09575768e-01 -2.40636657e-01]
Read chunk # 1566 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -6.55891077e-02 -1.81999953e-02
  2.00999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.43412567e-01 -2.67062351e-01  6.00000000e-01]
Read chunk # 1567 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.28108923e-02  5.36000047e-02
  9.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.58228770e-01 -1.70677960e-02 -1.47902578e-01]
Read chunk # 1568 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.14910892e-01  5.08000047e-02
  2.64999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.89

start
[ 0.00000000e+00  7.95731469e+08 -2.14089108e-01  4.68000047e-02
 -5.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.61252167e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1617 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.33108923e-02 -7.99995308e-04
  7.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.76301446e-01  8.94054180e-02 -5.13684134e-01]
Read chunk # 1618 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.23910892e-01  5.44000047e-02
 -1.64000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.71286012e-01  6.00000000e-01 -5.25191252e-01]
Read chunk # 1619 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.04108923e-02  2.80000047e-02
 -5.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.94198641e-01  1.08127722e-02 -6.64634631e-02]
Read chunk # 1620 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.20810892e-01 -5.29999953e-02
  2.97999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


start
[ 0.00000000e+00  7.95731469e+08  2.92108923e-02  2.24000047e-02
 -5.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.18508970e-01  2.44976727e-01 -3.95829242e-01]
Read chunk # 1669 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.58689108e-01  8.20000469e-03
 -1.10000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.11787292e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 1670 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.64510892e-01  1.64000047e-02
 -1.28000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.40685386e-02  2.29551228e-01 -6.00000000e-01]
Read chunk # 1671 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.27210892e-01  2.12000047e-02
 -2.51000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1672 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.54310892e-01  5.20000469e-03
  2.23999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


start
[ 0.00000000e+00  7.95731469e+08 -3.64989108e-01 -5.57999953e-02
  2.79999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.85600921e-01  6.86389451e-02  6.00000000e-01]
Read chunk # 1720 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.87410892e-01  5.52000047e-02
  2.07999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.36859665e-01  3.14333831e-01 -6.00000000e-01]
Read chunk # 1721 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.78810892e-01  3.76000047e-02
  1.19999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.76470011e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1722 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.37210892e-01  8.40000469e-03
  4.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.56802582e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 1723 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.42910892e-01 -1.19999953e-02
 -2.58000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08  2.95108923e-02 -3.75999953e-02
  2.79999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -2.09514413e-01 -1.53278954e-01]
Read chunk # 1770 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.92910892e-01 -2.61999953e-02
  1.28999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.43643553e-01 -1.59929835e-01 -6.00000000e-01]
Read chunk # 1771 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.36108923e-02 -2.21999953e-02
  5.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.56288742e-01  3.66149890e-01 -6.00000000e-01]
Read chunk # 1772 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.35310892e-01 -2.65999953e-02
  1.60999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.82724931e-01  5.59806812e-02 -6.00000000e-01]
Read chunk # 1773 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.39010892e-01  1.82000047e-02
  2.39999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00

[ 0.00000000e+00  7.95731469e+08 -1.05189108e-01  2.30000047e-02
  2.68999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -4.86562490e-01  6.00000000e-01]
Read chunk # 1820 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.21310892e-01  1.74000047e-02
 -2.28000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  3.19535893e-01 -6.00000000e-01]
Read chunk # 1821 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.38110892e-01 -2.59999953e-02
 -2.58000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.53679247e-01  3.88160056e-01 -6.00000000e-01]
Read chunk # 1822 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.82289108e-01 -4.13999953e-02
  1.65999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.27271947e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1823 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.20289108e-01 -5.49999953e-02
 -1.97000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.77

[ 0.00000000e+00  7.95731469e+08 -4.25589108e-01 -4.81999953e-02
  5.29998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.68611830e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1869 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.25891077e-02  7.20000469e-03
  1.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.55260624e-03 -3.12380432e-01  1.80859602e-01]
Read chunk # 1870 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.43810892e-01 -4.05999953e-02
 -3.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.65373217e-01 -3.12006856e-02 -6.00000000e-01]
Read chunk # 1871 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.58589108e-01 -3.79999953e-02
 -2.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.09434744e-01 -5.42482642e-01  6.00000000e-01]
Read chunk # 1872 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.46710892e-01 -1.59999531e-03
 -2.28000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.67

[ 0.00000000e+00  7.95731469e+08  3.65810892e-01 -5.71999953e-02
 -1.72000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.66773768e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1919 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.12389108e-01 -3.49999953e-02
 -7.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.62987534e-02 -5.82291756e-01  6.00000000e-01]
Read chunk # 1920 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.52489108e-01 -5.61999953e-02
  6.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.84240900e-01 -1.20263578e-01  6.00000000e-01]
Read chunk # 1921 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  9.11089229e-03  8.80000469e-03
  1.04999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.30898155e-01 -2.05160369e-02 -5.33774587e-02]
Read chunk # 1922 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.32789108e-01 -1.39999531e-03
  2.30999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.98

[ 0.00000000e+00  7.95731469e+08 -3.36789108e-01 -4.41999953e-02
  1.59999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.24260526e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1970 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.13589108e-01 -1.59999953e-02
 -2.06000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.55822005e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1971 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.20189108e-01 -1.19999531e-03
 -3.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.32413842e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 1972 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.68610892e-01 -5.83999953e-02
  2.43999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1973 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  5.11108923e-02  5.94000047e-02
 -2.82000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00

In [4]:
# Summarise the episode returns found in the logged dataset; the bare
# expression on the last line is what the notebook renders as the cell output.
print("The behavior policy value statistics are:")
dataset.compute_stats()["return"]

The behavior policy value statistics are:


{'mean': -127.88257,
 'std': 81.13078,
 'min': -410.7058,
 'max': 0.0,
 'histogram': (array([  6,   7,  19,  21,  33,  35,  51,  58,  59,  50,  70,  93, 151,
         178, 209, 220, 273, 353, 112,   1]),
  array([-410.7058  , -390.17053 , -369.63522 , -349.09995 , -328.56464 ,
         -308.02936 , -287.49408 , -266.95877 , -246.4235  , -225.8882  ,
         -205.3529  , -184.81761 , -164.28232 , -143.74704 , -123.21175 ,
         -102.67645 ,  -82.14116 ,  -61.605873,  -41.07058 ,  -20.53529 ,
            0.      ], dtype=float32))}

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the episodes for evaluation. A fixed random_state keeps the
# split identical across kernel restarts, so the metrics reported further down
# are comparable between runs (without it the split depends on NumPy's
# unseeded global random state).
train_episodes, test_episodes = train_test_split(
    dataset, test_size=0.2, random_state=0
)

## Setting up an Algorithm

In [6]:
from d3rlpy.algos import CQL
from d3rlpy.preprocessing import MinMaxActionScaler

# Action scaler configured with explicit bounds of -0.6 and 0.6.
action_scaler = MinMaxActionScaler(minimum=-0.6, maximum=0.6)

# CQL agent with a mean Q-function, standardised rewards and the action
# scaler defined above.
model = CQL(
    q_func_factory="mean",  # "qr" selects a quantile-regression Q-function; optional
    reward_scaler="standard",
    action_scaler=action_scaler,
    actor_learning_rate=1e-5,
    critic_learning_rate=3e-4,
    use_gpu=False,  # change it to True if you have one
)

# Build the model from the dataset up front so that it can be scored before
# any training has happened.
model.build_with_dataset(dataset)

In [7]:
from d3rlpy.metrics.scorer import (
    average_value_estimation_scorer,
    initial_state_value_estimation_scorer,
    td_error_scorer,
)

# Baseline: score the model on the held-out episodes before it has been fit,
# so later training metrics have a reference point.
ave_error_init = average_value_estimation_scorer(model, test_episodes)
print(ave_error_init)

0.099473260977188


In [8]:
# Load the TensorBoard notebook extension and open it on the `runs` directory,
# which is the directory the training cells pass as `tensorboard_dir`.
# NOTE(review): the saved output of this cell shows TensorBoard failing to
# launch in the recorded environment — check the tensorboard install.
%load_ext tensorboard
%tensorboard --logdir runs

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
Traceback (most recent call last):
  File "/home/dasc/anaconda3/envs/jbreeden3.10/bin/tensorboard", line 6, in <module>
    from tensorboard.main import run_main
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/main.py", line 40, in <module>
    from tensorboard import default
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/default.py", line 38, in <module>
    from tensorboard.plugins.audio import audio_plugin
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/plugins/audio/audio_plugin.py", line 25, in <module>
    from tensorboard import plugin_util
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/plugin_util.py", line 21, in <module>
    from tensorboard._vendor import bleach
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorb

In [9]:
# Train for 20 epochs on the training episodes, scoring on the held-out
# episodes after each epoch and logging to the `runs` TensorBoard directory.
# The call is deliberately the last expression of the cell, so its return
# value (the per-epoch metrics) is displayed as the cell output.
model.fit(
    train_episodes,
    eval_episodes=test_episodes,
    n_epochs=20,
    tensorboard_dir="runs",
    scorers={
        "td_error": td_error_scorer,
        "init_value": initial_state_value_estimation_scorer,
        "ave_value": average_value_estimation_scorer,
    },
)

2022-04-17 14:40.30 [debug    ] RoundIterator is selected.
2022-04-17 14:40.30 [info     ] Directory is created at d3rlpy_logs/CQL_20220417144030
2022-04-17 14:40.30 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 14:40.30 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-17 14:40.30 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220417144030/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 1e-05, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_rate': 0.0001, 'alpha_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_threshold': 10.0, 'batch_size': 256, 'conser

Epoch 1/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 14:52.16 [info     ] CQL_20220417144030: epoch=1 step=6928 epoch=1 metrics={'time_sample_batch': 0.00029666907655174135, 'time_algorithm_update': 0.10028393924511608, 'temp_loss': 2.8203760104524895, 'temp': 0.7465105789603868, 'alpha_loss': -4.3775416721176486, 'alpha': 1.2774664221060468, 'critic_loss': 22.781028200069972, 'actor_loss': 26.226714315611275, 'time_step': 0.10086665675904403, 'td_error': 37.92656884774088, 'init_value': -71.30191040039062, 'ave_value': -63.93212064640088} step=6928
2022-04-17 14:52.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_6928.pt


Epoch 2/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 15:02.42 [info     ] CQL_20220417144030: epoch=2 step=13856 epoch=2 metrics={'time_sample_batch': 0.00029326410959829637, 'time_algorithm_update': 0.08866798440661221, 'temp_loss': 0.9077620992768237, 'temp': 0.4195573395569798, 'alpha_loss': 0.6452432500244717, 'alpha': 1.2477544136775558, 'critic_loss': 113.6042675585174, 'actor_loss': 92.3157616746343, 'time_step': 0.08927167986474467, 'td_error': 80.32413035570184, 'init_value': -130.15475463867188, 'ave_value': -120.32872105798826} step=13856
2022-04-17 15:02.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_13856.pt


Epoch 3/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 15:13.55 [info     ] CQL_20220417144030: epoch=3 step=20784 epoch=3 metrics={'time_sample_batch': 0.00033041993134566875, 'time_algorithm_update': 0.09504533529832236, 'temp_loss': 0.2965051314841115, 'temp': 0.23614714365865494, 'alpha_loss': -0.017581665574032332, 'alpha': 1.096549143244708, 'critic_loss': 172.63682422595954, 'actor_loss': 127.72803148111075, 'time_step': 0.0957175443340394, 'td_error': 81.91163173139155, 'init_value': -146.16566467285156, 'ave_value': -136.11714112512377} step=20784
2022-04-17 15:13.55 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_20784.pt


Epoch 4/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 15:25.00 [info     ] CQL_20220417144030: epoch=4 step=27712 epoch=4 metrics={'time_sample_batch': 0.0003308735392110177, 'time_algorithm_update': 0.09374219251422354, 'temp_loss': -0.0017021659158384503, 'temp': 0.18721377709557843, 'alpha_loss': 0.5304022061195348, 'alpha': 0.9050909366295189, 'critic_loss': 149.73387621807723, 'actor_loss': 127.10508371005157, 'time_step': 0.094418332462498, 'td_error': 64.3223646178937, 'init_value': -136.2177276611328, 'ave_value': -127.48888593494264} step=27712
2022-04-17 15:25.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_27712.pt


Epoch 5/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 15:35.47 [info     ] CQL_20220417144030: epoch=5 step=34640 epoch=5 metrics={'time_sample_batch': 0.00033326360975622304, 'time_algorithm_update': 0.09129185558191348, 'temp_loss': 0.00014870004498159973, 'temp': 0.20514249940796414, 'alpha_loss': 0.12687077615010725, 'alpha': 0.7218535514612947, 'critic_loss': 117.51959465337406, 'actor_loss': 115.91373261127957, 'time_step': 0.09196531700344614, 'td_error': 52.766254027502434, 'init_value': -124.29606628417969, 'ave_value': -116.89887721603162} step=34640
2022-04-17 15:35.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_34640.pt


Epoch 6/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 15:46.33 [info     ] CQL_20220417144030: epoch=6 step=41568 epoch=6 metrics={'time_sample_batch': 0.00033371477424410143, 'time_algorithm_update': 0.09117597575551092, 'temp_loss': 0.004415782955999759, 'temp': 0.1944931348630896, 'alpha_loss': 0.1472975689020663, 'alpha': 0.6299157243041326, 'critic_loss': 101.68251729104453, 'actor_loss': 106.02977155813169, 'time_step': 0.09185240822080652, 'td_error': 47.47443822181218, 'init_value': -114.02496337890625, 'ave_value': -106.78653254952786} step=41568
2022-04-17 15:46.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_41568.pt


Epoch 7/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 15:57.44 [info     ] CQL_20220417144030: epoch=7 step=48496 epoch=7 metrics={'time_sample_batch': 0.00034155743089063614, 'time_algorithm_update': 0.09463640307581728, 'temp_loss': 0.005297220559643128, 'temp': 0.17290901675321352, 'alpha_loss': 0.0911868620668141, 'alpha': 0.5269975530168614, 'critic_loss': 93.21913470801043, 'actor_loss': 97.3536046823233, 'time_step': 0.09532753978673085, 'td_error': 44.14311976596095, 'init_value': -103.66361999511719, 'ave_value': -96.50177199725755} step=48496
2022-04-17 15:57.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_48496.pt


Epoch 8/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 16:08.45 [info     ] CQL_20220417144030: epoch=8 step=55424 epoch=8 metrics={'time_sample_batch': 0.0003378468696845321, 'time_algorithm_update': 0.09322258193415803, 'temp_loss': 0.00044251969606567477, 'temp': 0.16027348365755129, 'alpha_loss': 0.003970423552831096, 'alpha': 0.502546705942231, 'critic_loss': 85.61696034903889, 'actor_loss': 87.85422803733421, 'time_step': 0.09390893186212412, 'td_error': 40.05525231115046, 'init_value': -94.1341781616211, 'ave_value': -87.36799008027445} step=55424
2022-04-17 16:08.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_55424.pt


Epoch 9/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 16:19.26 [info     ] CQL_20220417144030: epoch=9 step=62352 epoch=9 metrics={'time_sample_batch': 0.00033443023646400927, 'time_algorithm_update': 0.09048452952717523, 'temp_loss': -3.700287263250563e-05, 'temp': 0.1618018929027107, 'alpha_loss': -0.0457827813098504, 'alpha': 0.513931519792826, 'critic_loss': 74.00235746738954, 'actor_loss': 76.09822630606953, 'time_step': 0.0911672903745334, 'td_error': 33.62778299534533, 'init_value': -79.91213989257812, 'ave_value': -74.10091873916306} step=62352
2022-04-17 16:19.26 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_62352.pt


Epoch 10/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 16:30.09 [info     ] CQL_20220417144030: epoch=10 step=69280 epoch=10 metrics={'time_sample_batch': 0.00033609937300032336, 'time_algorithm_update': 0.09073116518471037, 'temp_loss': -0.0017297781914265963, 'temp': 0.16508070560382984, 'alpha_loss': -0.0013461766214363422, 'alpha': 0.5445868475126063, 'critic_loss': 61.39090847712673, 'actor_loss': 63.66786521852154, 'time_step': 0.09141203578707803, 'td_error': 28.339810980717594, 'init_value': -67.54627990722656, 'ave_value': -62.985547665973755} step=69280
2022-04-17 16:30.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_69280.pt


Epoch 11/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 16:40.49 [info     ] CQL_20220417144030: epoch=11 step=76208 epoch=11 metrics={'time_sample_batch': 0.0003351768087294581, 'time_algorithm_update': 0.09040082867233637, 'temp_loss': 0.0008734338782493409, 'temp': 0.1666486959508674, 'alpha_loss': -0.0033163088568961127, 'alpha': 0.5391322114605964, 'critic_loss': 52.542249211211264, 'actor_loss': 52.46349658977077, 'time_step': 0.09108240203824385, 'td_error': 24.590117312910582, 'init_value': -54.41242599487305, 'ave_value': -50.89397732156464} step=76208
2022-04-17 16:40.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_76208.pt


Epoch 12/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 16:51.58 [info     ] CQL_20220417144030: epoch=12 step=83136 epoch=12 metrics={'time_sample_batch': 0.0003428466050233907, 'time_algorithm_update': 0.09474210977967286, 'temp_loss': -0.0002966092013942554, 'temp': 0.16538644891541976, 'alpha_loss': 0.013455251705558611, 'alpha': 0.5406909546541699, 'critic_loss': 46.91550199603167, 'actor_loss': 43.13007960969251, 'time_step': 0.09543948890705857, 'td_error': 22.052855251618006, 'init_value': -44.77936553955078, 'ave_value': -42.147361440728275} step=83136
2022-04-17 16:51.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_83136.pt


Epoch 13/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 17:03.05 [info     ] CQL_20220417144030: epoch=13 step=90064 epoch=13 metrics={'time_sample_batch': 0.0003418824000942239, 'time_algorithm_update': 0.09409243983429519, 'temp_loss': -0.001293469315508217, 'temp': 0.16731202132638553, 'alpha_loss': 0.03768408587832313, 'alpha': 0.5177417415480278, 'critic_loss': 43.03249496189709, 'actor_loss': 36.27430096330599, 'time_step': 0.09478467265810757, 'td_error': 20.379027963197597, 'init_value': -37.428524017333984, 'ave_value': -35.29611509922182} step=90064
2022-04-17 17:03.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_90064.pt


Epoch 14/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 17:14.03 [info     ] CQL_20220417144030: epoch=14 step=96992 epoch=14 metrics={'time_sample_batch': 0.00033165841399935046, 'time_algorithm_update': 0.09290178099488, 'temp_loss': 0.00165975175841427, 'temp': 0.16668926349327, 'alpha_loss': 0.031212258777586075, 'alpha': 0.49083223611580995, 'critic_loss': 41.06977227064362, 'actor_loss': 31.35212546876066, 'time_step': 0.09357792465984133, 'td_error': 19.445633759966686, 'init_value': -34.244972229003906, 'ave_value': -32.15121388861863} step=96992
2022-04-17 17:14.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_96992.pt


Epoch 15/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 17:24.51 [info     ] CQL_20220417144030: epoch=15 step=103920 epoch=15 metrics={'time_sample_batch': 0.0003339206718132072, 'time_algorithm_update': 0.09140906247200646, 'temp_loss': 0.0006839100771672878, 'temp': 0.16274796220243803, 'alpha_loss': 0.04539761136388828, 'alpha': 0.4529016237000464, 'critic_loss': 39.6999629056963, 'actor_loss': 28.048616693662716, 'time_step': 0.09208835017873838, 'td_error': 19.147145683267212, 'init_value': -29.7656192779541, 'ave_value': -27.803563339465036} step=103920
2022-04-17 17:24.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_103920.pt


Epoch 16/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 17:35.42 [info     ] CQL_20220417144030: epoch=16 step=110848 epoch=16 metrics={'time_sample_batch': 0.00033631504407931144, 'time_algorithm_update': 0.09191817919321478, 'temp_loss': 0.00024372155119039334, 'temp': 0.16008990707293186, 'alpha_loss': 0.03971895059147152, 'alpha': 0.4196545460732443, 'critic_loss': 39.48648498045586, 'actor_loss': 25.173988843280778, 'time_step': 0.09260017959955934, 'td_error': 19.062019903686785, 'init_value': -28.239370346069336, 'ave_value': -26.378089968246602} step=110848
2022-04-17 17:35.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_110848.pt


Epoch 17/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 17:46.29 [info     ] CQL_20220417144030: epoch=17 step=117776 epoch=17 metrics={'time_sample_batch': 0.00033768047912016203, 'time_algorithm_update': 0.09134629554616386, 'temp_loss': 0.001612349915157952, 'temp': 0.15800954198451847, 'alpha_loss': 0.028815326230948977, 'alpha': 0.38908843339074567, 'critic_loss': 39.09105355968789, 'actor_loss': 22.732619915777004, 'time_step': 0.09203076758368187, 'td_error': 18.958369229967328, 'init_value': -26.109521865844727, 'ave_value': -24.454958600806258} step=117776
2022-04-17 17:46.29 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_117776.pt


Epoch 18/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 17:57.20 [info     ] CQL_20220417144030: epoch=18 step=124704 epoch=18 metrics={'time_sample_batch': 0.0003342624693489515, 'time_algorithm_update': 0.09189760805415226, 'temp_loss': 0.0007355376482805706, 'temp': 0.155023693223215, 'alpha_loss': 0.03034949796024435, 'alpha': 0.3635110840476688, 'critic_loss': 39.006215102961995, 'actor_loss': 20.73817636975997, 'time_step': 0.09258182828613572, 'td_error': 18.61155615672758, 'init_value': -23.49062728881836, 'ave_value': -21.844551638258874} step=124704
2022-04-17 17:57.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_124704.pt


Epoch 19/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 18:08.24 [info     ] CQL_20220417144030: epoch=19 step=131632 epoch=19 metrics={'time_sample_batch': 0.00033422361620579253, 'time_algorithm_update': 0.09361962623051077, 'temp_loss': 0.0010802490694779585, 'temp': 0.15126606310324153, 'alpha_loss': 0.027971341859239756, 'alpha': 0.3347467774908138, 'critic_loss': 38.89104494267502, 'actor_loss': 19.261070990225186, 'time_step': 0.09429922761873065, 'td_error': 18.72637916507333, 'init_value': -22.360034942626953, 'ave_value': -20.8096111468993} step=131632
2022-04-17 18:08.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_131632.pt


Epoch 20/20:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-17 18:19.17 [info     ] CQL_20220417144030: epoch=20 step=138560 epoch=20 metrics={'time_sample_batch': 0.0003328250751209039, 'time_algorithm_update': 0.09206571142612924, 'temp_loss': 0.00038696179106884244, 'temp': 0.14947076501673212, 'alpha_loss': 0.026300459506297277, 'alpha': 0.3118550264239999, 'critic_loss': 38.48978160570352, 'actor_loss': 18.019496157627735, 'time_step': 0.09274033637713064, 'td_error': 18.26813940368282, 'init_value': -20.709875106811523, 'ave_value': -19.23658484142155} step=138560
2022-04-17 18:19.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220417144030/model_138560.pt


[(1,
  {'time_sample_batch': 0.00029666907655174135,
   'time_algorithm_update': 0.10028393924511608,
   'temp_loss': 2.8203760104524895,
   'temp': 0.7465105789603868,
   'alpha_loss': -4.3775416721176486,
   'alpha': 1.2774664221060468,
   'critic_loss': 22.781028200069972,
   'actor_loss': 26.226714315611275,
   'time_step': 0.10086665675904403,
   'td_error': 37.92656884774088,
   'init_value': -71.30191040039062,
   'ave_value': -63.93212064640088}),
 (2,
  {'time_sample_batch': 0.00029326410959829637,
   'time_algorithm_update': 0.08866798440661221,
   'temp_loss': 0.9077620992768237,
   'temp': 0.4195573395569798,
   'alpha_loss': 0.6452432500244717,
   'alpha': 1.2477544136775558,
   'critic_loss': 113.6042675585174,
   'actor_loss': 92.3157616746343,
   'time_step': 0.08927167986474467,
   'td_error': 80.32413035570184,
   'init_value': -130.15475463867188,
   'ave_value': -120.32872105798826}),
 (3,
  {'time_sample_batch': 0.00033041993134566875,
   'time_algorithm_update': 0

In [10]:
# Persist the trained agent in two forms. They must go to different files:
# previously both calls wrote to 'cqlStochpid2000_Ep20.pt', so the exported
# policy silently overwrote the parameter checkpoint.
#
# 1. Full set of network parameters (for reloading into a CQL object).
model.save_model('cqlStochpid2000_Ep20_model.pt')
# 2. Exported policy. This keeps the original file name, so anything that
#    already loads 'cqlStochpid2000_Ep20.pt' still finds the same content.
model.save_policy('cqlStochpid2000_Ep20.pt')

  minimum = torch.tensor(
  maximum = torch.tensor(


## Off-Policy Evaluation

During training we already get some metrics on a test set: the initial state value and the average value. However, these estimates of model performance (which use the critic's Q-function) are biased. They're useful for validation during training, but not much else. Instead, we separately fit a Q-function to the data (or to a separate dataset, as I've done here) and evaluate the model's performance with it.

Feel free to change the chunks and number of steps.

In [11]:
from d3rlpy.ope import FQE
# metrics to evaluate with
from d3rlpy.metrics.scorer import soft_opc_scorer

# Build a separate dataset for off-policy evaluation from chunks 2000-2099,
# which do not overlap with the chunks 0-1999 used for training.
ope_dataset = get_dataset(list(range(2000, 2100)))  # change if you'd prefer different chunks

# A fixed random_state keeps the evaluation split reproducible across kernel
# restarts (the unseeded split changed on every run).
ope_train_episodes, ope_test_episodes = train_test_split(
    ope_dataset, test_size=0.2, random_state=0
)

# Fitted Q Evaluation of the trained policy, using the same action scaler as
# the policy itself.
fqe = FQE(algo=model, action_scaler=action_scaler, use_gpu=False)  # change this if you have one!

# NOTE(review): in d3rlpy 1.x, n_steps_per_epoch appears to take effect only
# together with n_steps; with n_epochs given it looks to be ignored — confirm
# before tuning it.
# NOTE(review): return_threshold=0 equals the maximum return reported by
# compute_stats() earlier in this notebook — confirm this is the intended
# "success" cut-off for soft OPC.
fqe.fit(
    ope_train_episodes,
    eval_episodes=ope_test_episodes,
    tensorboard_dir='runs',
    n_epochs=100,
    n_steps_per_epoch=10000,  # change if overfitting/underfitting
    scorers={
        'init_value': initial_state_value_estimation_scorer,
        'ave_value': average_value_estimation_scorer,
        'soft_opc': soft_opc_scorer(return_threshold=0),
    },
)

start
[ 0.00000000e+00  7.95731469e+08 -2.00289108e-01  1.20000047e-02
 -1.88000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.04329141e-01  1.07235845e-01  6.00000000e-01]
Read chunk # 2001 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.21689108e-01 -4.79999953e-02
  2.36999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 2002 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.75789108e-01 -3.21999953e-02
  1.39999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.78630064e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 2003 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.79910892e-01 -3.15999953e-02
  3.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.39455184e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 2004 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.31489108e-01  2.30000047e-02
 -4.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -9.31891077e-02 -3.55999953e-02
 -2.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  9.63507822e-03  6.73487384e-02  6.00000000e-01]
Read chunk # 2046 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.00389108e-01 -3.65999953e-02
 -1.90000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.28812898e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 2047 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.33910892e-01  1.88000047e-02
  9.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.09151976e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 2048 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.47410892e-01 -5.99995308e-04
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.06779283e-02 -6.00000000e-01]
Read chunk # 2049 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.83108923e-02  2.48000047e-02
 -2.27000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.38

[ 0.00000000e+00  7.95731469e+08  3.55010892e-01  3.20000047e-02
 -1.36000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.53899655e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 2099 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.40489108e-01 -5.19999953e-02
 -2.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.24939354e-01  6.00000000e-01]
Read chunk # 2100 out of 4999
torch.Size([111000, 6])
2022-04-17 18:19.17 [debug    ] RoundIterator is selected.
2022-04-17 18:19.17 [info     ] Directory is created at d3rlpy_logs/FQE_20220417181917
2022-04-17 18:19.17 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-17 18:19.17 [debug    ] Building models...
2022-04-17 18:19.17 [debug    ] Models have been built.
2022-04-17 18:19.17 [info     ] Parameters are saved to d3rlpy_logs/FQE_20220417181917/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'batch_s

Epoch 1/100:   0%|          | 0/876 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


2022-04-17 18:19.20 [info     ] FQE_20220417181917: epoch=1 step=876 epoch=1 metrics={'time_sample_batch': 0.0001437021717088952, 'time_algorithm_update': 0.0030731534304684156, 'loss': 0.007757241122975316, 'time_step': 0.0032807988663242286, 'init_value': -1.0139727592468262, 'ave_value': -0.7032107809085298, 'soft_opc': nan} step=876




2022-04-17 18:19.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_876.pt


Epoch 2/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.23 [info     ] FQE_20220417181917: epoch=2 step=1752 epoch=2 metrics={'time_sample_batch': 0.00014131580857925762, 'time_algorithm_update': 0.003101518437198308, 'loss': 0.0060979163775977474, 'time_step': 0.003304141841522635, 'init_value': -1.4636434316635132, 'ave_value': -1.0110418039432785, 'soft_opc': nan} step=1752




2022-04-17 18:19.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_1752.pt


Epoch 3/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.27 [info     ] FQE_20220417181917: epoch=3 step=2628 epoch=3 metrics={'time_sample_batch': 0.00013856392472846323, 'time_algorithm_update': 0.00310577458986953, 'loss': 0.007260493202970605, 'time_step': 0.0033101047555061237, 'init_value': -1.9516700506210327, 'ave_value': -1.4125365340394211, 'soft_opc': nan} step=2628




2022-04-17 18:19.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_2628.pt


Epoch 4/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.30 [info     ] FQE_20220417181917: epoch=4 step=3504 epoch=4 metrics={'time_sample_batch': 0.0001628491432155104, 'time_algorithm_update': 0.003677782947069978, 'loss': 0.010123744105827701, 'time_step': 0.003917415664620595, 'init_value': -2.420569658279419, 'ave_value': -1.8719225466909173, 'soft_opc': nan} step=3504




2022-04-17 18:19.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_3504.pt


Epoch 5/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.34 [info     ] FQE_20220417181917: epoch=5 step=4380 epoch=5 metrics={'time_sample_batch': 0.0001460362787116064, 'time_algorithm_update': 0.003264266062000571, 'loss': 0.013423352911733473, 'time_step': 0.003475902287383058, 'init_value': -2.9003024101257324, 'ave_value': -2.1527395538287655, 'soft_opc': nan} step=4380




2022-04-17 18:19.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_4380.pt


Epoch 6/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.37 [info     ] FQE_20220417181917: epoch=6 step=5256 epoch=6 metrics={'time_sample_batch': 0.00014344306841288526, 'time_algorithm_update': 0.0032422447313456776, 'loss': 0.017621666737210183, 'time_step': 0.003449880369177692, 'init_value': -3.378718614578247, 'ave_value': -2.5300325953356317, 'soft_opc': nan} step=5256




2022-04-17 18:19.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_5256.pt


Epoch 7/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.40 [info     ] FQE_20220417181917: epoch=7 step=6132 epoch=7 metrics={'time_sample_batch': 0.00015177302164574193, 'time_algorithm_update': 0.0034373957816868613, 'loss': 0.020903949139397335, 'time_step': 0.003659113629223549, 'init_value': -3.8921058177948, 'ave_value': -2.9061553105946865, 'soft_opc': nan} step=6132




2022-04-17 18:19.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_6132.pt


Epoch 8/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.44 [info     ] FQE_20220417181917: epoch=8 step=7008 epoch=8 metrics={'time_sample_batch': 0.00014820109763646235, 'time_algorithm_update': 0.0033864049606671617, 'loss': 0.024569323696791445, 'time_step': 0.0035990898467634365, 'init_value': -4.154097557067871, 'ave_value': -3.1106412680213853, 'soft_opc': nan} step=7008




2022-04-17 18:19.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_7008.pt


Epoch 9/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.47 [info     ] FQE_20220417181917: epoch=9 step=7884 epoch=9 metrics={'time_sample_batch': 0.00015600740093074433, 'time_algorithm_update': 0.003489847869089205, 'loss': 0.02848557132980935, 'time_step': 0.003717908304031581, 'init_value': -4.810022354125977, 'ave_value': -3.5997865346057325, 'soft_opc': nan} step=7884




2022-04-17 18:19.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_7884.pt


Epoch 10/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.51 [info     ] FQE_20220417181917: epoch=10 step=8760 epoch=10 metrics={'time_sample_batch': 0.0001661962570120755, 'time_algorithm_update': 0.0038380010487282113, 'loss': 0.0324158398017232, 'time_step': 0.0040816852491195885, 'init_value': -5.265963077545166, 'ave_value': -3.9359731959568354, 'soft_opc': nan} step=8760




2022-04-17 18:19.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_8760.pt


Epoch 11/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.55 [info     ] FQE_20220417181917: epoch=11 step=9636 epoch=11 metrics={'time_sample_batch': 0.0001563277418754961, 'time_algorithm_update': 0.003571680419521245, 'loss': 0.03643382267046243, 'time_step': 0.0038022271030025397, 'init_value': -5.656669616699219, 'ave_value': -4.215855991968902, 'soft_opc': nan} step=9636




2022-04-17 18:19.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_9636.pt


Epoch 12/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:19.58 [info     ] FQE_20220417181917: epoch=12 step=10512 epoch=12 metrics={'time_sample_batch': 0.000154143054735715, 'time_algorithm_update': 0.0035610239799708536, 'loss': 0.04148580219525393, 'time_step': 0.003786010557113717, 'init_value': -5.949587821960449, 'ave_value': -4.498052365936272, 'soft_opc': nan} step=10512




2022-04-17 18:19.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_10512.pt


Epoch 13/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.02 [info     ] FQE_20220417181917: epoch=13 step=11388 epoch=13 metrics={'time_sample_batch': 0.00015292673894803818, 'time_algorithm_update': 0.003553757384487483, 'loss': 0.046444459668664714, 'time_step': 0.003778926858074589, 'init_value': -6.346841335296631, 'ave_value': -4.805817099201518, 'soft_opc': nan} step=11388




2022-04-17 18:20.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_11388.pt


Epoch 14/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.05 [info     ] FQE_20220417181917: epoch=14 step=12264 epoch=14 metrics={'time_sample_batch': 0.00014468197408876464, 'time_algorithm_update': 0.0033764215909182754, 'loss': 0.050941151204002284, 'time_step': 0.0035899286945116574, 'init_value': -6.454695224761963, 'ave_value': -4.953911584708337, 'soft_opc': nan} step=12264




2022-04-17 18:20.05 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_12264.pt


Epoch 15/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.09 [info     ] FQE_20220417181917: epoch=15 step=13140 epoch=15 metrics={'time_sample_batch': 0.00015820161392699638, 'time_algorithm_update': 0.003656610778477638, 'loss': 0.052124230880035115, 'time_step': 0.0038918719444100717, 'init_value': -6.628746032714844, 'ave_value': -5.033724906340007, 'soft_opc': nan} step=13140




2022-04-17 18:20.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_13140.pt


Epoch 16/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.12 [info     ] FQE_20220417181917: epoch=16 step=14016 epoch=16 metrics={'time_sample_batch': 0.0001427719037826747, 'time_algorithm_update': 0.0033153975935287126, 'loss': 0.055635320733686204, 'time_step': 0.0035234259688146584, 'init_value': -6.917851448059082, 'ave_value': -5.295874720538824, 'soft_opc': nan} step=14016




2022-04-17 18:20.12 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_14016.pt


Epoch 17/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.16 [info     ] FQE_20220417181917: epoch=17 step=14892 epoch=17 metrics={'time_sample_batch': 0.00015301437682757095, 'time_algorithm_update': 0.0035623412698371224, 'loss': 0.0607736909745546, 'time_step': 0.003785641226049972, 'init_value': -7.235141754150391, 'ave_value': -5.64728636289888, 'soft_opc': nan} step=14892




2022-04-17 18:20.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_14892.pt


Epoch 18/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.19 [info     ] FQE_20220417181917: epoch=18 step=15768 epoch=18 metrics={'time_sample_batch': 0.00015346345291834443, 'time_algorithm_update': 0.0034921827925938993, 'loss': 0.0647713287019056, 'time_step': 0.003713494294310269, 'init_value': -7.476769924163818, 'ave_value': -5.863389368469214, 'soft_opc': nan} step=15768




2022-04-17 18:20.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_15768.pt


Epoch 19/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.23 [info     ] FQE_20220417181917: epoch=19 step=16644 epoch=19 metrics={'time_sample_batch': 0.00015556567335781987, 'time_algorithm_update': 0.003545571407771002, 'loss': 0.06620574861416194, 'time_step': 0.003773736082799903, 'init_value': -7.5836639404296875, 'ave_value': -6.075893567889123, 'soft_opc': nan} step=16644




2022-04-17 18:20.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_16644.pt


Epoch 20/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.26 [info     ] FQE_20220417181917: epoch=20 step=17520 epoch=20 metrics={'time_sample_batch': 0.00015194911390679068, 'time_algorithm_update': 0.003481560646126804, 'loss': 0.06745361413495173, 'time_step': 0.0037048910850803604, 'init_value': -7.763945579528809, 'ave_value': -6.317519506100599, 'soft_opc': nan} step=17520




2022-04-17 18:20.26 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_17520.pt


Epoch 21/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.30 [info     ] FQE_20220417181917: epoch=21 step=18396 epoch=21 metrics={'time_sample_batch': 0.00015562582233725073, 'time_algorithm_update': 0.0035552308983998757, 'loss': 0.07058420588212411, 'time_step': 0.0037836222888127853, 'init_value': -7.794457912445068, 'ave_value': -6.382644092441276, 'soft_opc': nan} step=18396




2022-04-17 18:20.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_18396.pt


Epoch 22/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.33 [info     ] FQE_20220417181917: epoch=22 step=19272 epoch=22 metrics={'time_sample_batch': 0.00014416785000666093, 'time_algorithm_update': 0.003334001591216484, 'loss': 0.07203038840514255, 'time_step': 0.0035449957738728282, 'init_value': -7.912284851074219, 'ave_value': -6.4877003652146, 'soft_opc': nan} step=19272




2022-04-17 18:20.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_19272.pt


Epoch 23/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.37 [info     ] FQE_20220417181917: epoch=23 step=20148 epoch=23 metrics={'time_sample_batch': 0.0001581610609951629, 'time_algorithm_update': 0.0036296784605609773, 'loss': 0.0751337269756977, 'time_step': 0.003860503026883896, 'init_value': -8.046655654907227, 'ave_value': -6.702854576691448, 'soft_opc': nan} step=20148




2022-04-17 18:20.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_20148.pt


Epoch 24/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.40 [info     ] FQE_20220417181917: epoch=24 step=21024 epoch=24 metrics={'time_sample_batch': 0.00014531095278317525, 'time_algorithm_update': 0.0033203156571410017, 'loss': 0.07610933829128708, 'time_step': 0.00353265189689044, 'init_value': -8.141138076782227, 'ave_value': -6.863979960371245, 'soft_opc': nan} step=21024




2022-04-17 18:20.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_21024.pt


Epoch 25/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.44 [info     ] FQE_20220417181917: epoch=25 step=21900 epoch=25 metrics={'time_sample_batch': 0.00015703456042564078, 'time_algorithm_update': 0.0036225229093473253, 'loss': 0.07653282951674165, 'time_step': 0.003853842275872078, 'init_value': -8.115034103393555, 'ave_value': -7.100253703919751, 'soft_opc': nan} step=21900




2022-04-17 18:20.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_21900.pt


Epoch 26/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.47 [info     ] FQE_20220417181917: epoch=26 step=22776 epoch=26 metrics={'time_sample_batch': 0.00015493206781883764, 'time_algorithm_update': 0.0035227518103438426, 'loss': 0.07370693828500212, 'time_step': 0.003750037929238794, 'init_value': -8.179459571838379, 'ave_value': -7.232338152942961, 'soft_opc': nan} step=22776




2022-04-17 18:20.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_22776.pt


Epoch 27/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.51 [info     ] FQE_20220417181917: epoch=27 step=23652 epoch=27 metrics={'time_sample_batch': 0.00016205359811652197, 'time_algorithm_update': 0.0037216693843336412, 'loss': 0.0756108831400562, 'time_step': 0.003962604422547501, 'init_value': -8.392924308776855, 'ave_value': -7.480911115637166, 'soft_opc': nan} step=23652




2022-04-17 18:20.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_23652.pt


Epoch 28/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.55 [info     ] FQE_20220417181917: epoch=28 step=24528 epoch=28 metrics={'time_sample_batch': 0.0001653051811810498, 'time_algorithm_update': 0.0037364543300785432, 'loss': 0.07785459767870784, 'time_step': 0.003982109294090097, 'init_value': -8.415689468383789, 'ave_value': -7.492802610779936, 'soft_opc': nan} step=24528




2022-04-17 18:20.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_24528.pt


Epoch 29/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:20.58 [info     ] FQE_20220417181917: epoch=29 step=25404 epoch=29 metrics={'time_sample_batch': 0.0001651190187288746, 'time_algorithm_update': 0.003811726014908046, 'loss': 0.07891854875376575, 'time_step': 0.0040527723695589525, 'init_value': -8.540231704711914, 'ave_value': -7.81753262044241, 'soft_opc': nan} step=25404




2022-04-17 18:20.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_25404.pt


Epoch 30/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.02 [info     ] FQE_20220417181917: epoch=30 step=26280 epoch=30 metrics={'time_sample_batch': 0.00015882161109959155, 'time_algorithm_update': 0.0036405213347308714, 'loss': 0.08087600949464885, 'time_step': 0.003873567330782816, 'init_value': -8.561981201171875, 'ave_value': -7.94971814370152, 'soft_opc': nan} step=26280




2022-04-17 18:21.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_26280.pt


Epoch 31/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.06 [info     ] FQE_20220417181917: epoch=31 step=27156 epoch=31 metrics={'time_sample_batch': 0.00015465826748712968, 'time_algorithm_update': 0.003530428289822792, 'loss': 0.08141805206055511, 'time_step': 0.003758255749532621, 'init_value': -8.452730178833008, 'ave_value': -7.976783680356704, 'soft_opc': nan} step=27156




2022-04-17 18:21.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_27156.pt


Epoch 32/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.09 [info     ] FQE_20220417181917: epoch=32 step=28032 epoch=32 metrics={'time_sample_batch': 0.0001630208807993153, 'time_algorithm_update': 0.0036770089031898812, 'loss': 0.08162261169540938, 'time_step': 0.003915143067433954, 'init_value': -8.466660499572754, 'ave_value': -7.949736862522256, 'soft_opc': nan} step=28032




2022-04-17 18:21.09 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_28032.pt


Epoch 33/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.13 [info     ] FQE_20220417181917: epoch=33 step=28908 epoch=33 metrics={'time_sample_batch': 0.00015401187008374357, 'time_algorithm_update': 0.003520374973070676, 'loss': 0.08324267443978614, 'time_step': 0.0037480685264552564, 'init_value': -8.728558540344238, 'ave_value': -8.19717268342102, 'soft_opc': nan} step=28908




2022-04-17 18:21.13 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_28908.pt


Epoch 34/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.16 [info     ] FQE_20220417181917: epoch=34 step=29784 epoch=34 metrics={'time_sample_batch': 0.00014903964517323394, 'time_algorithm_update': 0.0034984739403746442, 'loss': 0.08672810573615644, 'time_step': 0.003717341923822551, 'init_value': -8.760335922241211, 'ave_value': -8.161949234693436, 'soft_opc': nan} step=29784




2022-04-17 18:21.16 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_29784.pt


Epoch 35/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.20 [info     ] FQE_20220417181917: epoch=35 step=30660 epoch=35 metrics={'time_sample_batch': 0.00015311453440417983, 'time_algorithm_update': 0.003513833159181081, 'loss': 0.08747242930527037, 'time_step': 0.003741528617736956, 'init_value': -8.885348320007324, 'ave_value': -8.446577702853997, 'soft_opc': nan} step=30660




2022-04-17 18:21.20 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_30660.pt


Epoch 36/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.23 [info     ] FQE_20220417181917: epoch=36 step=31536 epoch=36 metrics={'time_sample_batch': 0.00015149078412687397, 'time_algorithm_update': 0.0035204060001460384, 'loss': 0.08930670138003015, 'time_step': 0.0037407559346934976, 'init_value': -8.82131290435791, 'ave_value': -8.46515324908099, 'soft_opc': nan} step=31536




2022-04-17 18:21.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_31536.pt


Epoch 37/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.27 [info     ] FQE_20220417181917: epoch=37 step=32412 epoch=37 metrics={'time_sample_batch': 0.0001507061257209952, 'time_algorithm_update': 0.0034959708174614056, 'loss': 0.09425880288233189, 'time_step': 0.0037196058116547048, 'init_value': -8.81568717956543, 'ave_value': -8.587051657076488, 'soft_opc': nan} step=32412




2022-04-17 18:21.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_32412.pt


Epoch 38/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.30 [info     ] FQE_20220417181917: epoch=38 step=33288 epoch=38 metrics={'time_sample_batch': 0.0001625750707164747, 'time_algorithm_update': 0.0037476434010893242, 'loss': 0.09449193910211877, 'time_step': 0.0039867233467972985, 'init_value': -9.031393051147461, 'ave_value': -8.72286925108583, 'soft_opc': nan} step=33288




2022-04-17 18:21.30 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_33288.pt


Epoch 39/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.34 [info     ] FQE_20220417181917: epoch=39 step=34164 epoch=39 metrics={'time_sample_batch': 0.00016369150109487037, 'time_algorithm_update': 0.003732152997630916, 'loss': 0.0969584672863289, 'time_step': 0.0039701257666496384, 'init_value': -8.82226848602295, 'ave_value': -8.651394325172578, 'soft_opc': nan} step=34164




2022-04-17 18:21.34 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_34164.pt


Epoch 40/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.38 [info     ] FQE_20220417181917: epoch=40 step=35040 epoch=40 metrics={'time_sample_batch': 0.000158446292354636, 'time_algorithm_update': 0.0036635431524825422, 'loss': 0.09964319425592513, 'time_step': 0.003896147148794235, 'init_value': -8.995022773742676, 'ave_value': -8.787946781120725, 'soft_opc': nan} step=35040




2022-04-17 18:21.38 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_35040.pt


Epoch 41/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.41 [info     ] FQE_20220417181917: epoch=41 step=35916 epoch=41 metrics={'time_sample_batch': 0.00014768561271771992, 'time_algorithm_update': 0.003490164399691368, 'loss': 0.09769002042527919, 'time_step': 0.003706490340298169, 'init_value': -8.663272857666016, 'ave_value': -8.54060381674082, 'soft_opc': nan} step=35916




2022-04-17 18:21.41 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_35916.pt


Epoch 42/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.45 [info     ] FQE_20220417181917: epoch=42 step=36792 epoch=42 metrics={'time_sample_batch': 0.000147819246875641, 'time_algorithm_update': 0.0034585987051872357, 'loss': 0.09861936048098487, 'time_step': 0.003671071845102528, 'init_value': -8.590283393859863, 'ave_value': -8.458615792477588, 'soft_opc': nan} step=36792




2022-04-17 18:21.45 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_36792.pt


Epoch 43/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.49 [info     ] FQE_20220417181917: epoch=43 step=37668 epoch=43 metrics={'time_sample_batch': 0.00015457062960759689, 'time_algorithm_update': 0.003594785248307877, 'loss': 0.09798711183806244, 'time_step': 0.0038215509832721867, 'init_value': -8.476556777954102, 'ave_value': -8.447855786012301, 'soft_opc': nan} step=37668




2022-04-17 18:21.49 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_37668.pt


Epoch 44/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.52 [info     ] FQE_20220417181917: epoch=44 step=38544 epoch=44 metrics={'time_sample_batch': 0.00015591622487595093, 'time_algorithm_update': 0.0036012002322227442, 'loss': 0.10047156555066736, 'time_step': 0.003826236888153912, 'init_value': -8.653292655944824, 'ave_value': -8.669982143950623, 'soft_opc': nan} step=38544




2022-04-17 18:21.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_38544.pt


Epoch 45/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:21.56 [info     ] FQE_20220417181917: epoch=45 step=39420 epoch=45 metrics={'time_sample_batch': 0.0001576482977496979, 'time_algorithm_update': 0.0036741538679218727, 'loss': 0.10127061295623946, 'time_step': 0.0039057258057267697, 'init_value': -8.479665756225586, 'ave_value': -8.519536207038238, 'soft_opc': nan} step=39420




2022-04-17 18:21.56 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_39420.pt


Epoch 46/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.00 [info     ] FQE_20220417181917: epoch=46 step=40296 epoch=46 metrics={'time_sample_batch': 0.00015846425539826694, 'time_algorithm_update': 0.0037587146236471936, 'loss': 0.10136232706670305, 'time_step': 0.003992163155176868, 'init_value': -8.70954418182373, 'ave_value': -8.588151434693016, 'soft_opc': nan} step=40296




2022-04-17 18:22.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_40296.pt


Epoch 47/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.03 [info     ] FQE_20220417181917: epoch=47 step=41172 epoch=47 metrics={'time_sample_batch': 0.00014742787025834872, 'time_algorithm_update': 0.0035401607212955004, 'loss': 0.10250946074207505, 'time_step': 0.003753158599818678, 'init_value': -8.978919982910156, 'ave_value': -8.751639527301094, 'soft_opc': nan} step=41172




2022-04-17 18:22.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_41172.pt


Epoch 48/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.07 [info     ] FQE_20220417181917: epoch=48 step=42048 epoch=48 metrics={'time_sample_batch': 0.00016117232030929496, 'time_algorithm_update': 0.0037181995230722645, 'loss': 0.10178075114144948, 'time_step': 0.003950739560061938, 'init_value': -8.945009231567383, 'ave_value': -8.67460863342324, 'soft_opc': nan} step=42048




2022-04-17 18:22.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_42048.pt


Epoch 49/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.11 [info     ] FQE_20220417181917: epoch=49 step=42924 epoch=49 metrics={'time_sample_batch': 0.00015976603172685458, 'time_algorithm_update': 0.003771919910221884, 'loss': 0.10616267497968099, 'time_step': 0.004007022674769572, 'init_value': -9.010398864746094, 'ave_value': -8.733091700887895, 'soft_opc': nan} step=42924




2022-04-17 18:22.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_42924.pt


Epoch 50/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.14 [info     ] FQE_20220417181917: epoch=50 step=43800 epoch=50 metrics={'time_sample_batch': 0.00015032618013146806, 'time_algorithm_update': 0.0035264581850130263, 'loss': 0.10733864799787218, 'time_step': 0.0037450480134519812, 'init_value': -9.028890609741211, 'ave_value': -8.633875228018576, 'soft_opc': nan} step=43800




2022-04-17 18:22.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_43800.pt


Epoch 51/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.18 [info     ] FQE_20220417181917: epoch=51 step=44676 epoch=51 metrics={'time_sample_batch': 0.0001627552454874396, 'time_algorithm_update': 0.0038694638639824577, 'loss': 0.10907290566161905, 'time_step': 0.004110114759506156, 'init_value': -9.048338890075684, 'ave_value': -8.758199316549812, 'soft_opc': nan} step=44676




2022-04-17 18:22.18 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_44676.pt


Epoch 52/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.22 [info     ] FQE_20220417181917: epoch=52 step=45552 epoch=52 metrics={'time_sample_batch': 0.00015524750975169004, 'time_algorithm_update': 0.0037765361402677076, 'loss': 0.10978960548527539, 'time_step': 0.00400325206861104, 'init_value': -9.174015045166016, 'ave_value': -8.74366245905424, 'soft_opc': nan} step=45552




2022-04-17 18:22.22 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_45552.pt


Epoch 53/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.25 [info     ] FQE_20220417181917: epoch=53 step=46428 epoch=53 metrics={'time_sample_batch': 0.0001490222264642585, 'time_algorithm_update': 0.003572774804346093, 'loss': 0.11081263365688603, 'time_step': 0.0037873705772504414, 'init_value': -9.313291549682617, 'ave_value': -8.884122893323635, 'soft_opc': nan} step=46428




2022-04-17 18:22.25 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_46428.pt


Epoch 54/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.29 [info     ] FQE_20220417181917: epoch=54 step=47304 epoch=54 metrics={'time_sample_batch': 0.0001597235736237269, 'time_algorithm_update': 0.0037709733122559988, 'loss': 0.11070747955167479, 'time_step': 0.00400571654376374, 'init_value': -9.001346588134766, 'ave_value': -8.603529074933917, 'soft_opc': nan} step=47304




2022-04-17 18:22.29 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_47304.pt


Epoch 55/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.33 [info     ] FQE_20220417181917: epoch=55 step=48180 epoch=55 metrics={'time_sample_batch': 0.00016193139498636602, 'time_algorithm_update': 0.003836573531094207, 'loss': 0.11065882500595962, 'time_step': 0.004072462314884413, 'init_value': -9.208115577697754, 'ave_value': -8.746280361571694, 'soft_opc': nan} step=48180




2022-04-17 18:22.33 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_48180.pt


Epoch 56/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.37 [info     ] FQE_20220417181917: epoch=56 step=49056 epoch=56 metrics={'time_sample_batch': 0.0001589386430505204, 'time_algorithm_update': 0.0037527838254083784, 'loss': 0.11317814837943987, 'time_step': 0.003986877665672128, 'init_value': -9.255398750305176, 'ave_value': -8.843748424315976, 'soft_opc': nan} step=49056




2022-04-17 18:22.37 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_49056.pt


Epoch 57/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.40 [info     ] FQE_20220417181917: epoch=57 step=49932 epoch=57 metrics={'time_sample_batch': 0.00016087239191412382, 'time_algorithm_update': 0.0037371946252099998, 'loss': 0.11722272887191264, 'time_step': 0.003970853542084019, 'init_value': -9.285041809082031, 'ave_value': -8.878366364794742, 'soft_opc': nan} step=49932




2022-04-17 18:22.40 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_49932.pt


Epoch 58/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.44 [info     ] FQE_20220417181917: epoch=58 step=50808 epoch=58 metrics={'time_sample_batch': 0.00014388615682244844, 'time_algorithm_update': 0.0034698364941496826, 'loss': 0.11725332439939819, 'time_step': 0.0036813042479562977, 'init_value': -9.464723587036133, 'ave_value': -8.949541403984165, 'soft_opc': nan} step=50808




2022-04-17 18:22.44 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_50808.pt


Epoch 59/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.48 [info     ] FQE_20220417181917: epoch=59 step=51684 epoch=59 metrics={'time_sample_batch': 0.00016013971746784366, 'time_algorithm_update': 0.0038040655933014333, 'loss': 0.12196372849834854, 'time_step': 0.004035531113681184, 'init_value': -9.551424026489258, 'ave_value': -9.161577614006495, 'soft_opc': nan} step=51684




2022-04-17 18:22.48 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_51684.pt


Epoch 60/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.52 [info     ] FQE_20220417181917: epoch=60 step=52560 epoch=60 metrics={'time_sample_batch': 0.00015787201929310141, 'time_algorithm_update': 0.003821021073485074, 'loss': 0.12057101708139424, 'time_step': 0.004048770693339169, 'init_value': -9.477863311767578, 'ave_value': -9.033202322017033, 'soft_opc': nan} step=52560




2022-04-17 18:22.52 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_52560.pt


Epoch 61/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.55 [info     ] FQE_20220417181917: epoch=61 step=53436 epoch=61 metrics={'time_sample_batch': 0.00015286903947455698, 'time_algorithm_update': 0.0036728844795052866, 'loss': 0.12324950873977228, 'time_step': 0.0038961278249139654, 'init_value': -9.43408489227295, 'ave_value': -8.759429162120162, 'soft_opc': nan} step=53436




2022-04-17 18:22.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_53436.pt


Epoch 62/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:22.58 [info     ] FQE_20220417181917: epoch=62 step=54312 epoch=62 metrics={'time_sample_batch': 0.000102131334069657, 'time_algorithm_update': 0.0026751996175339234, 'loss': 0.12266188868577699, 'time_step': 0.002825869000665673, 'init_value': -9.512548446655273, 'ave_value': -8.72030467792003, 'soft_opc': nan} step=54312




2022-04-17 18:22.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_54312.pt


Epoch 63/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.00 [info     ] FQE_20220417181917: epoch=63 step=55188 epoch=63 metrics={'time_sample_batch': 0.00010268818842221613, 'time_algorithm_update': 0.0026751982566972847, 'loss': 0.12280338724541423, 'time_step': 0.002827742328382518, 'init_value': -9.398948669433594, 'ave_value': -8.701766486308864, 'soft_opc': nan} step=55188




2022-04-17 18:23.00 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_55188.pt


Epoch 64/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.03 [info     ] FQE_20220417181917: epoch=64 step=56064 epoch=64 metrics={'time_sample_batch': 0.00010992185165892998, 'time_algorithm_update': 0.002775611670594237, 'loss': 0.12067229887704496, 'time_step': 0.002936458750946881, 'init_value': -9.58038330078125, 'ave_value': -8.933907408612269, 'soft_opc': nan} step=56064




2022-04-17 18:23.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_56064.pt


Epoch 65/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.07 [info     ] FQE_20220417181917: epoch=65 step=56940 epoch=65 metrics={'time_sample_batch': 0.00014728334940731797, 'time_algorithm_update': 0.003599091207600075, 'loss': 0.1273278283427329, 'time_step': 0.00381393084243008, 'init_value': -9.808682441711426, 'ave_value': -9.027698133639522, 'soft_opc': nan} step=56940




2022-04-17 18:23.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_56940.pt


Epoch 66/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.11 [info     ] FQE_20220417181917: epoch=66 step=57816 epoch=66 metrics={'time_sample_batch': 0.0001684748418799274, 'time_algorithm_update': 0.004075937619492344, 'loss': 0.1305299386384495, 'time_step': 0.0043202118786502645, 'init_value': -9.543200492858887, 'ave_value': -8.872439529182271, 'soft_opc': nan} step=57816




2022-04-17 18:23.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_57816.pt


Epoch 67/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.15 [info     ] FQE_20220417181917: epoch=67 step=58692 epoch=67 metrics={'time_sample_batch': 0.0001622038344814353, 'time_algorithm_update': 0.0038595831013161297, 'loss': 0.13370007855369026, 'time_step': 0.004096392355008757, 'init_value': -9.907514572143555, 'ave_value': -9.328478944396918, 'soft_opc': nan} step=58692




2022-04-17 18:23.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_58692.pt


Epoch 68/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.19 [info     ] FQE_20220417181917: epoch=68 step=59568 epoch=68 metrics={'time_sample_batch': 0.00016308674529262875, 'time_algorithm_update': 0.0039684206383413375, 'loss': 0.1345609155851395, 'time_step': 0.004206098650144116, 'init_value': -9.602767944335938, 'ave_value': -9.151614641530758, 'soft_opc': nan} step=59568




2022-04-17 18:23.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_59568.pt


Epoch 69/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.23 [info     ] FQE_20220417181917: epoch=69 step=60444 epoch=69 metrics={'time_sample_batch': 0.00017040015355637085, 'time_algorithm_update': 0.004175870658055832, 'loss': 0.1324606471953884, 'time_step': 0.004422949329358802, 'init_value': -9.843343734741211, 'ave_value': -9.425151420277249, 'soft_opc': nan} step=60444




2022-04-17 18:23.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_60444.pt


Epoch 70/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.27 [info     ] FQE_20220417181917: epoch=70 step=61320 epoch=70 metrics={'time_sample_batch': 0.00017430738771342797, 'time_algorithm_update': 0.0042710636304393755, 'loss': 0.13519245474391986, 'time_step': 0.004522323064063782, 'init_value': -9.78466796875, 'ave_value': -9.246852433411991, 'soft_opc': nan} step=61320




2022-04-17 18:23.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_61320.pt


Epoch 71/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.31 [info     ] FQE_20220417181917: epoch=71 step=62196 epoch=71 metrics={'time_sample_batch': 0.00016141591006762362, 'time_algorithm_update': 0.0038901357890264083, 'loss': 0.1358207371614801, 'time_step': 0.004128084879487617, 'init_value': -9.504148483276367, 'ave_value': -8.825489941514727, 'soft_opc': nan} step=62196




2022-04-17 18:23.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_62196.pt


Epoch 72/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.35 [info     ] FQE_20220417181917: epoch=72 step=63072 epoch=72 metrics={'time_sample_batch': 0.0001555221265853812, 'time_algorithm_update': 0.0037858320153467185, 'loss': 0.13581247458600687, 'time_step': 0.004013626542809891, 'init_value': -9.486536979675293, 'ave_value': -8.931186501347367, 'soft_opc': nan} step=63072




2022-04-17 18:23.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_63072.pt


Epoch 73/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.39 [info     ] FQE_20220417181917: epoch=73 step=63948 epoch=73 metrics={'time_sample_batch': 0.0001598915008649434, 'time_algorithm_update': 0.003960879426024276, 'loss': 0.14026950861084952, 'time_step': 0.004193913991048456, 'init_value': -9.685304641723633, 'ave_value': -9.050607225378085, 'soft_opc': nan} step=63948




2022-04-17 18:23.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_63948.pt


Epoch 74/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.42 [info     ] FQE_20220417181917: epoch=74 step=64824 epoch=74 metrics={'time_sample_batch': 0.0001606388723469216, 'time_algorithm_update': 0.003911999534798539, 'loss': 0.14310630858869433, 'time_step': 0.004148008888714934, 'init_value': -9.547499656677246, 'ave_value': -8.81505939364299, 'soft_opc': nan} step=64824




2022-04-17 18:23.42 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_64824.pt


Epoch 75/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.46 [info     ] FQE_20220417181917: epoch=75 step=65700 epoch=75 metrics={'time_sample_batch': 0.0001562164254384498, 'time_algorithm_update': 0.0038354549233771897, 'loss': 0.14111085032651383, 'time_step': 0.004064386565935666, 'init_value': -9.627923965454102, 'ave_value': -8.748698037481859, 'soft_opc': nan} step=65700




2022-04-17 18:23.46 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_65700.pt


Epoch 76/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.50 [info     ] FQE_20220417181917: epoch=76 step=66576 epoch=76 metrics={'time_sample_batch': 0.0001579112113882962, 'time_algorithm_update': 0.0039228791515576785, 'loss': 0.1436117167830984, 'time_step': 0.0041503228553353925, 'init_value': -9.73494815826416, 'ave_value': -8.75251481577754, 'soft_opc': nan} step=66576




2022-04-17 18:23.50 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_66576.pt


Epoch 77/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.54 [info     ] FQE_20220417181917: epoch=77 step=67452 epoch=77 metrics={'time_sample_batch': 0.00016055205096937206, 'time_algorithm_update': 0.003945234431523711, 'loss': 0.14819818396149437, 'time_step': 0.004179346234831092, 'init_value': -9.85126781463623, 'ave_value': -8.858825591458931, 'soft_opc': nan} step=67452




2022-04-17 18:23.54 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_67452.pt


Epoch 78/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:23.58 [info     ] FQE_20220417181917: epoch=78 step=68328 epoch=78 metrics={'time_sample_batch': 0.00016800889141483394, 'time_algorithm_update': 0.004024739406968905, 'loss': 0.1555130765015859, 'time_step': 0.004268490832690234, 'init_value': -10.329465866088867, 'ave_value': -9.234789281766695, 'soft_opc': nan} step=68328




2022-04-17 18:23.58 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_68328.pt


Epoch 79/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.02 [info     ] FQE_20220417181917: epoch=79 step=69204 epoch=79 metrics={'time_sample_batch': 0.00016369395060082006, 'time_algorithm_update': 0.003996981605547204, 'loss': 0.1506878334741038, 'time_step': 0.004237219078900063, 'init_value': -10.136194229125977, 'ave_value': -9.183639431760923, 'soft_opc': nan} step=69204




2022-04-17 18:24.02 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_69204.pt


Epoch 80/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.06 [info     ] FQE_20220417181917: epoch=80 step=70080 epoch=80 metrics={'time_sample_batch': 0.00016162928925257295, 'time_algorithm_update': 0.003959721354044736, 'loss': 0.15204937882257888, 'time_step': 0.004195305854762526, 'init_value': -10.13707160949707, 'ave_value': -9.220669770830117, 'soft_opc': nan} step=70080




2022-04-17 18:24.06 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_70080.pt


Epoch 81/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.10 [info     ] FQE_20220417181917: epoch=81 step=70956 epoch=81 metrics={'time_sample_batch': 0.0001714784805088827, 'time_algorithm_update': 0.0041607893220910195, 'loss': 0.15465358879122687, 'time_step': 0.004408731308157585, 'init_value': -10.066045761108398, 'ave_value': -9.033973360356983, 'soft_opc': nan} step=70956




2022-04-17 18:24.10 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_70956.pt


Epoch 82/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.14 [info     ] FQE_20220417181917: epoch=82 step=71832 epoch=82 metrics={'time_sample_batch': 0.00017474013376453696, 'time_algorithm_update': 0.004267847156960126, 'loss': 0.15786896899119807, 'time_step': 0.004524821015797794, 'init_value': -10.322181701660156, 'ave_value': -9.174154971145295, 'soft_opc': nan} step=71832




2022-04-17 18:24.14 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_71832.pt


Epoch 83/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.19 [info     ] FQE_20220417181917: epoch=83 step=72708 epoch=83 metrics={'time_sample_batch': 0.00017308535641186857, 'time_algorithm_update': 0.004270522017457169, 'loss': 0.15986299186588274, 'time_step': 0.004522207120782164, 'init_value': -10.318578720092773, 'ave_value': -9.251713371956134, 'soft_opc': nan} step=72708




2022-04-17 18:24.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_72708.pt


Epoch 84/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.23 [info     ] FQE_20220417181917: epoch=84 step=73584 epoch=84 metrics={'time_sample_batch': 0.00016889751773990996, 'time_algorithm_update': 0.004096005877403364, 'loss': 0.15833097439619168, 'time_step': 0.004346417509801856, 'init_value': -10.483083724975586, 'ave_value': -9.332627083192001, 'soft_opc': nan} step=73584




2022-04-17 18:24.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_73584.pt


Epoch 85/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.27 [info     ] FQE_20220417181917: epoch=85 step=74460 epoch=85 metrics={'time_sample_batch': 0.00017279277653454644, 'time_algorithm_update': 0.004270196777500518, 'loss': 0.15833576575169322, 'time_step': 0.004521318766624416, 'init_value': -10.388422966003418, 'ave_value': -9.213849182128905, 'soft_opc': nan} step=74460




2022-04-17 18:24.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_74460.pt


Epoch 86/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.31 [info     ] FQE_20220417181917: epoch=86 step=75336 epoch=86 metrics={'time_sample_batch': 0.0001643460635180887, 'time_algorithm_update': 0.0040126750458321075, 'loss': 0.16424015226941896, 'time_step': 0.004253052413191425, 'init_value': -10.534529685974121, 'ave_value': -9.483809079337227, 'soft_opc': nan} step=75336




2022-04-17 18:24.31 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_75336.pt


Epoch 87/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.35 [info     ] FQE_20220417181917: epoch=87 step=76212 epoch=87 metrics={'time_sample_batch': 0.000173953570187364, 'time_algorithm_update': 0.004260458086179272, 'loss': 0.1714400295176135, 'time_step': 0.0045148670401203034, 'init_value': -11.061117172241211, 'ave_value': -9.804470417934494, 'soft_opc': nan} step=76212




2022-04-17 18:24.35 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_76212.pt


Epoch 88/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.39 [info     ] FQE_20220417181917: epoch=88 step=77088 epoch=88 metrics={'time_sample_batch': 0.00015989830504813696, 'time_algorithm_update': 0.003974784999133245, 'loss': 0.17343250156107007, 'time_step': 0.004205177635907038, 'init_value': -10.84206771850586, 'ave_value': -9.66900682165816, 'soft_opc': nan} step=77088




2022-04-17 18:24.39 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_77088.pt


Epoch 89/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.43 [info     ] FQE_20220417181917: epoch=89 step=77964 epoch=89 metrics={'time_sample_batch': 0.00014723925830022384, 'time_algorithm_update': 0.0037981386054052064, 'loss': 0.17630064325565556, 'time_step': 0.004011980474811711, 'init_value': -10.988020896911621, 'ave_value': -9.73072460717983, 'soft_opc': nan} step=77964




2022-04-17 18:24.43 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_77964.pt


Epoch 90/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.47 [info     ] FQE_20220417181917: epoch=90 step=78840 epoch=90 metrics={'time_sample_batch': 0.0001683469232358889, 'time_algorithm_update': 0.0041783087329777405, 'loss': 0.1797335337080406, 'time_step': 0.004422397374018143, 'init_value': -11.071490287780762, 'ave_value': -9.831277397666966, 'soft_opc': nan} step=78840




2022-04-17 18:24.47 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_78840.pt


Epoch 91/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.51 [info     ] FQE_20220417181917: epoch=91 step=79716 epoch=91 metrics={'time_sample_batch': 0.00016474560515521324, 'time_algorithm_update': 0.004065751212916962, 'loss': 0.17869661351082183, 'time_step': 0.004305559206226645, 'init_value': -11.000558853149414, 'ave_value': -9.681193836680404, 'soft_opc': nan} step=79716




2022-04-17 18:24.51 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_79716.pt


Epoch 92/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.55 [info     ] FQE_20220417181917: epoch=92 step=80592 epoch=92 metrics={'time_sample_batch': 0.00016139277584476558, 'time_algorithm_update': 0.004027974932161096, 'loss': 0.18358373213397258, 'time_step': 0.004263677281331799, 'init_value': -11.402236938476562, 'ave_value': -9.991933731216568, 'soft_opc': nan} step=80592




2022-04-17 18:24.55 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_80592.pt


Epoch 93/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:24.59 [info     ] FQE_20220417181917: epoch=93 step=81468 epoch=93 metrics={'time_sample_batch': 0.00015398084300838105, 'time_algorithm_update': 0.003838526059503425, 'loss': 0.18566876761780066, 'time_step': 0.004061206562878335, 'init_value': -11.485244750976562, 'ave_value': -10.03008084883561, 'soft_opc': nan} step=81468




2022-04-17 18:24.59 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_81468.pt


Epoch 94/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:25.03 [info     ] FQE_20220417181917: epoch=94 step=82344 epoch=94 metrics={'time_sample_batch': 0.00016035309665279302, 'time_algorithm_update': 0.0039423758580804415, 'loss': 0.1917589803806508, 'time_step': 0.004174270314168713, 'init_value': -11.70460319519043, 'ave_value': -10.151565913123054, 'soft_opc': nan} step=82344




2022-04-17 18:25.03 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_82344.pt


Epoch 95/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:25.07 [info     ] FQE_20220417181917: epoch=95 step=83220 epoch=95 metrics={'time_sample_batch': 0.00016852764234150927, 'time_algorithm_update': 0.00410208718417442, 'loss': 0.19475839262483768, 'time_step': 0.004348816664795897, 'init_value': -11.929109573364258, 'ave_value': -10.459808030799703, 'soft_opc': nan} step=83220




2022-04-17 18:25.07 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_83220.pt


Epoch 96/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:25.11 [info     ] FQE_20220417181917: epoch=96 step=84096 epoch=96 metrics={'time_sample_batch': 0.0001720266255069541, 'time_algorithm_update': 0.004188190856480707, 'loss': 0.19019434193986975, 'time_step': 0.004439016455384694, 'init_value': -11.77196216583252, 'ave_value': -10.307757109833194, 'soft_opc': nan} step=84096




2022-04-17 18:25.11 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_84096.pt


Epoch 97/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:25.15 [info     ] FQE_20220417181917: epoch=97 step=84972 epoch=97 metrics={'time_sample_batch': 0.0001547976171589333, 'time_algorithm_update': 0.003838380449983083, 'loss': 0.18743122682016827, 'time_step': 0.004062901620995508, 'init_value': -11.734931945800781, 'ave_value': -10.156131369933352, 'soft_opc': nan} step=84972




2022-04-17 18:25.15 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_84972.pt


Epoch 98/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:25.19 [info     ] FQE_20220417181917: epoch=98 step=85848 epoch=98 metrics={'time_sample_batch': 0.00016856710660403178, 'time_algorithm_update': 0.004132996138916712, 'loss': 0.18622705251864607, 'time_step': 0.004380501296422253, 'init_value': -11.620454788208008, 'ave_value': -9.967251707336924, 'soft_opc': nan} step=85848




2022-04-17 18:25.19 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_85848.pt


Epoch 99/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:25.23 [info     ] FQE_20220417181917: epoch=99 step=86724 epoch=99 metrics={'time_sample_batch': 0.00016379764635268955, 'time_algorithm_update': 0.004082679748535156, 'loss': 0.19165319993163854, 'time_step': 0.004322569936377817, 'init_value': -11.748817443847656, 'ave_value': -10.027300319634042, 'soft_opc': nan} step=86724




2022-04-17 18:25.23 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_86724.pt


Epoch 100/100:   0%|          | 0/876 [00:00<?, ?it/s]



2022-04-17 18:25.27 [info     ] FQE_20220417181917: epoch=100 step=87600 epoch=100 metrics={'time_sample_batch': 0.00016191887528928993, 'time_algorithm_update': 0.00403139934147874, 'loss': 0.19798493267544098, 'time_step': 0.004266368199701179, 'init_value': -11.891968727111816, 'ave_value': -10.092297953292055, 'soft_opc': nan} step=87600




2022-04-17 18:25.27 [info     ] Model parameters are saved to d3rlpy_logs/FQE_20220417181917/model_87600.pt


[(1,
  {'time_sample_batch': 0.0001437021717088952,
   'time_algorithm_update': 0.0030731534304684156,
   'loss': 0.007757241122975316,
   'time_step': 0.0032807988663242286,
   'init_value': -1.0139727592468262,
   'ave_value': -0.7032107809085298,
   'soft_opc': nan}),
 (2,
  {'time_sample_batch': 0.00014131580857925762,
   'time_algorithm_update': 0.003101518437198308,
   'loss': 0.0060979163775977474,
   'time_step': 0.003304141841522635,
   'init_value': -1.4636434316635132,
   'ave_value': -1.0110418039432785,
   'soft_opc': nan}),
 (3,
  {'time_sample_batch': 0.00013856392472846323,
   'time_algorithm_update': 0.00310577458986953,
   'loss': 0.007260493202970605,
   'time_step': 0.0033101047555061237,
   'init_value': -1.9516700506210327,
   'ave_value': -1.4125365340394211,
   'soft_opc': nan}),
 (4,
  {'time_sample_batch': 0.0001628491432155104,
   'time_algorithm_update': 0.003677782947069978,
   'loss': 0.010123744105827701,
   'time_step': 0.003917415664620595,
   'init_val

In [12]:
# DISABLED CELL: fitted Q evaluation (FQE) of the trained policy, kept for reference only.
# Nothing below executes; uncomment the whole cell to run it.
# Before re-enabling, note that this cell itself only imports FQE and soft_opc_scorer.
# `model`, `action_scaler`, `train_test_split`, `initial_state_value_estimation_scorer`
# and `average_value_estimation_scorer` are presumably provided by earlier cells -- verify.
# NOTE(review): get_dataset, as defined at the top of this notebook, takes a `path` argument
# but constructs its DataSampler from a hard-coded file, so the `path=` below may have no
# effect -- confirm which data file is actually read.
# from d3rlpy.ope import FQE
# # metrics to evaluate with
# from d3rlpy.metrics.scorer import soft_opc_scorer


# ope_dataset = get_dataset([i*2 for i in range(100)], path="collected_data/rl_stochastic.txt") #change if you'd prefer different chunks
# ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

# fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
# fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
#         tensorboard_dir='runs',
#         n_epochs=100, n_steps_per_epoch=10000, #change if overfitting/underfitting
#         scorers={
#            'init_value': initial_state_value_estimation_scorer,
#             'ave_value': average_value_estimation_scorer,
#            'soft_opc': soft_opc_scorer(return_threshold=0)
#         })

In [13]:
from d3rlpy.torch_utility import to_cpu

# Export the trained `model` (created in an earlier cell) to two files in the
# current working directory: one via save_policy and one via save_model.
_policy_file = "cqlStochpid2000Ep20CPU.pt"
_params_file = "cqlStochpid2000Ep20modelCPU.pt"

# NOTE(review): to_cpu is applied to the algorithm object itself; confirm that
# this really moves the underlying networks to the CPU (d3rlpy may expect the
# implementation object here) before trusting the "CPU" suffix in the names.
to_cpu(model)

model.save_policy(_policy_file)
model.save_model(_params_file)

  minimum = torch.tensor(
  maximum = torch.tensor(
