# Sample Workflow for d3rlpy Experiments

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
# Apply the notebook-wide plotting style. 'matplotlibrc' is presumably a style
# file in the current working directory rather than a built-in style name --
# TODO confirm; the call fails if matplotlib cannot resolve it.
plt.style.use('matplotlibrc')

from Python.data_sampler import *

## Building an MDPDataset

We first read a large batch of samples from the data file. `d3rlpy` expects a dataset as four aligned arrays (observations, actions, rewards, terminal flags), so we assemble the samples into that form. The helper function below builds such a dataset from a list of chunk indices of your choosing.

In [2]:
def get_dataset(chunks : list, batch_size=30000,
                path="collected_data/rl_purestochastic.txt",
                episode_length=1111) -> d3rlpy.dataset.MDPDataset :
    """Build a d3rlpy ``MDPDataset`` from selected chunks of a sample file.

    For every entry of ``chunks`` the sampler is pointed at that chunk, the
    chunk is read, and one batch of ``batch_size`` samples is drawn. The
    per-chunk states, actions and rewards are concatenated in the order the
    chunks were given and wrapped, together with synthetic terminal flags,
    in an ``MDPDataset``. The shape of the concatenated states is printed.

    Parameters
    ----------
    chunks : list
        Chunk identifiers passed one by one to ``DataSampler.use_chunk``.
        Must yield at least one chunk.
    batch_size : int, optional
        Number of samples requested from each chunk via ``get_batch``.
    path : str, optional
        File handed to ``DataSampler``. Previously this argument was ignored
        and ``"collected_data/rl_purestochastic.txt"`` was always read; the
        argument is now honoured and its default is that same file, so calls
        that rely on the default keep loading the same data.
    episode_length : int, optional
        Stride, in samples, between consecutive terminal flags.

    Returns
    -------
    d3rlpy.dataset.MDPDataset
        Dataset built from (states, actions, rewards, terminals).

    Raises
    ------
    ValueError
        If ``episode_length`` is smaller than 1 or ``chunks`` is empty.

    Notes
    -----
    ``random`` and ``torch`` are not imported explicitly by the notebook's
    import cell; presumably they come into scope through
    ``from Python.data_sampler import *`` -- TODO confirm and import them
    explicitly.
    """
    if episode_length < 1:
        raise ValueError("episode_length must be a positive integer")

    # Fixed seed so repeated calls draw the same batches (assuming the sampler
    # draws from this `random` generator -- TODO confirm).
    random.seed(0)
    samples = DataSampler(path_to_data=path)
    samples.setting("coarse")

    states = []
    actions = []
    rewards = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        # get_batch returns four items; the next-state batch is not needed to
        # build an MDPDataset, so it is not accumulated (saves memory).
        states_chunk, actions_chunk, rewards_chunk, _next_states_chunk = samples.get_batch(batch_size)
        states.append(states_chunk)
        actions.append(actions_chunk)
        rewards.append(rewards_chunk)

    if not states:
        # torch.cat fails with an opaque error on an empty list.
        raise ValueError("chunks must contain at least one chunk index")

    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)

    # Synthetic episode boundaries: one flag every `episode_length` samples.
    # NOTE(review): this flags indices 0, episode_length, 2*episode_length, ...
    # (index 0 included). If the flag is meant to mark the *last* step of each
    # episode, the slice should start at `episode_length - 1`. Positions are
    # kept as they were so existing results stay reproducible.
    terminals = np.zeros(len(states))
    terminals[::episode_length] = 1

    print(states.shape)
    dataset = d3rlpy.dataset.MDPDataset(states.numpy(),
                                        actions.numpy(),
                                        rewards.numpy(), terminals)
    return dataset

We can now build the dataset by calling the helper with the chunk indices we want; the resulting dataset can then be split into train and test sets.

In [3]:
# Build the dataset from chunk indices 0..1999 using the helper's defaults.
dataset = get_dataset(list(range(2000)))

start
[ 0.00000000e+00  7.95731469e+08  1.14110892e-01  2.46000047e-02
 -2.40000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33891077e-02 -4.23999953e-02
  2.84999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.95938047e-01 -6.00000000e-01 -4.85130982e-02]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.97089108e-01 -5.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.84874650e-01  5.98976918e-01 -3.12414656e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33189108e-01  1.48000047e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.33187685e-01  5.18887883e-01  2.92787205e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.58910892e-01 -5.15999953e-02
  1.93999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000

start
[ 0.00000000e+00  7.95731469e+08 -1.30089108e-01  1.26000047e-02
  2.65999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.00498971e-01 -1.09507040e-01]
Read chunk # 44 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.35910892e-01 -5.37999953e-02
 -2.50000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.38838169e-01  6.00000000e-01 -3.75898719e-01]
Read chunk # 45 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.08910771e-03  5.96000047e-02
 -2.94000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 46 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.03810892e-01  9.20000469e-03
 -2.18000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -4.59576187e-01 -6.00000000e-01]
Read chunk # 47 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -5.44891077e-02 -4.83999953e-02
 -2.32000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.2198

start
[ 0.00000000e+00  7.95731469e+08  6.52108923e-02 -3.41999953e-02
 -3.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.91108872e-01  6.00000000e-01  6.00000000e-01]
Read chunk # 90 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.33910892e-01  3.22000047e-02
  7.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -8.45803493e-02  1.26420156e-01  3.84138893e-01]
Read chunk # 91 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.13889108e-01 -4.77999953e-02
 -6.50001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.37698138e-01 -5.84863104e-01  1.26168065e-01]
Read chunk # 92 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.03310892e-01 -3.81999953e-02
 -2.60000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -2.81368998e-01]
Read chunk # 93 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.46510892e-01 -4.51999953e-02
  3.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.4518

[ 0.00000000e+00  7.95731469e+08  4.03010892e-01 -5.37999953e-02
 -1.47000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.90620116e-01  5.40034474e-01  6.00000000e-01]
Read chunk # 138 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.86810892e-01  7.40000469e-03
 -4.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -1.77450742e-01  5.93783869e-01]
Read chunk # 139 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.99110892e-01 -4.99999953e-02
 -2.75000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 140 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.46610892e-01 -3.79999953e-02
  2.20999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.58817901e-01  3.59906503e-02  6.00000000e-01]
Read chunk # 141 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.09289108e-01 -5.73999953e-02
  4.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.000000

[ 0.00000000e+00  7.95731469e+08 -3.65889108e-01  3.06000047e-02
 -2.15000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.97465868e-01 -4.70555188e-01 -1.77657165e-01]
Read chunk # 187 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.33689108e-01 -1.83999953e-02
 -1.80000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -1.84808444e-01  5.59598370e-01]
Read chunk # 188 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.98489108e-01  7.20000469e-03
 -8.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.75998508e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 189 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.78110892e-01 -4.47999953e-02
 -2.81000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.96648664e-02  6.00000000e-01]
Read chunk # 190 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.99108923e-02  5.14000047e-02
 -1.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.310905

[ 0.00000000e+00  7.95731469e+08 -2.47389108e-01  5.64000047e-02
  2.37999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.39901182e-03  3.49713265e-01  2.88231488e-01]
Read chunk # 228 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.11189108e-01  4.06000047e-02
 -2.58000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 229 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.81110892e-01  3.12000047e-02
 -2.80000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.02574530e-02 -4.54514981e-01  6.00000000e-01]
Read chunk # 230 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.57108923e-02 -3.61999953e-02
  1.85999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -1.74238701e-01  3.55440381e-01]
Read chunk # 231 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.15310892e-01 -5.47999953e-02
  2.53999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.578163

start
[ 0.00000000e+00  7.95731469e+08 -4.18389108e-01 -3.85999953e-02
 -2.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.90034334e-01  4.16167898e-01  6.00000000e-01]
Read chunk # 276 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.24310892e-01 -4.53999953e-02
  2.29998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 277 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -6.79891077e-02 -1.41999953e-02
 -8.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.86615913e-01 -1.90826324e-02]
Read chunk # 278 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.73189108e-01 -3.77999953e-02
  2.74999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.84634006e-01  6.00000000e-01  6.00000000e-01]
Read chunk # 279 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.02108923e-02  5.52000047e-02
 -2.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.

start
[ 0.00000000e+00  7.95731469e+08  3.03410892e-01  3.22000047e-02
 -2.03000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01  6.00000000e-01]
Read chunk # 321 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.87108923e-02 -2.19999953e-02
 -1.57000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.22890554e-02  6.00000000e-01  5.25905845e-01]
Read chunk # 322 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.41810892e-01  4.34000047e-02
  9.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  1.87136558e-01  2.50020518e-01]
Read chunk # 323 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.00310892e-01 -5.07999953e-02
  2.59999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.07901304e-02  1.14285891e-01  6.00000000e-01]
Read chunk # 324 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.55810892e-01  3.80000469e-03
  1.46999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.

[ 0.00000000e+00  7.95731469e+08 -2.20689108e-01 -4.19999531e-03
  2.99986580e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.65875705e-01  6.00000000e-01 -2.58736317e-02]
Read chunk # 403 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.45310892e-01  2.62000047e-02
  9.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 404 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.10810892e-01  3.14000047e-02
 -2.77000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.73164187e-01 -6.00000000e-01  3.30177469e-01]
Read chunk # 405 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.38689108e-01  2.66000047e-02
 -1.35000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.19713434e-01 -5.17413592e-01  6.26854678e-02]
Read chunk # 406 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.93910892e-01 -3.59999953e-02
  2.58999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.081872

[ 0.00000000e+00  7.95731469e+08  4.33910892e-01  4.60000469e-03
 -2.85000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 449 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.81589108e-01 -6.19999531e-03
  1.56999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  9.82364105e-02  2.25793074e-01 -6.00000000e-01]
Read chunk # 450 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.18910892e-01 -8.19999531e-03
  2.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -5.00637783e-01  6.00000000e-01]
Read chunk # 451 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.24108923e-02  6.40000469e-03
  2.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.90883888e-01 -2.26790642e-01 -4.07807355e-01]
Read chunk # 452 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.23389108e-01  5.40000047e-02
  1.96999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.000000

[ 0.00000000e+00  7.95731469e+08  1.11410892e-01 -4.17999953e-02
 -8.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -8.23628623e-04  4.03842666e-01 -2.72305572e-01]
Read chunk # 492 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.25010892e-01 -2.43999953e-02
  6.29998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.19370093e-01  4.61045958e-01]
Read chunk # 493 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.82689108e-01  1.16000047e-02
  2.95999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.12486673e-01  6.00000000e-01  1.94308480e-01]
Read chunk # 494 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.35810892e-01  2.14000047e-02
  1.62999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -7.72296763e-02]
Read chunk # 495 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.10889108e-01  4.20000469e-03
 -1.44000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.512754

start
[ 0.00000000e+00  7.95731469e+08 -2.41989108e-01  2.48000047e-02
  1.97999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.77935887e-01  6.00000000e-01 -5.57719061e-01]
Read chunk # 532 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.14610892e-01 -5.33999953e-02
 -2.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.01098446e-01  2.27941815e-01 -6.00000000e-01]
Read chunk # 533 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.15989108e-01 -3.93999953e-02
  1.56999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  5.06130255e-01  6.00000000e-01]
Read chunk # 534 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -9.43891077e-02 -2.41999953e-02
  3.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.71944211e-01 -5.73685326e-01  5.36393899e-01]
Read chunk # 535 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.35889108e-01  2.58000047e-02
 -2.78000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.

[ 0.00000000e+00  7.95731469e+08  9.31108923e-02 -1.31999953e-02
 -1.49000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.13653159e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 573 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.35589108e-01  4.40000469e-03
 -6.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  3.17274650e-01  3.87604712e-01]
Read chunk # 574 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.32410892e-01 -5.59999953e-02
 -2.76000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.17720225e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 575 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.27010892e-01 -2.59999531e-03
  1.10999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.78991422e-01  6.00000000e-01]
Read chunk # 576 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.42610892e-01 -5.97999953e-02
  9.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.503786

[ 0.00000000e+00  7.95731469e+08 -2.14891077e-02  1.16000047e-02
  1.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.03452973e-01 -1.39597465e-01  1.19332153e-01]
Read chunk # 613 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.03889108e-01 -4.13999953e-02
 -2.19000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.84964515e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 614 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.28589108e-01 -2.19999531e-03
 -1.99000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.58948121e-01 -6.00000000e-01  4.72368154e-01]
Read chunk # 615 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.58110892e-01  5.96000047e-02
 -9.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  1.99061708e-01]
Read chunk # 616 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.66010892e-01 -3.19999531e-03
  1.70999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.000000

start
[ 0.00000000e+00  7.95731469e+08  3.15410892e-01  3.16000047e-02
 -5.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  5.25899599e-01 -2.04928199e-01]
Read chunk # 658 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  5.89108923e-02 -4.95999953e-02
 -2.62000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -4.65405233e-01]
Read chunk # 659 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -5.53891077e-02 -5.57999953e-02
  4.99986580e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.39165086e-02 -6.00000000e-01  2.70937098e-01]
Read chunk # 660 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -7.09891077e-02 -3.89999953e-02
 -2.50000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -1.31101003e-01]
Read chunk # 661 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.59989108e-01  3.52000047e-02
 -2.10000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.

start
[ 0.00000000e+00  7.95731469e+08 -3.22389108e-01 -3.79999953e-02
  6.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.83565257e-01 -4.36651124e-01  6.00000000e-01]
Read chunk # 704 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.28889108e-01 -1.43999953e-02
 -1.76000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 705 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.09010892e-01  4.50000047e-02
  9.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.79685612e-01  9.49075525e-02 -6.00000000e-01]
Read chunk # 706 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.33689108e-01  7.40000469e-03
  1.85999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  3.87475919e-01  6.00000000e-01]
Read chunk # 707 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.51310892e-01 -5.99999531e-03
 -1.45000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.

start
[ 0.00000000e+00  7.95731469e+08  1.93108923e-02  1.32000047e-02
  1.35999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01  2.07705545e-01]
Read chunk # 748 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.30589108e-01 -2.93999953e-02
  2.58999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -2.10257066e-01]
Read chunk # 749 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.39089108e-01  4.26000047e-02
 -5.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -4.58450864e-01]
Read chunk # 750 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.65910892e-01  4.82000047e-02
 -2.73000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  2.11323018e-01 -6.00000000e-01]
Read chunk # 751 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.94589108e-01 -9.99995308e-04
  1.36999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.

[ 0.00000000e+00  7.95731469e+08 -1.84689108e-01  1.80000469e-03
  2.39999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.50736360e-01 -6.00000000e-01 -8.24560854e-02]
Read chunk # 791 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.75089108e-01  3.42000047e-02
  1.94999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.34125313e-01  5.44869641e-01  6.00000000e-01]
Read chunk # 792 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.54210892e-01 -3.05999953e-02
 -9.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -1.79823033e-01 -6.00000000e-01]
Read chunk # 793 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.95010892e-01 -3.75999953e-02
  1.85999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.54130662e-03  1.10863663e-01  1.38603915e-01]
Read chunk # 794 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  9.40108923e-02  5.52000047e-02
  6.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.000000

[ 0.00000000e+00  7.95731469e+08 -8.41891077e-02 -8.99999531e-03
  2.91999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.48269796e-02 -6.00000000e-01  1.30205337e-01]
Read chunk # 836 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.87189108e-01 -1.93999953e-02
  2.74999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.57695492e-01  6.00000000e-01]
Read chunk # 837 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.30891077e-02 -5.75999953e-02
  8.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.11828445e-01  6.00000000e-01 -4.04464219e-01]
Read chunk # 838 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.73310892e-01  5.20000047e-02
  9.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01  2.06872591e-01]
Read chunk # 839 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.23310892e-01  1.20000469e-03
  2.91999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.806655

[ 0.00000000e+00  7.95731469e+08 -3.88089108e-01 -4.59999531e-03
  1.31999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.27417668e-01  4.25796328e-01  6.00000000e-01]
Read chunk # 876 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.21010892e-01  8.00000469e-03
  2.20999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -4.87289269e-01]
Read chunk # 877 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.73891077e-02  2.24000047e-02
  1.16999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.47191781e-01 -1.74650480e-02 -6.00000000e-01]
Read chunk # 878 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.06989108e-01 -2.65999953e-02
 -5.70001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.62731106e-01  6.00000000e-01 -1.42595984e-01]
Read chunk # 879 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.53310892e-01  3.40000047e-02
 -2.60000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.355973

[ 0.00000000e+00  7.95731469e+08 -3.75489108e-01 -1.87999953e-02
 -8.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  2.92846559e-01]
Read chunk # 916 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.38289108e-01 -5.67999953e-02
 -2.04000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -5.93642864e-01  3.81968129e-02]
Read chunk # 917 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.68889108e-01  2.00000047e-02
  4.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -2.22744221e-01]
Read chunk # 918 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.39891077e-02  7.40000469e-03
 -9.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.15394265e-01 -3.86618371e-02  5.06061523e-01]
Read chunk # 919 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.06210892e-01  4.04000047e-02
  1.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.662831

[ 0.00000000e+00  7.95731469e+08 -3.94089108e-01  2.92000047e-02
  2.66999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.19504628e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 960 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.49489108e-01  5.54000047e-02
 -1.77000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -4.08868688e-01  6.00000000e-01]
Read chunk # 961 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -7.90891077e-02 -6.99999531e-03
  4.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  5.05387092e-01 -6.00000000e-01]
Read chunk # 962 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.66891077e-02  1.14000047e-02
 -2.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -7.20078671e-02 -2.09920818e-01  6.00000000e-01]
Read chunk # 963 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.19510892e-01  2.76000047e-02
  5.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.256384

[ 0.00000000e+00  7.95731469e+08  1.90010892e-01  4.60000047e-02
 -5.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -1.66126716e-01]
Read chunk # 1006 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.52389108e-01  1.56000047e-02
 -2.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.78020995e-01 -3.98217908e-01 -5.60410551e-01]
Read chunk # 1007 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.41389108e-01  4.68000047e-02
  2.66999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -1.03635607e-01 -6.00000000e-01]
Read chunk # 1008 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.91089229e-03  1.32000047e-02
 -2.03000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -8.05277764e-02]
Read chunk # 1009 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.32010892e-01  4.40000047e-02
  1.98999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00

[ 0.00000000e+00  7.95731469e+08 -1.78989108e-01 -7.99995308e-04
  2.86999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -2.93841269e-01  6.00000000e-01]
Read chunk # 1052 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.62810892e-01 -4.53999953e-02
 -2.76000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01  3.74384669e-01]
Read chunk # 1053 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.00989108e-01 -3.97999953e-02
  1.74999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.39035243e-01 -6.00000000e-01  1.11029213e-01]
Read chunk # 1054 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.19410892e-01  3.82000047e-02
 -4.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.39280373e-01  5.36356590e-01  6.00000000e-01]
Read chunk # 1055 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.27910892e-01  4.08000047e-02
  8.99986580e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.24

[ 0.00000000e+00  7.95731469e+08 -1.05489108e-01 -7.99999531e-03
  9.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.49086249e-01 -1.94764175e-01  6.00000000e-01]
Read chunk # 1097 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.37891077e-02  1.60000047e-02
 -2.92000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  8.31138720e-02 -4.24255908e-01]
Read chunk # 1098 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.71789108e-01 -1.71999953e-02
  9.29998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.73132027e-01 -6.00000000e-01  3.16094172e-01]
Read chunk # 1099 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.84110892e-01  4.36000047e-02
 -2.98000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.10908963e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1100 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.90891077e-02  5.40000469e-03
 -1.24000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00

[ 0.00000000e+00  7.95731469e+08  2.46410892e-01  1.64000047e-02
 -2.41000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -2.20950213e-01]
Read chunk # 1144 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.86110892e-01  2.54000047e-02
  9.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  4.60057803e-01 -4.93590882e-01]
Read chunk # 1145 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.71410892e-01 -1.11999953e-02
 -2.03000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -2.45848340e-01  2.54658566e-01]
Read chunk # 1146 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.94810892e-01 -4.27999953e-02
 -6.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.04674872e-01 -6.00000000e-01 -1.41704637e-02]
Read chunk # 1147 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.83910892e-01 -3.93999953e-02
  2.60999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.82

[ 0.00000000e+00  7.95731469e+08 -2.57589108e-01  4.90000047e-02
  2.79999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  4.18987738e-01 -6.47203778e-02]
Read chunk # 1193 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.94210892e-01 -5.55999953e-02
 -2.07000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.42563252e-01  3.25016081e-01 -4.75423145e-01]
Read chunk # 1194 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.41789108e-01 -3.27999953e-02
  1.09999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01  2.70335052e-01]
Read chunk # 1195 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.18389108e-01 -2.39999531e-03
  1.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.34296453e-01  6.00000000e-01  2.89706933e-03]
Read chunk # 1196 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.48510892e-01 -2.77999953e-02
  1.19999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.14

[ 0.00000000e+00  7.95731469e+08 -2.33589108e-01  1.06000047e-02
 -1.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.33503864e-01 -6.00000000e-01  1.58483728e-01]
Read chunk # 1245 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.80010892e-01  3.20000047e-02
  2.46999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  4.03975526e-01  6.00000000e-01]
Read chunk # 1246 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.03610892e-01  3.60000047e-02
 -3.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.48946884e-01 -6.00000000e-01 -5.15036673e-01]
Read chunk # 1247 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.88810892e-01  1.90000047e-02
 -2.70000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.03054669e-01  2.22334771e-01]
Read chunk # 1248 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.33610892e-01  1.48000047e-02
  6.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.24

start
[ 0.00000000e+00  7.95731469e+08  5.77108923e-02 -5.43999953e-02
 -5.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.99532548e-01  2.63182969e-01  6.00000000e-01]
Read chunk # 1295 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.09689108e-01  2.40000047e-02
 -1.15000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.99619572e-01 -4.68484591e-01 -3.72117255e-01]
Read chunk # 1296 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.48891077e-02  4.62000047e-02
 -7.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -8.45821673e-02  6.00000000e-01  3.67666158e-01]
Read chunk # 1297 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.80689108e-01  1.68000047e-02
 -6.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.58991643e-01 -2.53923856e-01  6.00000000e-01]
Read chunk # 1298 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.13910892e-01  4.60000047e-02
 -3.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


start
[ 0.00000000e+00  7.95731469e+08  3.18410892e-01  1.52000047e-02
  9.29998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.33719008e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1344 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.02810892e-01 -3.59999531e-03
  2.06999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1345 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.19110892e-01 -5.01999953e-02
  2.63999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01  3.88193675e-01]
Read chunk # 1346 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.57310892e-01 -1.39999531e-03
  2.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.21264512e-02 -6.00000000e-01]
Read chunk # 1347 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.40810892e-01  3.26000047e-02
 -1.72000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08  1.12610892e-01  4.78000047e-02
 -2.49000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.47156204e-04 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1392 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.10891077e-02  2.86000047e-02
  1.67999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  3.38946731e-01 -6.00000000e-01]
Read chunk # 1393 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -7.03891077e-02 -2.99999953e-02
  2.49999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.73437156e-01 -3.75184112e-01  6.00000000e-01]
Read chunk # 1394 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.05089108e-01  1.94000047e-02
  2.59999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.50273586e-01  4.05899727e-01]
Read chunk # 1395 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.20789108e-01  6.60000469e-03
  1.51999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.49

[ 0.00000000e+00  7.95731469e+08 -4.37589108e-01  5.80000469e-03
  2.04999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.54690006e-01  8.54587881e-03 -2.62696658e-01]
Read chunk # 1440 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.68710892e-01  7.60000469e-03
 -2.84000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.66326206e-01 -7.10485070e-02  3.40629644e-01]
Read chunk # 1441 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.99689108e-01  5.72000047e-02
 -2.47000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.74585512e-01  2.07777886e-01 -6.00000000e-01]
Read chunk # 1442 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.36910892e-01  5.42000047e-02
 -3.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.07964539e-01 -5.09380698e-01  6.00000000e-01]
Read chunk # 1443 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.56410892e-01 -2.69999953e-02
 -2.51000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.01

start
[ 0.00000000e+00  7.95731469e+08  2.01410892e-01 -3.71999953e-02
 -1.56000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.39859373e-01  2.71030738e-01  6.00000000e-01]
Read chunk # 1482 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.78589108e-01  2.20000469e-03
 -2.21000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  5.36673641e-01 -5.42438367e-01]
Read chunk # 1483 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.46310892e-01  5.46000047e-02
 -1.90000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1484 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.28010892e-01 -4.01999953e-02
 -1.43000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.53912579e-02  1.07282972e-01 -6.19555055e-02]
Read chunk # 1485 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -6.88891077e-02  1.40000047e-02
 -2.89000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -3.43389108e-01 -5.17999953e-02
 -5.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  1.48739735e-01]
Read chunk # 1530 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.15891077e-02 -1.55999953e-02
 -3.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.94817725e-01  1.30028786e-01  6.00000000e-01]
Read chunk # 1531 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.65108923e-02  4.64000047e-02
  2.68999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.85590759e-01  4.82677190e-01]
Read chunk # 1532 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.19710892e-01  4.02000047e-02
 -2.55000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.13711677e-01  2.10781140e-01]
Read chunk # 1533 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.94210892e-01 -5.35999953e-02
  1.21999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.25

Read chunk # 1577 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  5.59108923e-02  1.60000047e-02
 -7.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -5.89593702e-01]
Read chunk # 1578 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.02589108e-01  3.12000047e-02
 -4.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.65138809e-02 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1579 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.42108923e-02 -4.31999953e-02
  1.24999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.47030418e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1580 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.59889108e-01 -4.33999953e-02
 -1.31000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -3.68072829e-01]
Read chunk # 1581 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.10289108e-01 -4.61999953e-02
  1.94999866e-02  0.00000000e+00 -5

start
[ 0.00000000e+00  7.95731469e+08 -1.69389108e-01  3.16000047e-02
  1.44999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1624 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.93891077e-02  2.24000047e-02
 -9.50001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  4.58304474e-01 -6.00000000e-01]
Read chunk # 1625 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.38108923e-02  1.90000047e-02
 -2.34000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.72091016e-02 -2.65169979e-01 -1.70444959e-01]
Read chunk # 1626 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -7.87891077e-02  5.42000047e-02
  1.81999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -4.44692413e-01]
Read chunk # 1627 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.92289108e-01 -4.69999953e-02
 -1.14000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08  3.86810892e-01  1.52000047e-02
 -1.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.06080561e-02 -6.00000000e-01 -2.24516434e-01]
Read chunk # 1672 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.47110892e-01 -1.71999953e-02
  2.66999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -5.06061131e-01]
Read chunk # 1673 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -8.11891077e-02  1.76000047e-02
  9.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.44386955e-02 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1674 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  6.04108923e-02  5.20000469e-03
 -7.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.13206282e-01  6.00000000e-01]
Read chunk # 1675 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.72289108e-01 -5.93999953e-02
  2.82999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.48

start
[ 0.00000000e+00  7.95731469e+08  3.23510892e-01 -3.23999953e-02
 -2.97000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.94650699e-01  2.08067476e-01 -4.71225460e-02]
Read chunk # 1717 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.26989108e-01 -1.81999953e-02
 -1.98000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.02436576e-01  6.00000000e-01]
Read chunk # 1718 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.94210892e-01  5.40000047e-02
 -8.80001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.50309848e-01  4.28054613e-01  3.17447043e-01]
Read chunk # 1719 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.51210892e-01  3.44000047e-02
 -6.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.22142740e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 1720 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.42210892e-01  3.20000469e-03
  8.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -4.27389108e-01  2.98000047e-02
 -1.51000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.52994626e-01  5.11746014e-01  5.00855105e-01]
Read chunk # 1764 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.69910892e-01  1.00000469e-03
 -6.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.34717615e-01 -2.00116557e-01 -6.00000000e-01]
Read chunk # 1765 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.32710892e-01 -5.09999953e-02
 -7.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.28798416e-01 -6.00000000e-01 -2.52882675e-02]
Read chunk # 1766 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -8.68891077e-02  3.04000047e-02
 -2.30000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.62391533e-02 -6.00000000e-01 -2.89683541e-01]
Read chunk # 1767 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.42789108e-01  1.64000047e-02
 -2.22000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00

[ 0.00000000e+00  7.95731469e+08 -4.32189108e-01 -4.29999953e-02
  4.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.09513570e-01  1.84095824e-01  1.85422746e-01]
Read chunk # 1811 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.87210892e-01  2.52000047e-02
 -2.07000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.12152461e-01 -5.49303658e-01 -9.56883855e-02]
Read chunk # 1812 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.69108923e-02 -2.77999953e-02
  1.90999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.43848681e-02  6.00000000e-01  6.00000000e-01]
Read chunk # 1813 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -6.16891077e-02  1.58000047e-02
 -8.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  3.89156433e-01]
Read chunk # 1814 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.90189108e-01 -2.03999953e-02
 -9.50001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.82

[ 0.00000000e+00  7.95731469e+08  4.20710892e-01 -2.77999953e-02
 -2.33000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.09539286e-01  2.88121640e-01  3.88875427e-01]
Read chunk # 1856 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.02010892e-01 -8.39999531e-03
  2.68999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.14359323e-01  5.64374331e-01  1.22091660e-01]
Read chunk # 1857 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.21989108e-01  8.60000469e-03
 -1.83000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -1.59982083e-01 -1.47306429e-01]
Read chunk # 1858 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.70310892e-01  2.66000047e-02
  2.04999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.39006116e-01 -2.41762819e-01  4.57396317e-01]
Read chunk # 1859 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.96689108e-01  5.82000047e-02
  2.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00

[ 0.00000000e+00  7.95731469e+08  7.61089229e-03  1.36000047e-02
 -6.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.45514124e-01  6.00000000e-01 -3.45621909e-01]
Read chunk # 1904 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.15410892e-01 -4.67999953e-02
 -1.43000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.28180442e-01 -6.00000000e-01 -5.91834629e-01]
Read chunk # 1905 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.57910892e-01  4.18000047e-02
  1.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.17487952e-01  4.37245698e-01 -6.00000000e-01]
Read chunk # 1906 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.17089108e-01 -4.19999953e-02
  7.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -2.86216320e-01  6.00000000e-01]
Read chunk # 1907 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  6.25108923e-02 -2.13999953e-02
 -1.17000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00

[ 0.00000000e+00  7.95731469e+08  1.96310892e-01  3.42000047e-02
 -5.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.62048999e-01  5.66433524e-02 -6.00000000e-01]
Read chunk # 1948 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.35710892e-01 -4.19999953e-02
  7.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.72990701e-02 -3.66037139e-02 -1.42005470e-01]
Read chunk # 1949 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.59410892e-01  3.20000469e-03
  6.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.64576497e-01 -6.00000000e-01  5.86511624e-01]
Read chunk # 1950 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.82710892e-01  1.60000047e-02
 -2.91000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.75406591e-01 -6.00000000e-01]
Read chunk # 1951 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.18089108e-01  7.40000469e-03
  1.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.87

[ 0.00000000e+00  7.95731469e+08  3.90410892e-01  1.24000047e-02
  9.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  5.24360876e-01]
Read chunk # 1989 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.48610892e-01 -3.87999953e-02
  1.98999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01 -6.00000000e-01]
Read chunk # 1990 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  7.99108923e-02  1.28000047e-02
  3.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.44419055e-01  5.87838278e-01 -8.12496758e-03]
Read chunk # 1991 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.05589108e-01 -1.51999953e-02
  2.33999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.56807043e-01  6.00000000e-01  6.00000000e-01]
Read chunk # 1992 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.45689108e-01 -1.59999531e-03
  7.99986580e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.37

In [4]:
# Label the output, then display the 'return' entry of the dataset statistics
# as the cell's rich output.
print("The behavior policy value statistics are:")
return_stats = dataset.compute_stats()['return']
return_stats

The behavior policy value statistics are:


{'mean': -286.53177,
 'std': 142.0723,
 'min': -662.35175,
 'max': 0.0,
 'histogram': (array([ 11,  29,  40,  71,  63,  71,  75,  97, 106, 123, 145, 177, 178,
         166, 167, 188, 158, 116,  17,   1]),
  array([-662.35175 , -629.23413 , -596.1166  , -562.99896 , -529.8814  ,
         -496.7638  , -463.6462  , -430.52863 , -397.41104 , -364.29346 ,
         -331.17587 , -298.0583  , -264.9407  , -231.8231  , -198.70552 ,
         -165.58794 , -132.47035 ,  -99.35276 ,  -66.235176,  -33.117588,
            0.      ], dtype=float32))}

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of `dataset` for evaluation.
# random_state pins the shuffle so that the same train/test membership is
# produced on every "Restart Kernel -> Run All"; without it scikit-learn draws
# from NumPy's global RNG, which this notebook never seeds.
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2, random_state=0)

## Setting up an Algorithm

In [6]:
import torch
from d3rlpy.algos import CQL, TD3PlusBC
from d3rlpy.models.encoders import VectorEncoderFactory
from d3rlpy.preprocessing import MinMaxActionScaler

# Actions are min-max scaled using fixed bounds of -0.6 and 0.6.
action_scaler = MinMaxActionScaler(minimum=-0.6, maximum=0.6)
#cql = CQL(action_scaler=action_scaler)

# Using hyperparameters found from the random search
# [0.004343795776698955, 3.716957428808228e-05, 3]
actor_lr_this_iter = 0.004343795776698955
critic_lr_this_iter = 3.716957428808228e-05
n_steps_this_iter = 3

# Separate MLP encoders for the actor and the critic; both use ReLU,
# batch normalisation and 20% dropout, and differ only in their layer widths.
actor_encoder = VectorEncoderFactory(hidden_units=[12, 24, 36, 24, 12],
                                      activation='relu', use_batch_norm=True, dropout_rate=0.2)
critic_encoder = VectorEncoderFactory(hidden_units=[12, 24, 24, 12],
                                      activation='relu', use_batch_norm=True, dropout_rate=0.2)

# NOTE(review): the training log saved further down in this notebook refers to
# "CQL_..." run directories, so it appears to predate this TD3+BC configuration.
# Re-run the notebook to refresh the recorded outputs.
model = TD3PlusBC(q_func_factory='qr', #qr -> quantile regression q function, but you don't have to use this
            reward_scaler='standard',
            actor_encoder_factory = actor_encoder,
            critic_encoder_factory = critic_encoder,
            action_scaler=action_scaler,
            actor_learning_rate=actor_lr_this_iter, 
            critic_learning_rate=critic_lr_this_iter,
            n_steps=n_steps_this_iter, 
            # Use the GPU when CUDA is available, otherwise fall back to CPU
            # instead of failing on machines without one.
            use_gpu=torch.cuda.is_available())
# Instantiate the networks now (before fit) so the untrained model can be scored.
model.build_with_dataset(dataset)

In [7]:
from d3rlpy.metrics.scorer import (
    average_value_estimation_scorer,
    initial_state_value_estimation_scorer,
    td_error_scorer,
)

# Baseline taken before model.fit is called: score the freshly built model
# on the held-out test episodes and print the result.
ave_error_init = average_value_estimation_scorer(model, test_episodes)
print(ave_error_init)

0.010758407734730911


In [8]:
# Load the TensorBoard notebook extension and open it on the 'runs' log
# directory (the same directory passed as tensorboard_dir to model.fit).
%load_ext tensorboard
%tensorboard --logdir runs

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
Traceback (most recent call last):
  File "/home/dasc/anaconda3/envs/jbreeden3.10/bin/tensorboard", line 6, in <module>
    from tensorboard.main import run_main
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/main.py", line 40, in <module>
    from tensorboard import default
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/default.py", line 38, in <module>
    from tensorboard.plugins.audio import audio_plugin
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/plugins/audio/audio_plugin.py", line 25, in <module>
    from tensorboard import plugin_util
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/plugin_util.py", line 21, in <module>
    from tensorboard._vendor import bleach
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorb

In [9]:
# Metrics computed on the evaluation episodes; each one is reported in the
# training log under its dictionary key.
fit_scorers = {
    'td_error': td_error_scorer,
    'init_value': initial_state_value_estimation_scorer,
    'ave_value': average_value_estimation_scorer,
}

# Train for 40 epochs and write TensorBoard event files to 'runs'.
model.fit(
    train_episodes,
    eval_episodes=test_episodes,
    n_epochs=40,
    tensorboard_dir='runs',
    scorers=fit_scorers,
)

2022-04-20 15:28.06 [debug    ] RoundIterator is selected.
2022-04-20 15:28.06 [info     ] Directory is created at d3rlpy_logs/CQL_20220420152806
2022-04-20 15:28.06 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-20 15:28.06 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-20 15:28.06 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220420152806/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 1e-05, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_rate': 0.0001, 'alpha_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_threshold': 10.0, 'batch_size': 256, 'conser

Epoch 1/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 15:38.14 [info     ] CQL_20220420152806: epoch=1 step=6928 epoch=1 metrics={'time_sample_batch': 0.0003016272400598328, 'time_algorithm_update': 0.08602012817771551, 'temp_loss': 3.252458093502918, 'temp': 0.7384563273477086, 'alpha_loss': -14.509580498617185, 'alpha': 1.4357602483061664, 'critic_loss': 22.86941206895727, 'actor_loss': 39.18387807384648, 'time_step': 0.08663777600948035, 'td_error': 15.165782817228687, 'init_value': -82.5303726196289, 'ave_value': -83.63484396477433} step=6928
2022-04-20 15:38.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_6928.pt


Epoch 2/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 15:48.05 [info     ] CQL_20220420152806: epoch=2 step=13856 epoch=2 metrics={'time_sample_batch': 0.0003192173897807097, 'time_algorithm_update': 0.0833366665705247, 'temp_loss': 0.7667452684515177, 'temp': 0.42597445298425046, 'alpha_loss': -13.523500026709144, 'alpha': 2.8465004737637334, 'critic_loss': 54.11067065066609, 'actor_loss': 117.59847816749203, 'time_step': 0.08401468031400897, 'td_error': 35.829020571729394, 'init_value': -158.5977325439453, 'ave_value': -160.53637813901472} step=13856
2022-04-20 15:48.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_13856.pt


Epoch 3/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 15:58.11 [info     ] CQL_20220420152806: epoch=3 step=20784 epoch=3 metrics={'time_sample_batch': 0.0003262850483900956, 'time_algorithm_update': 0.08564589962397656, 'temp_loss': -0.057509789080580846, 'temp': 0.4073021462440973, 'alpha_loss': -5.95373821547271, 'alpha': 5.0684919406167355, 'critic_loss': 101.12514720516447, 'actor_loss': 191.8296038184926, 'time_step': 0.08634203049519872, 'td_error': 68.4345160614772, 'init_value': -231.90155029296875, 'ave_value': -234.53910332629917} step=20784
2022-04-20 15:58.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_20784.pt


Epoch 4/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 16:08.15 [info     ] CQL_20220420152806: epoch=4 step=27712 epoch=4 metrics={'time_sample_batch': 0.00032626443454354795, 'time_algorithm_update': 0.08523050711143926, 'temp_loss': -0.00042094514759281095, 'temp': 0.46756788538489275, 'alpha_loss': 2.501007389986735, 'alpha': 5.282934047952811, 'critic_loss': 155.48460545991364, 'actor_loss': 260.64224418701804, 'time_step': 0.08592843241972406, 'td_error': 94.98451616456474, 'init_value': -291.9490966796875, 'ave_value': -295.00815580158405} step=27712
2022-04-20 16:08.15 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_27712.pt


Epoch 5/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 16:18.22 [info     ] CQL_20220420152806: epoch=5 step=34640 epoch=5 metrics={'time_sample_batch': 0.0003270921544720339, 'time_algorithm_update': 0.08573519330101806, 'temp_loss': 0.015189210176707178, 'temp': 0.4439238531829028, 'alpha_loss': 1.6911755942183146, 'alpha': 3.949912402942329, 'critic_loss': 204.64342612289667, 'actor_loss': 307.29209841316344, 'time_step': 0.08643313333839522, 'td_error': 115.89553441675329, 'init_value': -327.8329772949219, 'ave_value': -331.1182408740069} step=34640
2022-04-20 16:18.22 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_34640.pt


Epoch 6/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 16:28.28 [info     ] CQL_20220420152806: epoch=6 step=41568 epoch=6 metrics={'time_sample_batch': 0.00032412603187781283, 'time_algorithm_update': 0.08558211074690346, 'temp_loss': 0.00511643863653449, 'temp': 0.423378706171971, 'alpha_loss': 0.4297883011879837, 'alpha': 3.452151598335689, 'critic_loss': 240.12584175461984, 'actor_loss': 335.4156111966104, 'time_step': 0.08627687731873906, 'td_error': 128.19673022962073, 'init_value': -349.8582458496094, 'ave_value': -353.29743726311074} step=41568
2022-04-20 16:28.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_41568.pt


Epoch 7/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 16:38.28 [info     ] CQL_20220420152806: epoch=7 step=48496 epoch=7 metrics={'time_sample_batch': 0.00032505991827808404, 'time_algorithm_update': 0.08473807814214852, 'temp_loss': 0.0013214605927290438, 'temp': 0.4147958146670605, 'alpha_loss': 0.06020161391848346, 'alpha': 3.3255196431217238, 'critic_loss': 264.8711132089205, 'actor_loss': 353.2638330173272, 'time_step': 0.08543689290866015, 'td_error': 138.7597448285057, 'init_value': -363.4537353515625, 'ave_value': -366.9697619120967} step=48496
2022-04-20 16:38.28 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_48496.pt


Epoch 8/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 16:48.23 [info     ] CQL_20220420152806: epoch=8 step=55424 epoch=8 metrics={'time_sample_batch': 0.00032150783246989614, 'time_algorithm_update': 0.0839976051663141, 'temp_loss': 0.0007352947157847439, 'temp': 0.41351145846601445, 'alpha_loss': 0.11684504026181865, 'alpha': 3.2958778453341258, 'critic_loss': 281.12860491830537, 'actor_loss': 364.5360411584515, 'time_step': 0.0846895869761086, 'td_error': 145.01771960106527, 'init_value': -372.8621826171875, 'ave_value': -376.6000759439554} step=55424
2022-04-20 16:48.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_55424.pt


Epoch 9/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 16:58.16 [info     ] CQL_20220420152806: epoch=9 step=62352 epoch=9 metrics={'time_sample_batch': 0.0003209845023122175, 'time_algorithm_update': 0.08371424475387944, 'temp_loss': 0.00015511747296925369, 'temp': 0.41083254891406856, 'alpha_loss': 0.012657348054780651, 'alpha': 3.2352171536130663, 'critic_loss': 292.2612079660006, 'actor_loss': 372.4126708799367, 'time_step': 0.08440496492055492, 'td_error': 146.62462569359582, 'init_value': -378.7864074707031, 'ave_value': -382.48896892472} step=62352
2022-04-20 16:58.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_62352.pt


Epoch 10/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 17:08.05 [info     ] CQL_20220420152806: epoch=10 step=69280 epoch=10 metrics={'time_sample_batch': 0.0003229959181770166, 'time_algorithm_update': 0.08317748445148557, 'temp_loss': 6.839575902795458e-05, 'temp': 0.41042351828109447, 'alpha_loss': 0.08721634723995975, 'alpha': 3.220432578691434, 'critic_loss': 300.13835881777777, 'actor_loss': 377.65393913480483, 'time_step': 0.08387450584776132, 'td_error': 152.63290619030133, 'init_value': -384.4639892578125, 'ave_value': -388.2389237983635} step=69280
2022-04-20 17:08.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_69280.pt


Epoch 11/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 17:17.39 [info     ] CQL_20220420152806: epoch=11 step=76208 epoch=11 metrics={'time_sample_batch': 0.00031692742888426393, 'time_algorithm_update': 0.0809102267646349, 'temp_loss': 0.002213452617049284, 'temp': 0.4078481106203712, 'alpha_loss': 0.02607878880981543, 'alpha': 3.1662019023168444, 'critic_loss': 305.7491776228777, 'actor_loss': 381.1900491736502, 'time_step': 0.08159472337358267, 'td_error': 152.25271129726787, 'init_value': -387.3818054199219, 'ave_value': -391.11520324198403} step=76208
2022-04-20 17:17.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_76208.pt


Epoch 12/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 17:27.12 [info     ] CQL_20220420152806: epoch=12 step=83136 epoch=12 metrics={'time_sample_batch': 0.00031753001394778423, 'time_algorithm_update': 0.08094240683743915, 'temp_loss': 0.0016971251329578627, 'temp': 0.406557518137107, 'alpha_loss': 0.21253741092029294, 'alpha': 3.089473470907839, 'critic_loss': 307.88399511541286, 'actor_loss': 383.0788144030142, 'time_step': 0.08162913834525587, 'td_error': 152.05088942319682, 'init_value': -387.0000915527344, 'ave_value': -390.8253248286204} step=83136
2022-04-20 17:27.12 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_83136.pt


Epoch 13/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 17:36.47 [info     ] CQL_20220420152806: epoch=13 step=90064 epoch=13 metrics={'time_sample_batch': 0.00031687064616839827, 'time_algorithm_update': 0.08108410910288126, 'temp_loss': -1.7868005368142338e-05, 'temp': 0.4015838572579085, 'alpha_loss': 0.1453173688519652, 'alpha': 3.012710156706409, 'critic_loss': 307.6913929973959, 'actor_loss': 382.84055553903204, 'time_step': 0.08176694600466493, 'td_error': 154.5457456953617, 'init_value': -386.3783874511719, 'ave_value': -390.3127131709193} step=90064
2022-04-20 17:36.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_90064.pt


Epoch 14/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 17:46.23 [info     ] CQL_20220420152806: epoch=14 step=96992 epoch=14 metrics={'time_sample_batch': 0.0003185291488506502, 'time_algorithm_update': 0.08127157995799948, 'temp_loss': 0.0019392069349947008, 'temp': 0.39898135409395774, 'alpha_loss': 0.027307488860814463, 'alpha': 2.952804238670968, 'critic_loss': 306.8745130155984, 'actor_loss': 382.2925036476611, 'time_step': 0.08196215149161447, 'td_error': 153.37744306939155, 'init_value': -385.77471923828125, 'ave_value': -389.8894928476832} step=96992
2022-04-20 17:46.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_96992.pt


Epoch 15/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 17:55.57 [info     ] CQL_20220420152806: epoch=15 step=103920 epoch=15 metrics={'time_sample_batch': 0.000317477636194119, 'time_algorithm_update': 0.08105248880441415, 'temp_loss': 0.00016652110493021392, 'temp': 0.3988769215469272, 'alpha_loss': 0.09581157639647557, 'alpha': 2.939034146779404, 'critic_loss': 305.2395613207278, 'actor_loss': 381.084674606147, 'time_step': 0.08173817554467269, 'td_error': 150.83230985566144, 'init_value': -384.3565368652344, 'ave_value': -388.6183944038185} step=103920
2022-04-20 17:55.57 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_103920.pt


Epoch 16/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 18:06.02 [info     ] CQL_20220420152806: epoch=16 step=110848 epoch=16 metrics={'time_sample_batch': 0.0003256621936176996, 'time_algorithm_update': 0.08544689785112684, 'temp_loss': 0.001056482785343199, 'temp': 0.3967659787573867, 'alpha_loss': 0.14674160829218272, 'alpha': 2.8599903667496203, 'critic_loss': 303.50363338227635, 'actor_loss': 379.72472162665173, 'time_step': 0.08615594816675913, 'td_error': 150.8201905219228, 'init_value': -384.1368713378906, 'ave_value': -388.51655735462634} step=110848
2022-04-20 18:06.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_110848.pt


Epoch 17/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 18:15.47 [info     ] CQL_20220420152806: epoch=17 step=117776 epoch=17 metrics={'time_sample_batch': 0.0003195748455805261, 'time_algorithm_update': 0.08243331378519397, 'temp_loss': 0.0027306363544175208, 'temp': 0.3935903649751364, 'alpha_loss': 0.03593843239706048, 'alpha': 2.7903135732401325, 'critic_loss': 302.20853224596306, 'actor_loss': 378.74756396339893, 'time_step': 0.08312637167907477, 'td_error': 149.19271373018415, 'init_value': -382.38055419921875, 'ave_value': -386.93150880487116} step=117776
2022-04-20 18:15.47 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_117776.pt


Epoch 18/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 18:25.45 [info     ] CQL_20220420152806: epoch=18 step=124704 epoch=18 metrics={'time_sample_batch': 0.00032719732294457065, 'time_algorithm_update': 0.08444803736364043, 'temp_loss': -0.0008835652656261712, 'temp': 0.3925906618493534, 'alpha_loss': 0.05658290624711723, 'alpha': 2.7636650936983473, 'critic_loss': 300.37127405005845, 'actor_loss': 377.3480703560915, 'time_step': 0.08516735278981938, 'td_error': 150.88014132910735, 'init_value': -379.435546875, 'ave_value': -384.11794202515887} step=124704
2022-04-20 18:25.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_124704.pt


Epoch 19/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 18:35.51 [info     ] CQL_20220420152806: epoch=19 step=131632 epoch=19 metrics={'time_sample_batch': 0.0003276275982757639, 'time_algorithm_update': 0.0855669965644907, 'temp_loss': 0.0013646683838502364, 'temp': 0.3924759008876515, 'alpha_loss': 0.07501911178098776, 'alpha': 2.7217066250483377, 'critic_loss': 298.73061083749315, 'actor_loss': 376.28123776741995, 'time_step': 0.08627895367338255, 'td_error': 148.8274143141246, 'init_value': -378.4768371582031, 'ave_value': -383.3459352294303} step=131632
2022-04-20 18:35.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_131632.pt


Epoch 20/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 18:46.03 [info     ] CQL_20220420152806: epoch=20 step=138560 epoch=20 metrics={'time_sample_batch': 0.00032445846886888113, 'time_algorithm_update': 0.08644907933597476, 'temp_loss': -0.0010471031899920472, 'temp': 0.39129041940163106, 'alpha_loss': 0.057392833633631825, 'alpha': 2.682205930111315, 'critic_loss': 298.2111357592958, 'actor_loss': 375.91105251884903, 'time_step': 0.08715547617808776, 'td_error': 149.73068521695063, 'init_value': -379.6229248046875, 'ave_value': -384.7416417180654} step=138560
2022-04-20 18:46.03 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_138560.pt


Epoch 21/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 18:55.59 [info     ] CQL_20220420152806: epoch=21 step=145488 epoch=21 metrics={'time_sample_batch': 0.0003257913140722015, 'time_algorithm_update': 0.0841722529699841, 'temp_loss': -0.00021026998865321348, 'temp': 0.3889710552706523, 'alpha_loss': 0.0004072860805993936, 'alpha': 2.6733519726069366, 'critic_loss': 297.29790157238, 'actor_loss': 375.21719411008627, 'time_step': 0.08486979397973067, 'td_error': 148.32996202443468, 'init_value': -378.78863525390625, 'ave_value': -384.0768215835846} step=145488
2022-04-20 18:55.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_145488.pt


Epoch 22/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 19:05.49 [info     ] CQL_20220420152806: epoch=22 step=152416 epoch=22 metrics={'time_sample_batch': 0.0003204657147718082, 'time_algorithm_update': 0.08321831129302054, 'temp_loss': 0.00015466173118148128, 'temp': 0.3892969376005353, 'alpha_loss': 0.0854935034511043, 'alpha': 2.654599910795551, 'critic_loss': 296.1565119817543, 'actor_loss': 374.6401719936873, 'time_step': 0.08390759458167449, 'td_error': 146.96622365148966, 'init_value': -377.44390869140625, 'ave_value': -382.8588482812761} step=152416
2022-04-20 19:05.49 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_152416.pt


Epoch 23/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 19:15.24 [info     ] CQL_20220420152806: epoch=23 step=159344 epoch=23 metrics={'time_sample_batch': 0.0003176186638120691, 'time_algorithm_update': 0.08117018743282653, 'temp_loss': 0.002589611458448547, 'temp': 0.38731832411269257, 'alpha_loss': 0.058667933689445145, 'alpha': 2.622918041349834, 'critic_loss': 295.1302064994466, 'actor_loss': 374.16219451851435, 'time_step': 0.08184939416396425, 'td_error': 145.6551757114064, 'init_value': -375.1539001464844, 'ave_value': -380.5634380744728} step=159344
2022-04-20 19:15.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_159344.pt


Epoch 24/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 19:25.05 [info     ] CQL_20220420152806: epoch=24 step=166272 epoch=24 metrics={'time_sample_batch': 0.0003248639318739294, 'time_algorithm_update': 0.08187092926705958, 'temp_loss': -0.0002172960631652062, 'temp': 0.38623581871424756, 'alpha_loss': -0.004385814855173337, 'alpha': 2.6046990920503474, 'critic_loss': 293.90692396534234, 'actor_loss': 373.5542466029284, 'time_step': 0.08256775126996822, 'td_error': 147.0839823180111, 'init_value': -377.20843505859375, 'ave_value': -382.7907013117472} step=166272
2022-04-20 19:25.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_166272.pt


Epoch 25/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 19:34.24 [info     ] CQL_20220420152806: epoch=25 step=173200 epoch=25 metrics={'time_sample_batch': 0.0003214794066981961, 'time_algorithm_update': 0.07883937890893042, 'temp_loss': -0.0005968416706679333, 'temp': 0.38497941391693113, 'alpha_loss': -0.0003559062534315575, 'alpha': 2.603527492449961, 'critic_loss': 293.21378193640817, 'actor_loss': 373.33657764232186, 'time_step': 0.079532820722798, 'td_error': 146.38596107823147, 'init_value': -378.23175048828125, 'ave_value': -383.9554917474008} step=173200
2022-04-20 19:34.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_173200.pt


Epoch 26/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 19:43.54 [info     ] CQL_20220420152806: epoch=26 step=180128 epoch=26 metrics={'time_sample_batch': 0.0003175520387587867, 'time_algorithm_update': 0.08051156870349725, 'temp_loss': 0.00017726631004602044, 'temp': 0.38638260394472446, 'alpha_loss': 0.05526868406492721, 'alpha': 2.60668095024023, 'critic_loss': 294.1054653386321, 'actor_loss': 374.6586740859386, 'time_step': 0.08119514064893436, 'td_error': 146.7227014695116, 'init_value': -376.8963317871094, 'ave_value': -382.7452992227228} step=180128
2022-04-20 19:43.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_180128.pt


Epoch 27/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 19:53.23 [info     ] CQL_20220420152806: epoch=27 step=187056 epoch=27 metrics={'time_sample_batch': 0.00031690918958765253, 'time_algorithm_update': 0.08018966838201254, 'temp_loss': -0.0020944280410077915, 'temp': 0.38445683615174636, 'alpha_loss': 0.015730928034096273, 'alpha': 2.5665628689128313, 'critic_loss': 294.21361828084355, 'actor_loss': 374.95952437050624, 'time_step': 0.08086936199912849, 'td_error': 146.64652066498127, 'init_value': -378.8770751953125, 'ave_value': -384.718354118897} step=187056
2022-04-20 19:53.23 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_187056.pt


Epoch 28/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 20:02.59 [info     ] CQL_20220420152806: epoch=28 step=193984 epoch=28 metrics={'time_sample_batch': 0.00031774196833991565, 'time_algorithm_update': 0.08133410798347024, 'temp_loss': 0.002530748452671307, 'temp': 0.38398735777541615, 'alpha_loss': 0.00749153480704587, 'alpha': 2.543444472048905, 'critic_loss': 294.54922442974447, 'actor_loss': 375.5807452741451, 'time_step': 0.0820167232605932, 'td_error': 146.07839369954843, 'init_value': -379.4012451171875, 'ave_value': -385.23427167242926} step=193984
2022-04-20 20:02.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_193984.pt


Epoch 29/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 20:12.44 [info     ] CQL_20220420152806: epoch=29 step=200912 epoch=29 metrics={'time_sample_batch': 0.00032250497137426503, 'time_algorithm_update': 0.08250118179491982, 'temp_loss': 0.001047623371827743, 'temp': 0.3834465094788245, 'alpha_loss': 0.054970351315512, 'alpha': 2.5291426366251004, 'critic_loss': 293.0302240369502, 'actor_loss': 374.88218170941286, 'time_step': 0.08319476181447644, 'td_error': 145.4932061330668, 'init_value': -378.2215270996094, 'ave_value': -384.144532281068} step=200912
2022-04-20 20:12.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_200912.pt


Epoch 30/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 20:22.25 [info     ] CQL_20220420152806: epoch=30 step=207840 epoch=30 metrics={'time_sample_batch': 0.00031815101037675184, 'time_algorithm_update': 0.08197109353046769, 'temp_loss': 0.0009931626405156116, 'temp': 0.3832080283785114, 'alpha_loss': 0.046813488731452774, 'alpha': 2.4940114706135237, 'critic_loss': 292.8615829250141, 'actor_loss': 374.93854965844275, 'time_step': 0.08265181384791404, 'td_error': 145.52081507656558, 'init_value': -377.70062255859375, 'ave_value': -383.6865444654173} step=207840
2022-04-20 20:22.25 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_207840.pt


Epoch 31/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 20:32.07 [info     ] CQL_20220420152806: epoch=31 step=214768 epoch=31 metrics={'time_sample_batch': 0.00032003960350644506, 'time_algorithm_update': 0.08209493360535927, 'temp_loss': -0.0011510241725084136, 'temp': 0.3811175966415959, 'alpha_loss': 0.08419124258721418, 'alpha': 2.4599037891722717, 'critic_loss': 292.43728886231537, 'actor_loss': 374.57472699013374, 'time_step': 0.08278291264665044, 'td_error': 145.1542326859634, 'init_value': -375.93475341796875, 'ave_value': -382.01751314647777} step=214768
2022-04-20 20:32.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_214768.pt


Epoch 32/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 20:41.50 [info     ] CQL_20220420152806: epoch=32 step=221696 epoch=32 metrics={'time_sample_batch': 0.0003199161957372969, 'time_algorithm_update': 0.0823575634741618, 'temp_loss': 0.0007854604805046994, 'temp': 0.38016706244809245, 'alpha_loss': 0.009156122993417466, 'alpha': 2.431178231712852, 'critic_loss': 291.6540359432097, 'actor_loss': 373.9857983357912, 'time_step': 0.08304536965510036, 'td_error': 144.84631037459152, 'init_value': -376.95867919921875, 'ave_value': -383.1214693957492} step=221696
2022-04-20 20:41.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_221696.pt


Epoch 33/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 20:51.45 [info     ] CQL_20220420152806: epoch=33 step=228624 epoch=33 metrics={'time_sample_batch': 0.00032277329551705586, 'time_algorithm_update': 0.08376394876431648, 'temp_loss': 0.0010524322233091841, 'temp': 0.3800228905818892, 'alpha_loss': 0.054110325275702455, 'alpha': 2.42970007176212, 'critic_loss': 291.0408859438764, 'actor_loss': 373.4200504029871, 'time_step': 0.08445659868298722, 'td_error': 143.89712618662423, 'init_value': -375.4156188964844, 'ave_value': -381.6717528769003} step=228624
2022-04-20 20:51.45 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_228624.pt


Epoch 34/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 21:02.43 [info     ] CQL_20220420152806: epoch=34 step=235552 epoch=34 metrics={'time_sample_batch': 0.00034252191112994047, 'time_algorithm_update': 0.09271713337639463, 'temp_loss': 0.0007342141222178626, 'temp': 0.3784887109201307, 'alpha_loss': -0.01790269076953307, 'alpha': 2.3921851133433005, 'critic_loss': 290.84206654010967, 'actor_loss': 373.08463649881907, 'time_step': 0.09344434060207966, 'td_error': 144.0343591255528, 'init_value': -375.17578125, 'ave_value': -381.4450501621349} step=235552
2022-04-20 21:02.43 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_235552.pt


Epoch 35/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 21:13.51 [info     ] CQL_20220420152806: epoch=35 step=242480 epoch=35 metrics={'time_sample_batch': 0.00034576798972026307, 'time_algorithm_update': 0.09414465615297851, 'temp_loss': -7.066304706586589e-05, 'temp': 0.3788032463512988, 'alpha_loss': 0.057575846577368556, 'alpha': 2.385815505445967, 'critic_loss': 289.135928805169, 'actor_loss': 372.4342362181419, 'time_step': 0.09487503281779433, 'td_error': 143.33815273599905, 'init_value': -374.56195068359375, 'ave_value': -380.93159128470893} step=242480
2022-04-20 21:13.51 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_242480.pt


Epoch 36/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 21:24.54 [info     ] CQL_20220420152806: epoch=36 step=249408 epoch=36 metrics={'time_sample_batch': 0.0003427601232264389, 'time_algorithm_update': 0.09343358740107292, 'temp_loss': 0.0018314840040179335, 'temp': 0.3772144189779808, 'alpha_loss': 0.018561690491502855, 'alpha': 2.3588697090572905, 'critic_loss': 289.0404808865408, 'actor_loss': 372.42191122200416, 'time_step': 0.09415620988703766, 'td_error': 143.32437883181555, 'init_value': -374.6293029785156, 'ave_value': -381.07332828452996} step=249408
2022-04-20 21:24.54 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_249408.pt


Epoch 37/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 21:36.42 [info     ] CQL_20220420152806: epoch=37 step=256336 epoch=37 metrics={'time_sample_batch': 0.0003563220382562686, 'time_algorithm_update': 0.09981767416826297, 'temp_loss': -0.0017683214372761686, 'temp': 0.3767741588277712, 'alpha_loss': 0.06674075923250623, 'alpha': 2.349365295769161, 'critic_loss': 288.24296009354725, 'actor_loss': 372.0226013324553, 'time_step': 0.10057004845720516, 'td_error': 142.27106001372937, 'init_value': -374.017822265625, 'ave_value': -380.35407568569013} step=256336
2022-04-20 21:36.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_256336.pt


Epoch 38/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 21:47.32 [info     ] CQL_20220420152806: epoch=38 step=263264 epoch=38 metrics={'time_sample_batch': 0.0003425639647534483, 'time_algorithm_update': 0.0915706607816401, 'temp_loss': 0.0009511170512109384, 'temp': 0.37675226345555063, 'alpha_loss': -0.022720968328562137, 'alpha': 2.3265422952918478, 'critic_loss': 287.028775191954, 'actor_loss': 371.2436026293466, 'time_step': 0.0922931827105771, 'td_error': 142.58112409646523, 'init_value': -373.369873046875, 'ave_value': -379.8474824890274} step=263264
2022-04-20 21:47.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_263264.pt


Epoch 39/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 21:58.32 [info     ] CQL_20220420152806: epoch=39 step=270192 epoch=39 metrics={'time_sample_batch': 0.0003448767075637747, 'time_algorithm_update': 0.0929982915830392, 'temp_loss': -0.00018753714137737516, 'temp': 0.3766378736334227, 'alpha_loss': -0.011947945028642411, 'alpha': 2.314976746160494, 'critic_loss': 285.85863863396315, 'actor_loss': 371.0312901600404, 'time_step': 0.09372722927059367, 'td_error': 141.4734840362178, 'init_value': -373.0046081542969, 'ave_value': -379.5004974193401} step=270192
2022-04-20 21:58.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_270192.pt


Epoch 40/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-20 22:09.35 [info     ] CQL_20220420152806: epoch=40 step=277120 epoch=40 metrics={'time_sample_batch': 0.0003421332420432815, 'time_algorithm_update': 0.09344303563202777, 'temp_loss': 0.0029535965536136017, 'temp': 0.375632077773848, 'alpha_loss': 0.02483469982435978, 'alpha': 2.30095637584531, 'critic_loss': 284.19343940758813, 'actor_loss': 370.460908828101, 'time_step': 0.09416803579958151, 'td_error': 141.2749599119681, 'init_value': -372.5274963378906, 'ave_value': -378.97610149782196} step=277120
2022-04-20 22:09.35 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220420152806/model_277120.pt


[(1,
  {'time_sample_batch': 0.0003016272400598328,
   'time_algorithm_update': 0.08602012817771551,
   'temp_loss': 3.252458093502918,
   'temp': 0.7384563273477086,
   'alpha_loss': -14.509580498617185,
   'alpha': 1.4357602483061664,
   'critic_loss': 22.86941206895727,
   'actor_loss': 39.18387807384648,
   'time_step': 0.08663777600948035,
   'td_error': 15.165782817228687,
   'init_value': -82.5303726196289,
   'ave_value': -83.63484396477433}),
 (2,
  {'time_sample_batch': 0.0003192173897807097,
   'time_algorithm_update': 0.0833366665705247,
   'temp_loss': 0.7667452684515177,
   'temp': 0.42597445298425046,
   'alpha_loss': -13.523500026709144,
   'alpha': 2.8465004737637334,
   'critic_loss': 54.11067065066609,
   'actor_loss': 117.59847816749203,
   'time_step': 0.08401468031400897,
   'td_error': 35.829020571729394,
   'init_value': -158.5977325439453,
   'ave_value': -160.53637813901472}),
 (3,
  {'time_sample_batch': 0.0003262850483900956,
   'time_algorithm_update': 0.08

In [15]:
# Persist the trained model to disk in two forms. The file names appear to encode the run
# (pure-stochastic data, 2000 chunks, 40 epochs, CPU-only) — confirm against the training cell.
# 1) Model parameters, like the per-epoch "Model parameters are saved to ..." checkpoints in the log above.
model.save_model('cqlPurestoch2000_Ep40model_CPUOnly.pt')
# 2) The policy alone — presumably an exported, inference-only artifact; verify the format in the d3rlpy docs.
model.save_policy('cqlPurestoch2000_Ep40_CPUOnly.pt')

## Off-Policy Evaluation

Training already reports two metrics on a test set: the initial-state value and the average value. However, both are computed from the critic's own Q-function, so they are biased estimates of the model's performance. They're useful for validation during training, but not much else. Instead, we separately fit a new Q-function with Fitted Q Evaluation (FQE) to the data (or to a separate dataset, as I've done here) and use it to evaluate the model's performance.

Feel free to change the chunks and number of steps.

In [11]:
# NOTE: this cell is disabled (fully commented out) and does not run.
# It would fit FQE on chunks 2000-2099, i.e. outside the range(2000) chunks loaded in In [3].
# NOTE(review): if re-enabled, `train_test_split`, `action_scaler`,
# `initial_state_value_estimation_scorer` and `average_value_estimation_scorer` are not imported
# in this cell and must already be in scope from earlier cells — confirm.
# from d3rlpy.ope import FQE
# # metrics to evaluate with
# from d3rlpy.metrics.scorer import soft_opc_scorer


# ope_dataset = get_dataset([i+2000 for i in range(100)]) #change if you'd prefer different chunks
# ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

# fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
# fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
#         tensorboard_dir='runs',
#         n_epochs=100, n_steps_per_epoch=10000, #change if overfitting/underfitting
#         scorers={
#            'init_value': initial_state_value_estimation_scorer,
#             'ave_value': average_value_estimation_scorer,
#            'soft_opc': soft_opc_scorer(return_threshold=0)
#         })

In [12]:
# NOTE: this cell is disabled (fully commented out) and does not run.
# It would fit FQE on the even-numbered chunks 0-198, intended to come from
# "collected_data/rl_stochastic.txt".
# NOTE(review): get_dataset, as defined at the top of this notebook, accepts `path` but builds
# its DataSampler from a hard-coded file, so the `path=` argument below would have no effect and
# chunks 0-198 would overlap the range(2000) chunks loaded in In [3] — confirm before re-enabling.
# from d3rlpy.ope import FQE
# # metrics to evaluate with
# from d3rlpy.metrics.scorer import soft_opc_scorer


# ope_dataset = get_dataset([i*2 for i in range(100)], path="collected_data/rl_stochastic.txt") #change if you'd prefer different chunks
# ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

# fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
# fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
#         tensorboard_dir='runs',
#         n_epochs=100, n_steps_per_epoch=10000, #change if overfitting/underfitting
#         scorers={
#            'init_value': initial_state_value_estimation_scorer,
#             'ave_value': average_value_estimation_scorer,
#            'soft_opc': soft_opc_scorer(return_threshold=0)
#         })

In [13]:
# NOTE: this cell is disabled (fully commented out) and does not run.
# It is an earlier attempt to move `model` to the CPU with d3rlpy's `to_cpu` helper before
# saving the policy and the model under "cqlStochpid2000Ep40..." file names.
# from d3rlpy.torch_utility import to_cpu
# to_cpu(model)
# model.save_policy("cqlStochpid2000Ep40CPU.pt")
# model.save_model("cqlStochpid2000Ep40modelCPU.pt")

In [14]:
# NOTE: this cell is disabled (fully commented out) debugging scratch; none of it runs.
# It collects several attempts to find the torch modules held by `model` and move them to the
# CPU before saving, plus a trace of `model.save_policy`.
# NOTE(review): do not uncomment as-is — `print(yes)` refers to an unquoted name, and `Any`,
# `Dict` and `torch` are not imported in this cell (they may or may not be in scope from
# `from Python.data_sampler import *`) — confirm.
# for key in dir(model):
#     module = getattr(model, key)
#     if isinstance(module, (torch.nn.Module, torch.nn.Parameter)):
#         print(yes)
#         print(key)
# dir(model)
# type(model)
# model.cpu()
# from d3rlpy.algos.torch.base import TorchImplBase
# new_model = TorchImplBase()
# from d3rlpy.torch_utility import _get_attributes
# model._device = "cpu:0"
# print(model._device)


# def my_get_state_dict(impl: Any) -> Dict[str, Any]:
#     rets = {}
#     for key in _get_attributes(impl):
#         obj = getattr(impl, key)
#         if isinstance(obj, (torch.nn.Module, torch.optim.Optimizer)):
#             if isinstance(obj, (torch.nn.Module, torch.nn.Parameter)):
#                 obj.cpu()
#             rets[key] = obj.state_dict()
#     return rets

# torch.save(my_get_state_dict(model), "my_test_model.pt")

# for key in dir(model):
#     obj = getattr(model, key)
#     if isinstance(obj, (torch.nn.Module, torch.nn.Parameter)):
#         obj.cpu()
#         print("convert to cpu")
# model.save_policy("cqlStochpid2000Ep40modelCPU.pt")

# import trace
# tracer = trace.Trace()
# tracer.run('model.save_policy("cqlStochpid2000Ep40modelCPU.pt")')