# Sample Workflow for d3rlpy Experiments

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import math
import subprocess
import os
import d3rlpy
# Apply the plotting style named 'matplotlibrc'.
# NOTE(review): presumably a style file resolved relative to the working directory — confirm.
plt.style.use('matplotlibrc')

# NOTE(review): cells below use `DataSampler`, `random` and `torch` without importing
# them explicitly; they presumably enter the namespace through this star import — confirm.
from Python.data_sampler import *

## Building an MDPDataset

We first read a large batch of samples from the data file. `d3rlpy` expects the data as a tuple of (observations, actions, rewards, terminal flags), so we assemble it in that form. The helper function below builds a dataset from any list of chunks you choose.

In [2]:
def get_dataset(chunks : list, batch_size=30000,
                path="collected_data/rl_deterministic_coarse.txt",
                episode_length=1111) -> d3rlpy.dataset.MDPDataset :
    """Build a d3rlpy ``MDPDataset`` from selected chunks of a sample file.

    For every chunk index in ``chunks`` the sampler selects and reads that
    chunk, then draws a batch of ``batch_size`` samples from it. The
    per-chunk states, actions and rewards are concatenated, terminal flags
    are generated at a fixed stride, and everything is wrapped in an
    ``MDPDataset``.

    Parameters
    ----------
    chunks : list
        Chunk indices to read (any iterable of indices is accepted).
    batch_size : int
        Number of samples requested from each chunk via ``get_batch``.
    path : str
        File handed to ``DataSampler``. This argument used to be ignored and
        the coarse file was always read; the default now names that same
        coarse file, so calls that omit ``path`` behave as before.
    episode_length : int
        Stride, in samples, between terminal flags.

    Returns
    -------
    d3rlpy.dataset.MDPDataset
        Dataset built from the concatenated states, actions, rewards and the
        generated terminal flags.

    Raises
    ------
    ValueError
        If ``chunks`` is empty or ``episode_length`` is smaller than 1.
    """
    chunks = list(chunks)
    if not chunks:
        raise ValueError("get_dataset needs at least one chunk index")
    if episode_length < 1:
        raise ValueError("episode_length must be a positive integer")

    # Fixed seed before any sampling (presumably so get_batch draws are reproducible).
    random.seed(0)
    samples = DataSampler(path_to_data=path)
    # NOTE(review): the sampler is always switched to "coarse", so `path`
    # presumably has to point at a coarse-format file — confirm.
    samples.setting("coarse")

    states = []
    actions = []
    rewards = []
    for chunk in chunks:
        samples.use_chunk(chunk)
        samples.read_chunk()
        # get_batch yields four items; the fourth (next states) is not needed
        # for the MDPDataset, so it is discarded instead of being accumulated.
        statesChunk, actionsChunk, rewardsChunk, _ = samples.get_batch(batch_size)
        states.append(statesChunk)
        actions.append(actionsChunk)
        rewards.append(rewardsChunk)

    states = torch.cat(states)
    actions = torch.cat(actions)
    rewards = torch.cat(rewards)

    # One terminal flag every `episode_length` samples.
    # NOTE(review): this slice flags indices 0, N, 2N, ... (so index 0 is
    # terminal); confirm whether the last step of each episode
    # (N-1, 2N-1, ...) was intended instead.
    terminals = np.zeros(len(states))
    terminals[::episode_length] = 1

    print(states.shape)
    dataset = d3rlpy.dataset.MDPDataset(states.numpy(),
                                        actions.numpy(),
                                        rewards.numpy(), terminals)
    return dataset

With the helper in place, we can build the dataset from the first 2000 chunks. The resulting dataset can then be split into train and test sets.

In [3]:
# Build the dataset from chunk indices 0 through 1999.
dataset = get_dataset(list(range(2000)))

start
[ 0.00000000e+00  7.95731469e+08  1.89410892e-01 -5.99995308e-04
 -9.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.49650472e-01  5.60452370e-01 -6.00000000e-01]
Read chunk # 1 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.86510892e-01 -2.39999531e-03
 -2.92000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 2 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.93110892e-01  4.74000047e-02
 -2.36000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 3 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -9.10891077e-02 -2.85999953e-02
 -2.90000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -5.36786897e-02  6.00000000e-01]
Read chunk # 4 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.55589108e-01  1.14000047e-02
  1.12999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.67705737

start
[ 0.00000000e+00  7.95731469e+08  3.19108923e-02  3.62000047e-02
  9.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.30476298e-01  2.11530543e-02 -1.79693603e-01]
Read chunk # 43 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.51089229e-03  2.40000047e-02
  1.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.22928456e-01 -9.01716789e-02  5.24122209e-02]
Read chunk # 44 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.02710892e-01 -2.45999953e-02
  1.32999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.66016346e-01  1.86622984e-01 -6.00000000e-01]
Read chunk # 45 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.58789108e-01  4.60000047e-02
 -2.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -5.21367193e-01  6.00000000e-01]
Read chunk # 46 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.46989108e-01 -4.83999953e-02
  8.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.2479

[ 0.00000000e+00  7.95731469e+08 -8.08891077e-02 -4.51999953e-02
 -2.94000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -2.41049546e-02  5.99364821e-01]
Read chunk # 92 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.62289108e-01 -2.45999953e-02
 -1.09000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.04223805e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 93 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.58989108e-01  4.96000047e-02
 -2.58000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 94 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.01891077e-02 -9.39999531e-03
 -2.59000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  8.75229115e-02  2.46667824e-01]
Read chunk # 95 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.74889108e-01  3.68000047e-02
  2.28999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.88140679e-

[ 0.00000000e+00  7.95731469e+08  3.99710892e-01  5.98000047e-02
  1.71999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.38964924e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 141 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.75891077e-02  1.44000047e-02
  1.15999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.64104594e-01 -2.00211558e-01  4.83997914e-01]
Read chunk # 142 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.74110892e-01 -5.43999953e-02
 -5.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  7.47173217e-02  4.94179190e-01 -6.00000000e-01]
Read chunk # 143 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.69789108e-01  1.28000047e-02
 -2.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.11746518e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 144 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -9.79891077e-02  5.10000047e-02
  1.25999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.464996

[ 0.00000000e+00  7.95731469e+08  7.51108923e-02 -5.93999953e-02
 -2.11000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.48566820e-01  3.35279288e-01 -6.00000000e-01]
Read chunk # 186 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.06891077e-02 -4.79999531e-03
  1.42999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.15106846e-01 -1.19951500e-01  8.14678575e-02]
Read chunk # 187 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.63010892e-01 -1.43999953e-02
 -1.98000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.91210761e-01  5.59541096e-01 -6.00000000e-01]
Read chunk # 188 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.67789108e-01  3.30000047e-02
 -2.77000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -5.29719302e-01  6.00000000e-01]
Read chunk # 189 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  9.58108923e-02  4.18000047e-02
 -6.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.825622

[ 0.00000000e+00  7.95731469e+08 -7.99891077e-02  4.16000047e-02
  1.87999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.55531747e-01 -3.32365979e-01  6.00000000e-01]
Read chunk # 234 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.60710892e-01 -2.17999953e-02
 -2.09000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.92298456e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 235 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.16910892e-01  2.16000047e-02
 -8.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.15627071e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 236 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.14489108e-01  2.94000047e-02
 -1.76000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -1.89081778e-01  6.00000000e-01]
Read chunk # 237 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.44189108e-01 -4.77999953e-02
  2.92999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.000000

start
[ 0.00000000e+00  7.95731469e+08  3.93710892e-01  2.36000047e-02
 -1.81000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.94986573e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 280 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.68910771e-03  2.52000047e-02
 -1.76000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.87490585e-01  1.01519906e-01  1.22749244e-01]
Read chunk # 281 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.60089108e-01  4.82000047e-02
 -8.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.18458642e-01 -3.70350648e-01  6.00000000e-01]
Read chunk # 282 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.93989108e-01  5.04000047e-02
  1.31999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.65499131e-01 -5.97992271e-01  6.00000000e-01]
Read chunk # 283 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.04510892e-01  3.26000047e-02
 -2.61000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.

start
[ 0.00000000e+00  7.95731469e+08  1.85510892e-01  9.80000469e-03
 -1.59000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.79270086e-01  5.93955606e-01 -6.00000000e-01]
Read chunk # 327 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.44289108e-01  1.52000047e-02
 -2.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 328 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.89389108e-01 -4.79999953e-02
  3.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.87494041e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 329 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.39310892e-01 -4.69999953e-02
  6.29998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.61072519e-01  3.28604342e-01 -6.00000000e-01]
Read chunk # 330 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.62891077e-02 -2.47999953e-02
  2.66999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.

start
[ 0.00000000e+00  7.95731469e+08  4.02710892e-01  5.26000047e-02
 -1.38000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.91023397e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 373 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.34789108e-01  5.52000047e-02
 -1.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.21911841e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 374 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.36389108e-01 -3.79999953e-02
  1.32999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.19213922e-01 -4.46189973e-01  6.00000000e-01]
Read chunk # 375 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.39689108e-01  5.24000047e-02
 -7.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.10206222e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 376 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.53510892e-01 -5.39999953e-02
  8.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.

[ 0.00000000e+00  7.95731469e+08  4.23110892e-01  3.50000047e-02
 -4.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.74658675e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 419 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.51210892e-01 -1.55999953e-02
  5.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.19797988e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 420 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  5.68108923e-02  7.20000469e-03
 -1.09000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.30468799e-01  2.21107942e-01 -5.03020054e-01]
Read chunk # 421 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.47110892e-01 -2.71999953e-02
  2.13999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  2.51930555e-01 -6.00000000e-01]
Read chunk # 422 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.90589108e-01  5.14000047e-02
  1.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.514342

[ 0.00000000e+00  7.95731469e+08  2.52710892e-01  1.04000047e-02
  2.05999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.29247333e-01  5.36572228e-01 -6.00000000e-01]
Read chunk # 463 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.72789108e-01  2.54000047e-02
 -9.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.52869650e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 464 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.29489108e-01  1.60000469e-03
 -1.60000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.49485933e-01 -2.39093263e-01  6.00000000e-01]
Read chunk # 465 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.65410892e-01  3.90000047e-02
 -1.53000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.78575638e-01  5.36891105e-01 -6.00000000e-01]
Read chunk # 466 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.60989108e-01  3.76000047e-02
 -1.61000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.951743

[ 0.00000000e+00  7.95731469e+08  3.60110892e-01 -2.39999531e-03
  1.74999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.94205719e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 508 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.26710892e-01 -1.21999953e-02
 -4.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -8.72761685e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 509 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.64189108e-01  5.78000047e-02
  2.44999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.49084741e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 510 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.93589108e-01  3.46000047e-02
 -5.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.92457800e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 511 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.35891077e-02  3.64000047e-02
  6.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.828591

start
[ 0.00000000e+00  7.95731469e+08  1.68710892e-01 -4.03999953e-02
  2.76999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  2.68495383e-01 -6.00000000e-01]
Read chunk # 556 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.01589108e-01 -2.23999953e-02
  2.49999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -4.29491752e-01  6.00000000e-01]
Read chunk # 557 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.14710892e-01 -1.95999953e-02
  2.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 558 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.78689108e-01 -3.39999531e-03
 -1.03000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.71769610e-01 -4.06044017e-01  6.00000000e-01]
Read chunk # 559 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.83910892e-01  1.14000047e-02
  2.27999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.

[ 0.00000000e+00  7.95731469e+08  4.29410892e-01 -5.89999953e-02
 -1.01000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.55001937e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 603 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.86710892e-01  1.64000047e-02
  2.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.94614581e-03  4.81123820e-01 -6.00000000e-01]
Read chunk # 604 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.28410892e-01 -3.49999953e-02
  1.95999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  4.78703617e-01 -6.00000000e-01]
Read chunk # 605 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.35789108e-01  4.32000047e-02
 -1.21000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.06622709e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 606 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.48891077e-02 -5.21999953e-02
  9.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.703431

Read chunk # 650 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.00410892e-01  4.60000469e-03
 -1.64000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.72474188e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 651 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.08789108e-01 -2.55999953e-02
 -1.88000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.19051241e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 652 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.96610892e-01  2.14000047e-02
 -1.06000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.78533587e-01  5.89175531e-01 -6.00000000e-01]
Read chunk # 653 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.64810892e-01 -5.63999953e-02
 -9.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.64349997e-02  4.97278351e-01 -6.00000000e-01]
Read chunk # 654 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.36108923e-02  3.92000047e-02
 -2.60001342e-03  0.00000000e+00 -5.3342

start
[ 0.00000000e+00  7.95731469e+08 -4.41789108e-01  5.12000047e-02
  2.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.91904057e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 696 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.79489108e-01 -3.99999953e-02
  1.96999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 697 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.00989108e-01  5.06000047e-02
  2.46999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.83207688e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 698 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.16810892e-01 -4.41999953e-02
 -7.50001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.22464532e-02  3.57994100e-01 -6.00000000e-01]
Read chunk # 699 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.34210892e-01 -1.59999531e-03
  2.86999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.

start
[ 0.00000000e+00  7.95731469e+08 -4.40289108e-01  3.94000047e-02
  2.92999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 742 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.52389108e-01 -3.21999953e-02
  3.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.30307715e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 743 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.30989108e-01  3.34000047e-02
 -2.47000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -1.86992813e-01  6.00000000e-01]
Read chunk # 744 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.35710892e-01  5.52000047e-02
  3.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.33285044e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 745 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.21710892e-01  5.88000047e-02
  1.43999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.

start
[ 0.00000000e+00  7.95731469e+08  4.12310892e-01 -3.53999953e-02
 -4.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.97533842e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 786 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.29189108e-01  5.62000047e-02
 -1.55000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 787 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.95810892e-01 -3.99999953e-02
  2.50999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 788 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.02410892e-01 -3.39999531e-03
  7.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.35064884e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 789 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.74891077e-02  3.56000047e-02
  1.99999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.

start
[ 0.00000000e+00  7.95731469e+08 -3.73689108e-01  1.50000047e-02
 -3.50001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.56485084e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 834 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.26189108e-01  4.54000047e-02
 -6.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.49181395e-01 -2.94163601e-01  6.00000000e-01]
Read chunk # 835 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.87410892e-01 -5.39999953e-02
  7.99986580e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.36534991e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 836 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.63410892e-01 -5.79999953e-02
 -2.63000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.98143321e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 837 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.22310892e-01  9.20000469e-03
 -2.54000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.

[ 0.00000000e+00  7.95731469e+08 -2.10189108e-01  3.52000047e-02
 -1.07000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.36088738e-01 -4.86835402e-01  6.00000000e-01]
Read chunk # 875 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -6.55891077e-02  3.24000047e-02
  1.19999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.03723513e-01 -2.50429128e-01  6.00000000e-01]
Read chunk # 876 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  5.26108923e-02  2.62000047e-02
 -9.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.28939458e-01  1.45543275e-01 -4.03019512e-01]
Read chunk # 877 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.03510892e-01 -4.37999953e-02
  1.22999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.14543864e-01  4.59849734e-01 -6.00000000e-01]
Read chunk # 878 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.65710892e-01 -4.45999953e-02
 -4.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.350750

[ 0.00000000e+00  7.95731469e+08  2.83310892e-01 -1.11999953e-02
  1.25999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.93431658e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 922 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.95789108e-01 -3.59999953e-02
 -2.22000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.71929541e-01 -3.74607636e-01  6.00000000e-01]
Read chunk # 923 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.02189108e-01  1.30000047e-02
 -2.64000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 924 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.51089229e-03 -5.39999531e-03
 -2.75000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  1.84380023e-01 -4.23274508e-02]
Read chunk # 925 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.25108923e-02 -6.19999531e-03
  2.79998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.021622

start
[ 0.00000000e+00  7.95731469e+08 -2.44689108e-01 -1.03999953e-02
 -2.36000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.95005779e-01  6.00000000e-01]
Read chunk # 969 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.04310892e-01 -1.95999953e-02
  2.28999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 970 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.21889108e-01  2.34000047e-02
 -2.66000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.15327633e-01  6.00000000e-01]
Read chunk # 971 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.89107708e-04  5.28000047e-02
  2.38999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.52317059e-01 -1.54826521e-01  1.72914304e-01]
Read chunk # 972 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.95910892e-01 -1.95999953e-02
  2.62999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.

start
[ 0.00000000e+00  7.95731469e+08  7.93108923e-02  4.28000047e-02
  2.36999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.86477568e-01  5.76647710e-02 -5.96020322e-01]
Read chunk # 1013 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -8.20891077e-02  5.54000047e-02
  6.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.71434791e-02 -2.59296452e-01  6.00000000e-01]
Read chunk # 1014 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.33589108e-01 -4.99999531e-03
 -2.55000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.53382749e-01  6.00000000e-01]
Read chunk # 1015 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.17489108e-01 -4.81999953e-02
  9.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.68311215e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1016 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.72810892e-01 -5.99999953e-02
 -1.26000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


start
[ 0.00000000e+00  7.95731469e+08 -3.58689108e-01  1.54000047e-02
  2.10999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.23245428e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1059 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.17910892e-01  9.20000469e-03
 -8.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.83016987e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1060 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.23789108e-01 -5.21999953e-02
 -8.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.92609298e-02 -2.69766000e-01  6.00000000e-01]
Read chunk # 1061 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.10189108e-01 -1.07999953e-02
  2.40999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1062 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.44789108e-01  3.70000047e-02
  1.54999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -3.76891077e-02  4.56000047e-02
  2.96999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -2.90662015e-01  4.93141589e-01]
Read chunk # 1104 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.44110892e-01 -3.69999953e-02
 -5.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.87028477e-03  4.15424310e-01 -6.00000000e-01]
Read chunk # 1105 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.70610892e-01  2.90000047e-02
 -1.90001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.67751243e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1106 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.94410892e-01 -3.69999953e-02
  2.33999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1107 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.71710892e-01 -2.39999953e-02
  4.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.22

start
[ 0.00000000e+00  7.95731469e+08  2.47108923e-02 -4.55999953e-02
 -1.10000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.23624056e-01  1.36795107e-01 -3.76818918e-01]
Read chunk # 1149 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.36810892e-01  5.60000469e-03
  2.35999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  4.75155937e-01 -6.00000000e-01]
Read chunk # 1150 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.84110892e-01 -3.57999953e-02
 -2.76000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1151 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.98891077e-02  5.78000047e-02
  1.80999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.71831148e-01 -1.95257623e-01  4.60446058e-01]
Read chunk # 1152 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.60410892e-01 -7.79999531e-03
 -1.66000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -1.69989108e-01  5.26000047e-02
  1.21999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.29069327e-01 -5.28027902e-01  6.00000000e-01]
Read chunk # 1193 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.96310892e-01 -6.59999531e-03
 -2.37000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1194 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.52710892e-01 -5.61999953e-02
 -6.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.03194538e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 1195 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.49210892e-01 -3.05999953e-02
 -1.88000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.99201399e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1196 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.65989108e-01 -2.09999953e-02
  2.46999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00

start
[ 0.00000000e+00  7.95731469e+08  3.83210892e-01 -2.19999531e-03
  2.19999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1239 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.07489108e-01 -5.47999953e-02
 -2.54000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.85920933e-01 -3.84949787e-01  6.00000000e-01]
Read chunk # 1240 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.74089108e-01 -2.81999953e-02
  1.81999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1241 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.44210892e-01  2.42000047e-02
  3.19998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -7.44643768e-03  6.00000000e-01 -6.00000000e-01]
Read chunk # 1242 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.25110892e-01 -1.11999953e-02
 -2.33000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -1.05189108e-01 -4.59999531e-03
 -1.10000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.86392759e-01 -2.07004154e-01  6.00000000e-01]
Read chunk # 1288 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.82610892e-01 -9.19999531e-03
 -1.94000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.00776246e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1289 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.88310892e-01 -1.63999953e-02
 -2.19000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.41432159e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1290 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.15210892e-01 -3.13999953e-02
  2.65999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  3.98653766e-01 -6.00000000e-01]
Read chunk # 1291 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.14108923e-02 -2.77999953e-02
 -1.61000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.35

[ 0.00000000e+00  7.95731469e+08  1.77110892e-01  2.96000047e-02
  1.44999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.84079109e-01  3.75799564e-01 -6.00000000e-01]
Read chunk # 1332 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.25610892e-01 -1.35999953e-02
 -9.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.00810471e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1333 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.24010892e-01  8.00004692e-04
 -7.60001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.13664617e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1334 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.49889108e-01 -1.91999953e-02
  2.46999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.55391271e-01  6.00000000e-01]
Read chunk # 1335 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.77989108e-01  2.88000047e-02
 -6.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.91

[ 0.00000000e+00  7.95731469e+08  1.24610892e-01  1.58000047e-02
  9.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.00385303e-01  2.69075050e-01 -6.00000000e-01]
Read chunk # 1375 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.76410892e-01  3.36000047e-02
 -1.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.36160524e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1376 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  8.35108923e-02 -4.97999953e-02
  6.49998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.77727606e-01  1.79632543e-01 -6.00000000e-01]
Read chunk # 1377 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.19310892e-01 -2.93999953e-02
  9.99986580e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.44412944e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1378 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.34210892e-01 -4.35999953e-02
 -1.07000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.23

[ 0.00000000e+00  7.95731469e+08  1.13810892e-01  2.46000047e-02
  2.53999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  1.38017755e-01 -6.00000000e-01]
Read chunk # 1422 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.61510892e-01  3.20000047e-02
  1.51999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.93938297e-01  3.30000480e-01 -6.00000000e-01]
Read chunk # 1423 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.98489108e-01 -2.03999953e-02
 -1.18000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.45823958e-01 -4.48780286e-01  6.00000000e-01]
Read chunk # 1424 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.93889108e-01  8.00004692e-04
  7.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.07312668e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1425 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.49789108e-01  3.30000047e-02
 -1.28000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.85

[ 0.00000000e+00  7.95731469e+08  2.75510892e-01 -3.09999953e-02
  2.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.95078292e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1470 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.97510892e-01  1.00000469e-03
 -1.32000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.69555505e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1471 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.78810892e-01  9.00000469e-03
 -2.34000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1472 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.52108923e-02  1.54000047e-02
 -1.88000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.81819944e-01  2.14854915e-01 -2.77185771e-01]
Read chunk # 1473 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.03010892e-01 -5.09999953e-02
 -4.00001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  9.16

[ 0.00000000e+00  7.95731469e+08 -9.55891077e-02  5.40000469e-03
  1.14999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.97064722e-01 -3.26606058e-01  6.00000000e-01]
Read chunk # 1516 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.31108923e-02  1.14000047e-02
 -1.93000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.79788007e-01  2.12519397e-01 -2.70688410e-01]
Read chunk # 1517 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.94610892e-01  1.68000047e-02
  1.04999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.24111208e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1518 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.24989108e-01 -4.69999953e-02
 -1.34000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.84536198e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1519 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.55489108e-01  3.44000047e-02
  2.97999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00

[ 0.00000000e+00  7.95731469e+08  3.18710892e-01 -5.45999953e-02
 -2.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.55829350e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1563 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.36510892e-01  4.26000047e-02
 -5.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.15908785e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1564 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.36510892e-01 -5.03999953e-02
  8.69998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.41040509e-01  5.70390590e-01 -6.00000000e-01]
Read chunk # 1565 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.39689108e-01 -5.83999953e-02
  8.99998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.81109018e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1566 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.15489108e-01 -2.01999953e-02
  1.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.18

start
[ 0.00000000e+00  7.95731469e+08  1.91810892e-01 -1.09999953e-02
 -2.75000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1611 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.41710892e-01  1.06000047e-02
  6.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.40711104e-01  3.33022828e-01 -6.00000000e-01]
Read chunk # 1612 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -9.61891077e-02 -2.23999953e-02
  2.41999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -4.10043966e-01  6.00000000e-01]
Read chunk # 1613 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.51510892e-01 -2.85999953e-02
  2.07999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  5.32107279e-01 -6.00000000e-01]
Read chunk # 1614 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.63589108e-01 -1.67999953e-02
  3.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -2.14089108e-01  3.46000047e-02
 -1.62000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.86034064e-01 -4.61710504e-01  6.00000000e-01]
Read chunk # 1659 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.11989108e-01  3.08000047e-02
 -2.89000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -3.74302628e-01  6.00000000e-01]
Read chunk # 1660 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.33589108e-01  5.66000047e-02
 -1.84000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -4.99141366e-01  6.00000000e-01]
Read chunk # 1661 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.00089108e-01  2.14000047e-02
 -2.33000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1662 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -9.43891077e-02 -3.01999953e-02
 -8.40001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.12

[ 0.00000000e+00  7.95731469e+08  2.04410892e-01 -5.81999953e-02
  3.39998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.25218130e-01  5.19591108e-01 -6.00000000e-01]
Read chunk # 1704 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -5.92891077e-02 -1.45999953e-02
 -2.79000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  2.33952764e-02  4.98561126e-01]
Read chunk # 1705 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.20410892e-01  1.16000047e-02
 -2.36000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1706 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.51810892e-01  3.12000047e-02
 -2.28000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1707 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -6.76891077e-02  3.72000047e-02
 -1.22000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.85

[ 0.00000000e+00  7.95731469e+08 -2.52489108e-01  4.66000047e-02
 -2.59000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01 -5.00826400e-01  6.00000000e-01]
Read chunk # 1752 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.05189108e-01 -8.79999531e-03
 -3.50001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.19998374e-02 -2.55340721e-01  6.00000000e-01]
Read chunk # 1753 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.98891077e-02  2.78000047e-02
 -2.20001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.71296043e-01 -6.44266473e-02  3.63772924e-01]
Read chunk # 1754 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.33789108e-01 -5.43999953e-02
 -9.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  1.91039992e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1755 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.93889108e-01  5.62000047e-02
  2.59998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.51

[ 0.00000000e+00  7.95731469e+08 -3.90189108e-01  5.70000047e-02
  2.93999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.87970498e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1799 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.53810892e-01 -2.33999953e-02
  2.48999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1800 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.76189108e-01  1.40000047e-02
  1.89998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -2.95739654e-03 -6.00000000e-01  6.00000000e-01]
Read chunk # 1801 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.52489108e-01  5.80000469e-03
 -1.61000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.68929388e-01 -5.63986181e-01  6.00000000e-01]
Read chunk # 1802 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.17110892e-01 -4.99999531e-03
  1.16999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.43

[ 0.00000000e+00  7.95731469e+08  2.36510892e-01  1.20000469e-03
 -5.00013420e-04  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -1.86118990e-02  6.00000000e-01 -6.00000000e-01]
Read chunk # 1844 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -1.05189108e-01  3.20000469e-03
  1.57999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.24890910e-01 -3.79726821e-01  6.00000000e-01]
Read chunk # 1845 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.48589108e-01 -1.65999953e-02
 -2.17000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -5.35098991e-01 -5.17572960e-01  6.00000000e-01]
Read chunk # 1846 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -2.42889108e-01  1.38000047e-02
  2.20999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.57293252e-01 -6.00000000e-01  6.00000000e-01]
Read chunk # 1847 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.31689108e-01 -2.99999531e-03
  1.56999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  4.46

[ 0.00000000e+00  7.95731469e+08  3.47810892e-01  3.16000047e-02
  1.77999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  3.67535557e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1889 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.00510892e-01  1.50000047e-02
 -1.13000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -3.72512902e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1890 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.17289108e-01  2.20000047e-02
  4.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  2.62137793e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 1891 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.06210892e-01  4.76000047e-02
  9.09998658e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.30619591e-02  4.87619279e-01 -6.00000000e-01]
Read chunk # 1892 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.70710892e-01 -3.69999953e-02
  1.61999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.95

start
[ 0.00000000e+00  7.95731469e+08 -1.63389108e-01 -4.49999953e-02
  2.01999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01 -5.62119046e-01  6.00000000e-01]
Read chunk # 1934 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.94891077e-02 -1.45999953e-02
  1.69999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  5.28791396e-01 -2.13576057e-01  3.15768357e-01]
Read chunk # 1935 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  3.26510892e-01 -4.79999531e-03
  2.78999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1936 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.48010892e-01 -3.69999953e-02
  2.25999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00000000e-01  2.46578685e-01 -6.00000000e-01]
Read chunk # 1937 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -4.22889108e-01 -3.85999953e-02
 -2.10001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00


[ 0.00000000e+00  7.95731469e+08 -6.58891077e-02  8.60000469e-03
 -2.91000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  1.36612664e-02  6.00000000e-01]
Read chunk # 1980 out of 4999
start
[ 0.00000000e+00  7.95731469e+08 -3.95889108e-01  5.88000047e-02
  1.06999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.29117118e-02 -6.00000000e-01  6.00000000e-01]
Read chunk # 1981 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  4.15310892e-01  5.28000047e-02
 -8.30001342e-03  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -4.39490084e-01  6.00000000e-01 -6.00000000e-01]
Read chunk # 1982 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  1.62410892e-01  1.40000047e-02
 -2.58000134e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
 -6.00000000e-01  5.96622362e-01 -6.00000000e-01]
Read chunk # 1983 out of 4999
start
[ 0.00000000e+00  7.95731469e+08  2.62910892e-01 -4.85999953e-02
  1.86999866e-02  0.00000000e+00 -5.33423489e+00 -1.57091618e+00
  6.00

In [4]:
# Report the return statistics of the collected data, i.e. how well the
# behavior policy that generated the dataset performed.
print("The behavior policy value statistics are:")
dataset.compute_stats()["return"]

The behavior policy value statistics are:


{'mean': -95.8347,
 'std': 62.380466,
 'min': -288.50906,
 'max': 0.0,
 'histogram': (array([  8,  12,  20,  23,  31,  39,  52,  79, 106,  92, 107, 119, 132,
         142, 139, 185, 204, 238, 265,   6]),
  array([-288.50906 , -274.08362 , -259.65814 , -245.2327  , -230.80725 ,
         -216.3818  , -201.95634 , -187.53088 , -173.10544 , -158.68    ,
         -144.25453 , -129.82907 , -115.403625, -100.97817 ,  -86.55272 ,
          -72.127266,  -57.701813,  -43.27636 ,  -28.850906,  -14.425453,
            0.      ], dtype=float32))}

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the dataset's entries (episodes, going by the variable names)
# for evaluation. random_state is pinned so the shuffle -- and therefore every
# metric computed on the held-out part -- is the same after a kernel restart.
train_episodes, test_episodes = train_test_split(
    dataset, test_size=0.2, random_state=0
)

## Setting up an Algorithm

In [6]:
from d3rlpy.algos import CQL
from d3rlpy.preprocessing import MinMaxActionScaler

# Actions are rescaled with fixed bounds of -0.6 and 0.6 rather than bounds
# estimated from the data.
action_scaler = MinMaxActionScaler(maximum=0.6, minimum=-0.6)

# Conservative Q-Learning with a much smaller actor step size than critic
# step size; rewards are standardised before training.
model = CQL(
    actor_learning_rate=1e-5,
    critic_learning_rate=3e-4,
    q_func_factory='mean',  # 'qr' -> quantile regression Q-function, optional
    action_scaler=action_scaler,
    reward_scaler='standard',
    use_gpu=False,  # change it to True if you have one
)

# Build the model from the dataset now, before fit() is called, so it can be
# scored in its untrained state.
# NOTE(review): presumably this sizes the networks from the dataset's
# observation/action shapes -- confirm against the d3rlpy docs.
model.build_with_dataset(dataset)

In [7]:
from d3rlpy.metrics.scorer import (
    average_value_estimation_scorer,
    initial_state_value_estimation_scorer,
    td_error_scorer,
)

# Baseline before training: score the built-but-unfitted model on the held-out
# episodes so the per-epoch 'ave_value' metric has a reference point.
# Note that despite its name, ave_error_init holds the average value
# estimate returned by the scorer, not an error.
ave_error_init = average_value_estimation_scorer(model, test_episodes)
print(ave_error_init)

0.037819472543626755


In [8]:
# Load the TensorBoard notebook extension and point it at the `runs`
# directory, the same directory name the training call passes as
# tensorboard_dir.
# NOTE(review): the saved output of this cell shows TensorBoard exiting with
# an import error raised inside the tensorboard package itself -- looks like
# an environment/installation problem rather than a notebook bug; confirm.
%load_ext tensorboard
%tensorboard --logdir runs

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
Traceback (most recent call last):
  File "/home/dasc/anaconda3/envs/jbreeden3.10/bin/tensorboard", line 6, in <module>
    from tensorboard.main import run_main
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/main.py", line 40, in <module>
    from tensorboard import default
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/default.py", line 38, in <module>
    from tensorboard.plugins.audio import audio_plugin
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/plugins/audio/audio_plugin.py", line 25, in <module>
    from tensorboard import plugin_util
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorboard/plugin_util.py", line 21, in <module>
    from tensorboard._vendor import bleach
  File "/home/dasc/anaconda3/envs/jbreeden3.10/lib/python3.10/site-packages/tensorb

In [9]:
# Train for 40 epochs on the training split. After each epoch the scorers
# below are evaluated on the held-out episodes and the metrics are written
# under the 'runs' directory for TensorBoard.
model.fit(
    train_episodes,
    n_epochs=40,
    eval_episodes=test_episodes,
    scorers={
        'td_error': td_error_scorer,
        'init_value': initial_state_value_estimation_scorer,
        'ave_value': average_value_estimation_scorer,
    },
    tensorboard_dir='runs',
)

2022-04-22 13:21.22 [debug    ] RoundIterator is selected.
2022-04-22 13:21.22 [info     ] Directory is created at d3rlpy_logs/CQL_20220422132122
2022-04-22 13:21.22 [debug    ] Fitting action scaler...       action_scaler=min_max
2022-04-22 13:21.22 [debug    ] Fitting reward scaler...       reward_scaler=standard
2022-04-22 13:21.22 [info     ] Parameters are saved to d3rlpy_logs/CQL_20220422132122/params.json params={'action_scaler': {'type': 'min_max', 'params': {'minimum': array(-0.6), 'maximum': array(0.6)}}, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 1e-05, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_rate': 0.0001, 'alpha_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_threshold': 10.0, 'batch_size': 256, 'conser

Epoch 1/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 13:33.41 [info     ] CQL_20220422132122: epoch=1 step=6928 epoch=1 metrics={'time_sample_batch': 0.0003420805201519444, 'time_algorithm_update': 0.10443223916493168, 'temp_loss': 1.2439730751121016, 'temp': 0.8144192236624056, 'alpha_loss': 8.933157939230755, 'alpha': 0.8295113597125434, 'critic_loss': 80.33013826481327, 'actor_loss': 50.652369707479465, 'time_step': 0.10508638499928402, 'td_error': 168.37741325843473, 'init_value': -131.1938018798828, 'ave_value': -119.46909325804828} step=6928
2022-04-22 13:33.41 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_6928.pt


Epoch 2/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 13:55.13 [info     ] CQL_20220422132122: epoch=2 step=13856 epoch=2 metrics={'time_sample_batch': 0.0003476064764049257, 'time_algorithm_update': 0.18429884376481828, 'temp_loss': -0.018478584296722556, 'temp': 0.7727976827247727, 'alpha_loss': 5.844538008385525, 'alpha': 0.44349783624137384, 'critic_loss': 458.05218048541707, 'actor_loss': 169.2142700884689, 'time_step': 0.18498408794403076, 'td_error': 301.03213845142466, 'init_value': -218.14340209960938, 'ave_value': -199.85732195974467} step=13856
2022-04-22 13:55.13 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_13856.pt


Epoch 3/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 14:19.58 [info     ] CQL_20220422132122: epoch=3 step=20784 epoch=3 metrics={'time_sample_batch': 0.00034553693568734045, 'time_algorithm_update': 0.2121201919146553, 'temp_loss': 0.006166911413438734, 'temp': 0.7857441801758479, 'alpha_loss': 1.9335797133707113, 'alpha': 0.2509209633058509, 'critic_loss': 654.3550470677177, 'actor_loss': 216.65763782298592, 'time_step': 0.21281203441752022, 'td_error': 344.3943516797587, 'init_value': -242.6202392578125, 'ave_value': -221.1992712437416} step=20784
2022-04-22 14:19.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_20784.pt


Epoch 4/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 14:45.17 [info     ] CQL_20220422132122: epoch=4 step=27712 epoch=4 metrics={'time_sample_batch': 0.00034707289094462404, 'time_algorithm_update': 0.21692671886630202, 'temp_loss': 0.022901673230895606, 'temp': 0.6943907277964004, 'alpha_loss': 0.49388950889601957, 'alpha': 0.1475999942619362, 'critic_loss': 699.3626159940251, 'actor_loss': 228.42382664713517, 'time_step': 0.21761885398942934, 'td_error': 352.3964928687706, 'init_value': -247.7012176513672, 'ave_value': -224.83294825761377} step=27712
2022-04-22 14:45.17 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_27712.pt


Epoch 5/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 15:09.31 [info     ] CQL_20220422132122: epoch=5 step=34640 epoch=5 metrics={'time_sample_batch': 0.00034958698870678694, 'time_algorithm_update': 0.20753756058821776, 'temp_loss': 0.010042346687658096, 'temp': 0.5966943292846305, 'alpha_loss': 0.008829902452982175, 'alpha': 0.11092651823501148, 'critic_loss': 687.4817655316401, 'actor_loss': 224.12989426980667, 'time_step': 0.20823669960124389, 'td_error': 324.61551613679825, 'init_value': -237.68414306640625, 'ave_value': -215.60246740841174} step=34640
2022-04-22 15:09.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_34640.pt


Epoch 6/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 15:34.24 [info     ] CQL_20220422132122: epoch=6 step=41568 epoch=6 metrics={'time_sample_batch': 0.00035304949547897586, 'time_algorithm_update': 0.21307579928677847, 'temp_loss': -0.005869528468521893, 'temp': 0.5964285812101932, 'alpha_loss': -0.09722211977941382, 'alpha': 0.13966758536200705, 'critic_loss': 636.7450756389329, 'actor_loss': 214.31016629067085, 'time_step': 0.2137777699678104, 'td_error': 298.6580596165512, 'init_value': -225.36293029785156, 'ave_value': -203.6566035130698} step=41568
2022-04-22 15:34.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_41568.pt


Epoch 7/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 16:01.08 [info     ] CQL_20220422132122: epoch=7 step=48496 epoch=7 metrics={'time_sample_batch': 0.00035411491129745237, 'time_algorithm_update': 0.22928161162014096, 'temp_loss': -0.006057847101686578, 'temp': 0.6445149163141262, 'alpha_loss': -0.05530031588389649, 'alpha': 0.1617046067701958, 'critic_loss': 594.1234786076855, 'actor_loss': 208.42202583251319, 'time_step': 0.22999003667203713, 'td_error': 277.7018851054635, 'init_value': -223.8412322998047, 'ave_value': -201.3950434172509} step=48496
2022-04-22 16:01.08 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_48496.pt


Epoch 8/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 16:29.07 [info     ] CQL_20220422132122: epoch=8 step=55424 epoch=8 metrics={'time_sample_batch': 0.00035178716408463054, 'time_algorithm_update': 0.23998108771188706, 'temp_loss': -0.007444588711063698, 'temp': 0.6719248231422956, 'alpha_loss': 0.0022510000249300376, 'alpha': 0.17886686558011428, 'critic_loss': 562.9483829474476, 'actor_loss': 204.82454745708932, 'time_step': 0.2406837311476538, 'td_error': 266.9780736753657, 'init_value': -218.13848876953125, 'ave_value': -194.26387903715843} step=55424
2022-04-22 16:29.07 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_55424.pt


Epoch 9/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 16:57.53 [info     ] CQL_20220422132122: epoch=9 step=62352 epoch=9 metrics={'time_sample_batch': 0.00035159021409499454, 'time_algorithm_update': 0.2467811118303766, 'temp_loss': -0.004270635046808648, 'temp': 0.7089949418160436, 'alpha_loss': 0.05198890228021856, 'alpha': 0.16306858610654332, 'critic_loss': 535.0915872077468, 'actor_loss': 199.1787520049626, 'time_step': 0.24748574610257534, 'td_error': 250.30161031616322, 'init_value': -211.19488525390625, 'ave_value': -187.54639870459047} step=62352
2022-04-22 16:57.53 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_62352.pt


Epoch 10/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 17:27.02 [info     ] CQL_20220422132122: epoch=10 step=69280 epoch=10 metrics={'time_sample_batch': 0.0003512801460526006, 'time_algorithm_update': 0.2500693237809056, 'temp_loss': 0.0012064667107194865, 'temp': 0.7186770148351617, 'alpha_loss': 0.04657254511747416, 'alpha': 0.1471291313369438, 'critic_loss': 508.48533889431053, 'actor_loss': 191.98997372898162, 'time_step': 0.2507736310534885, 'td_error': 239.43414121548744, 'init_value': -201.89808654785156, 'ave_value': -178.69479855872456} step=69280
2022-04-22 17:27.02 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_69280.pt


Epoch 11/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 17:55.24 [info     ] CQL_20220422132122: epoch=11 step=76208 epoch=11 metrics={'time_sample_batch': 0.0003534718556437151, 'time_algorithm_update': 0.24324253349463879, 'temp_loss': 0.0022329619915301773, 'temp': 0.7063419947327559, 'alpha_loss': 0.06748173731545394, 'alpha': 0.12696288885574522, 'critic_loss': 494.1321166618867, 'actor_loss': 186.6244356935074, 'time_step': 0.24395276657703566, 'td_error': 232.7522511780897, 'init_value': -198.80076599121094, 'ave_value': -175.77671777645565} step=76208
2022-04-22 17:55.24 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_76208.pt


Epoch 12/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 18:22.58 [info     ] CQL_20220422132122: epoch=12 step=83136 epoch=12 metrics={'time_sample_batch': 0.0003524887231425931, 'time_algorithm_update': 0.23637198259318398, 'temp_loss': -0.0001189091297831197, 'temp': 0.7035917994366232, 'alpha_loss': -0.0007832894471487384, 'alpha': 0.11037539845809995, 'critic_loss': 476.4548816192921, 'actor_loss': 181.01679883542843, 'time_step': 0.2370766658705872, 'td_error': 225.52770680463613, 'init_value': -192.34100341796875, 'ave_value': -170.66844654002466} step=83136
2022-04-22 18:22.58 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_83136.pt


Epoch 13/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 18:49.00 [info     ] CQL_20220422132122: epoch=13 step=90064 epoch=13 metrics={'time_sample_batch': 0.00035355665116607455, 'time_algorithm_update': 0.22296850706771118, 'temp_loss': 0.0040243991291413666, 'temp': 0.6963543136524548, 'alpha_loss': -0.05164696316964241, 'alpha': 0.12290265734197774, 'critic_loss': 454.9759976409599, 'actor_loss': 169.7867177441269, 'time_step': 0.22368354097387255, 'td_error': 211.89886494578454, 'init_value': -178.8857879638672, 'ave_value': -158.8360532123112} step=90064
2022-04-22 18:49.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_90064.pt


Epoch 14/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 19:13.21 [info     ] CQL_20220422132122: epoch=14 step=96992 epoch=14 metrics={'time_sample_batch': 0.0003546798132858981, 'time_algorithm_update': 0.20847733841894955, 'temp_loss': -0.017573920124919932, 'temp': 0.7014111314338844, 'alpha_loss': 0.06661051103993286, 'alpha': 0.1244695885773688, 'critic_loss': 409.0026686956233, 'actor_loss': 154.8120197842358, 'time_step': 0.20919428600587692, 'td_error': 180.726752456997, 'init_value': -152.63922119140625, 'ave_value': -136.03193234109952} step=96992
2022-04-22 19:13.21 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_96992.pt


Epoch 15/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 19:35.11 [info     ] CQL_20220422132122: epoch=15 step=103920 epoch=15 metrics={'time_sample_batch': 0.00035657790461533616, 'time_algorithm_update': 0.18652809530014805, 'temp_loss': 0.006385493720155726, 'temp': 0.7555676745745519, 'alpha_loss': 0.1709721659917526, 'alpha': 0.08359493157884437, 'critic_loss': 333.03280498600174, 'actor_loss': 130.50575791880937, 'time_step': 0.1872429721762897, 'td_error': 142.1696991359022, 'init_value': -127.86006927490234, 'ave_value': -115.0552805248076} step=103920
2022-04-22 19:35.11 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_103920.pt


Epoch 16/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 19:54.59 [info     ] CQL_20220422132122: epoch=16 step=110848 epoch=16 metrics={'time_sample_batch': 0.0003563362167283514, 'time_algorithm_update': 0.16916244447781914, 'temp_loss': 0.02435162390791166, 'temp': 0.6604965390254663, 'alpha_loss': 0.04781656755403936, 'alpha': 0.05380299574035213, 'critic_loss': 266.2240581220278, 'actor_loss': 105.81038061961291, 'time_step': 0.16987415941703127, 'td_error': 118.73497632251154, 'init_value': -97.59009552001953, 'ave_value': -88.0416207622735} step=110848
2022-04-22 19:54.59 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_110848.pt


Epoch 17/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 20:13.42 [info     ] CQL_20220422132122: epoch=17 step=117776 epoch=17 metrics={'time_sample_batch': 0.0003534386463583755, 'time_algorithm_update': 0.15970181912940865, 'temp_loss': 0.014816679104316525, 'temp': 0.5378653189214699, 'alpha_loss': -0.00660131318003772, 'alpha': 0.04673385622321819, 'critic_loss': 218.02497728337593, 'actor_loss': 83.7993033163542, 'time_step': 0.16040808901913448, 'td_error': 96.62463678274285, 'init_value': -78.00804901123047, 'ave_value': -70.37981834651423} step=117776
2022-04-22 20:13.42 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_117776.pt


Epoch 18/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 20:31.50 [info     ] CQL_20220422132122: epoch=18 step=124704 epoch=18 metrics={'time_sample_batch': 0.00035717801188761864, 'time_algorithm_update': 0.1547184911046788, 'temp_loss': 0.005703958656955978, 'temp': 0.4647982974749652, 'alpha_loss': -0.05462824025308985, 'alpha': 0.06335891134269088, 'critic_loss': 184.09149879791858, 'actor_loss': 64.91522095935724, 'time_step': 0.15543226205999802, 'td_error': 83.46478774737707, 'init_value': -58.292606353759766, 'ave_value': -51.97279626157454} step=124704
2022-04-22 20:31.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_124704.pt


Epoch 19/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 20:49.00 [info     ] CQL_20220422132122: epoch=19 step=131632 epoch=19 metrics={'time_sample_batch': 0.0003584132252334172, 'time_algorithm_update': 0.146283298317885, 'temp_loss': -0.002022571160832278, 'temp': 0.4671297699737838, 'alpha_loss': -0.043888395772890616, 'alpha': 0.08327002919388733, 'critic_loss': 163.53407897024178, 'actor_loss': 51.252377044246046, 'time_step': 0.14700459456608972, 'td_error': 76.7659250929839, 'init_value': -48.91828155517578, 'ave_value': -43.741008627092896} step=131632
2022-04-22 20:49.00 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_131632.pt


Epoch 20/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 21:06.14 [info     ] CQL_20220422132122: epoch=20 step=138560 epoch=20 metrics={'time_sample_batch': 0.00036141228180284037, 'time_algorithm_update': 0.14673397953323072, 'temp_loss': -0.006157488753038752, 'temp': 0.49014234257642586, 'alpha_loss': 0.013646985576091576, 'alpha': 0.09517696984413383, 'critic_loss': 153.6818130486763, 'actor_loss': 42.20641303447873, 'time_step': 0.14745679690435906, 'td_error': 74.78162569067601, 'init_value': -39.74223709106445, 'ave_value': -35.35247894730546} step=138560
2022-04-22 21:06.14 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_138560.pt


Epoch 21/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 21:24.32 [info     ] CQL_20220422132122: epoch=21 step=145488 epoch=21 metrics={'time_sample_batch': 0.00036607968339193224, 'time_algorithm_update': 0.15614449685357734, 'temp_loss': 0.0006980956929634212, 'temp': 0.5124020376791166, 'alpha_loss': 0.013618711134458275, 'alpha': 0.08604263897456109, 'critic_loss': 147.61574680211913, 'actor_loss': 35.924887423564876, 'time_step': 0.15687802984587865, 'td_error': 68.93932435050735, 'init_value': -33.729095458984375, 'ave_value': -30.135494665293823} step=145488
2022-04-22 21:24.32 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_145488.pt


Epoch 22/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 21:43.20 [info     ] CQL_20220422132122: epoch=22 step=152416 epoch=22 metrics={'time_sample_batch': 0.0003633909701750405, 'time_algorithm_update': 0.1602564781697738, 'temp_loss': -0.0036595059431814825, 'temp': 0.5115332631587844, 'alpha_loss': 0.04438352706180728, 'alpha': 0.07390135286298449, 'critic_loss': 135.6900423136787, 'actor_loss': 30.389186180697852, 'time_step': 0.16098265215651267, 'td_error': 64.8936062994529, 'init_value': -28.432649612426758, 'ave_value': -24.953201348687664} step=152416
2022-04-22 21:43.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_152416.pt


Epoch 23/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 22:02.30 [info     ] CQL_20220422132122: epoch=23 step=159344 epoch=23 metrics={'time_sample_batch': 0.00036376243237810376, 'time_algorithm_update': 0.16365389470828323, 'temp_loss': 0.004634484552604756, 'temp': 0.5162797607997686, 'alpha_loss': 0.03448070211069913, 'alpha': 0.05543841665350417, 'critic_loss': 124.07759250826084, 'actor_loss': 26.439733660549752, 'time_step': 0.16438315887924707, 'td_error': 59.18680209541547, 'init_value': -25.677358627319336, 'ave_value': -22.518948905933254} step=159344
2022-04-22 22:02.30 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_159344.pt


Epoch 24/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 22:21.52 [info     ] CQL_20220422132122: epoch=24 step=166272 epoch=24 metrics={'time_sample_batch': 0.0003646147581501315, 'time_algorithm_update': 0.16531008241083, 'temp_loss': -0.003091577657266664, 'temp': 0.5031627551942858, 'alpha_loss': 0.017786319717672135, 'alpha': 0.04651469537046228, 'critic_loss': 119.37176567390046, 'actor_loss': 23.607828870084454, 'time_step': 0.16604327487890494, 'td_error': 57.84089233294172, 'init_value': -22.06302261352539, 'ave_value': -18.885714237864985} step=166272
2022-04-22 22:21.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_166272.pt


Epoch 25/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 22:41.39 [info     ] CQL_20220422132122: epoch=25 step=173200 epoch=25 metrics={'time_sample_batch': 0.0003649572439612197, 'time_algorithm_update': 0.1689772116188089, 'temp_loss': 0.0009488222350158855, 'temp': 0.5137594603088106, 'alpha_loss': 0.009348844277871145, 'alpha': 0.0404366958046474, 'critic_loss': 114.02718623381702, 'actor_loss': 21.176268728617707, 'time_step': 0.16970531236354527, 'td_error': 54.83240985798754, 'init_value': -20.99884796142578, 'ave_value': -17.661086474938536} step=173200
2022-04-22 22:41.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_173200.pt


Epoch 26/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 23:01.33 [info     ] CQL_20220422132122: epoch=26 step=180128 epoch=26 metrics={'time_sample_batch': 0.00036155372238599675, 'time_algorithm_update': 0.16987232915523695, 'temp_loss': 0.0009498255109972606, 'temp': 0.5090848417741872, 'alpha_loss': -0.003396782137841295, 'alpha': 0.038804895313904034, 'critic_loss': 104.78661297237625, 'actor_loss': 20.295964419119077, 'time_step': 0.17059303076795837, 'td_error': 48.59609908154537, 'init_value': -19.427467346191406, 'ave_value': -16.053659212758816} step=180128
2022-04-22 23:01.33 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_180128.pt


Epoch 27/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 23:22.05 [info     ] CQL_20220422132122: epoch=27 step=187056 epoch=27 metrics={'time_sample_batch': 0.00035966712525480066, 'time_algorithm_update': 0.17531254822194714, 'temp_loss': 0.00029954337894698223, 'temp': 0.5139207061440372, 'alpha_loss': -0.0032392551043109613, 'alpha': 0.04016697239378084, 'critic_loss': 98.81467074296904, 'actor_loss': 20.088003795206824, 'time_step': 0.17602869276361707, 'td_error': 47.985321258937844, 'init_value': -18.39183807373047, 'ave_value': -15.24090586323291} step=187056
2022-04-22 23:22.05 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_187056.pt


Epoch 28/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-22 23:43.09 [info     ] CQL_20220422132122: epoch=28 step=193984 epoch=28 metrics={'time_sample_batch': 0.00035996494199607446, 'time_algorithm_update': 0.18011986517603348, 'temp_loss': -0.00415138885501235, 'temp': 0.5215323192538482, 'alpha_loss': 0.014372280739091829, 'alpha': 0.03814582801241144, 'critic_loss': 92.53840178499046, 'actor_loss': 19.910147719902543, 'time_step': 0.18084112026537263, 'td_error': 43.01335859640332, 'init_value': -18.120018005371094, 'ave_value': -14.864014593757698} step=193984
2022-04-22 23:43.09 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_193984.pt


Epoch 29/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 00:04.20 [info     ] CQL_20220422132122: epoch=29 step=200912 epoch=29 metrics={'time_sample_batch': 0.00036176485084771557, 'time_algorithm_update': 0.18109341237616594, 'temp_loss': 0.0026748576166547027, 'temp': 0.5269379306834097, 'alpha_loss': 0.0024520420670683316, 'alpha': 0.03358621848263191, 'critic_loss': 86.27546918910109, 'actor_loss': 18.735401037108264, 'time_step': 0.18181758709509985, 'td_error': 39.81759546169026, 'init_value': -18.762775421142578, 'ave_value': -15.8014163966017} step=200912
2022-04-23 00:04.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_200912.pt


Epoch 30/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 00:24.44 [info     ] CQL_20220422132122: epoch=30 step=207840 epoch=30 metrics={'time_sample_batch': 0.0003608689572464236, 'time_algorithm_update': 0.1742157309421491, 'temp_loss': 0.006103687987025121, 'temp': 0.4967192284349894, 'alpha_loss': -0.019506355049328638, 'alpha': 0.036740911106619874, 'critic_loss': 79.02531313765407, 'actor_loss': 18.4685782957607, 'time_step': 0.1749376390327207, 'td_error': 36.50987896115592, 'init_value': -17.8409423828125, 'ave_value': -15.103231736030095} step=207840
2022-04-23 00:24.44 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_207840.pt


Epoch 31/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 00:45.20 [info     ] CQL_20220422132122: epoch=31 step=214768 epoch=31 metrics={'time_sample_batch': 0.00036087893723890906, 'time_algorithm_update': 0.17606956282884365, 'temp_loss': -0.002200019430395621, 'temp': 0.48993305147003907, 'alpha_loss': 0.04057553737842145, 'alpha': 0.03561605201382405, 'critic_loss': 73.26064327459, 'actor_loss': 19.15469824280547, 'time_step': 0.17679329780866038, 'td_error': 37.85240540581456, 'init_value': -20.825817108154297, 'ave_value': -17.593196205314637} step=214768
2022-04-23 00:45.20 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_214768.pt


Epoch 32/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 01:06.31 [info     ] CQL_20220422132122: epoch=32 step=221696 epoch=32 metrics={'time_sample_batch': 0.0003560745688563957, 'time_algorithm_update': 0.1812581444378538, 'temp_loss': 0.009228764643711704, 'temp': 0.46958989471919815, 'alpha_loss': 0.02486759012212512, 'alpha': 0.02239166064522343, 'critic_loss': 72.82597981391616, 'actor_loss': 20.0646969096748, 'time_step': 0.18197389026161706, 'td_error': 36.17305554915017, 'init_value': -20.514463424682617, 'ave_value': -16.77857813497136} step=221696
2022-04-23 01:06.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_221696.pt


Epoch 33/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 01:27.52 [info     ] CQL_20220422132122: epoch=33 step=228624 epoch=33 metrics={'time_sample_batch': 0.0003529324198429909, 'time_algorithm_update': 0.18250597610759955, 'temp_loss': 0.0035779970691059908, 'temp': 0.4211297314226903, 'alpha_loss': 0.006671835779035108, 'alpha': 0.017748324992027623, 'critic_loss': 76.07895670974021, 'actor_loss': 18.3615490944656, 'time_step': 0.1832201175959501, 'td_error': 37.374272731730414, 'init_value': -17.731595993041992, 'ave_value': -13.845020339553102} step=228624
2022-04-23 01:27.52 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_228624.pt


Epoch 34/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 01:48.39 [info     ] CQL_20220422132122: epoch=34 step=235552 epoch=34 metrics={'time_sample_batch': 0.00035319128019980416, 'time_algorithm_update': 0.1776241367601082, 'temp_loss': 0.0004519340939016332, 'temp': 0.40657154358407227, 'alpha_loss': 0.0031417032254443668, 'alpha': 0.01561915520126942, 'critic_loss': 75.06012411441387, 'actor_loss': 16.184490697144362, 'time_step': 0.1783358193159654, 'td_error': 36.50140987547464, 'init_value': -17.731122970581055, 'ave_value': -13.642434266968115} step=235552
2022-04-23 01:48.39 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_235552.pt


Epoch 35/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 02:09.04 [info     ] CQL_20220422132122: epoch=35 step=242480 epoch=35 metrics={'time_sample_batch': 0.0003512936706631068, 'time_algorithm_update': 0.17448317729298016, 'temp_loss': 0.0032058680562567833, 'temp': 0.41170963932766513, 'alpha_loss': 0.0020853960852695065, 'alpha': 0.014298640460815866, 'critic_loss': 74.51948902759418, 'actor_loss': 14.718328608101015, 'time_step': 0.17518890747159513, 'td_error': 35.83033467512665, 'init_value': -15.129424095153809, 'ave_value': -11.098136941891331} step=242480
2022-04-23 02:09.04 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_242480.pt


Epoch 36/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 02:29.31 [info     ] CQL_20220422132122: epoch=36 step=249408 epoch=36 metrics={'time_sample_batch': 0.00035355228061764124, 'time_algorithm_update': 0.17470591695974935, 'temp_loss': 0.002189543111738184, 'temp': 0.3944070090993585, 'alpha_loss': -0.0018333680014242294, 'alpha': 0.014029823296986236, 'critic_loss': 75.24394696410616, 'actor_loss': 13.49300654934973, 'time_step': 0.175415596049322, 'td_error': 36.365098328293755, 'init_value': -14.674455642700195, 'ave_value': -10.456548656520022} step=249408
2022-04-23 02:29.31 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_249408.pt


Epoch 37/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 02:49.50 [info     ] CQL_20220422132122: epoch=37 step=256336 epoch=37 metrics={'time_sample_batch': 0.00034859218993858705, 'time_algorithm_update': 0.17355174476637852, 'temp_loss': -0.0013023974968884818, 'temp': 0.3859725968512184, 'alpha_loss': -0.007855985534464996, 'alpha': 0.016180716631353315, 'critic_loss': 76.3296006561943, 'actor_loss': 13.00458421158942, 'time_step': 0.1742568448289162, 'td_error': 36.120132239166814, 'init_value': -15.430970191955566, 'ave_value': -11.21363611395172} step=256336
2022-04-23 02:49.50 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_256336.pt


Epoch 38/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 03:10.16 [info     ] CQL_20220422132122: epoch=38 step=263264 epoch=38 metrics={'time_sample_batch': 0.0003514585126079533, 'time_algorithm_update': 0.17461431497927352, 'temp_loss': -0.002709179653579909, 'temp': 0.40105682014386596, 'alpha_loss': -0.012304663026017829, 'alpha': 0.02083289688861742, 'critic_loss': 76.05298805577513, 'actor_loss': 12.793378567318786, 'time_step': 0.17532120022157047, 'td_error': 34.84334024787681, 'init_value': -14.419123649597168, 'ave_value': -10.199376301885847} step=263264
2022-04-23 03:10.16 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_263264.pt


Epoch 39/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 03:30.19 [info     ] CQL_20220422132122: epoch=39 step=270192 epoch=39 metrics={'time_sample_batch': 0.0003498303284545969, 'time_algorithm_update': 0.17131448201577051, 'temp_loss': -0.00090944622302515, 'temp': 0.40626227861704783, 'alpha_loss': -0.01116106695183733, 'alpha': 0.026603096482283747, 'critic_loss': 73.52520556871804, 'actor_loss': 12.795844356918721, 'time_step': 0.17202154803358655, 'td_error': 34.377590636009884, 'init_value': -14.611069679260254, 'ave_value': -10.175477616043135} step=270192
2022-04-23 03:30.19 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_270192.pt


Epoch 40/40:   0%|          | 0/6928 [00:00<?, ?it/s]

2022-04-23 03:50.18 [info     ] CQL_20220422132122: epoch=40 step=277120 epoch=40 metrics={'time_sample_batch': 0.00034861521274883806, 'time_algorithm_update': 0.170657395180874, 'temp_loss': -0.0025526430015996643, 'temp': 0.4151186579214646, 'alpha_loss': -0.0061833723156299045, 'alpha': 0.029968462228951394, 'critic_loss': 71.4885447895107, 'actor_loss': 13.600749347315924, 'time_step': 0.1713605998363561, 'td_error': 36.17310510697233, 'init_value': -15.425052642822266, 'ave_value': -11.135538481891} step=277120
2022-04-23 03:50.18 [info     ] Model parameters are saved to d3rlpy_logs/CQL_20220422132122/model_277120.pt


[(1,
  {'time_sample_batch': 0.0003420805201519444,
   'time_algorithm_update': 0.10443223916493168,
   'temp_loss': 1.2439730751121016,
   'temp': 0.8144192236624056,
   'alpha_loss': 8.933157939230755,
   'alpha': 0.8295113597125434,
   'critic_loss': 80.33013826481327,
   'actor_loss': 50.652369707479465,
   'time_step': 0.10508638499928402,
   'td_error': 168.37741325843473,
   'init_value': -131.1938018798828,
   'ave_value': -119.46909325804828}),
 (2,
  {'time_sample_batch': 0.0003476064764049257,
   'time_algorithm_update': 0.18429884376481828,
   'temp_loss': -0.018478584296722556,
   'temp': 0.7727976827247727,
   'alpha_loss': 5.844538008385525,
   'alpha': 0.44349783624137384,
   'critic_loss': 458.05218048541707,
   'actor_loss': 169.2142700884689,
   'time_step': 0.18498408794403076,
   'td_error': 301.03213845142466,
   'init_value': -218.14340209960938,
   'ave_value': -199.85732195974467}),
 (3,
  {'time_sample_batch': 0.00034553693568734045,
   'time_algorithm_update'

In [10]:
# Export the model after the final (40/40) training epoch under descriptive
# names. The training log above already wrote per-epoch parameter checkpoints
# to d3rlpy_logs/CQL_20220422132122/; these two files are separate copies.
# NOTE(review): per the d3rlpy docs, save_model stores the network parameters
# and save_policy exports the greedy policy (TorchScript unless the name ends
# in .onnx) — confirm against the installed d3rlpy version.
model.save_model('cqlDet2000_Ep40model_CPUonly.pt')
model.save_policy('cqlDet2000_Ep40_CPUonly.pt')

  minimum = torch.tensor(
  maximum = torch.tensor(


## Off-Policy Evaluation

During training we already log the initial-state value and the average value on a held-out test set. However, these estimates come from the critic's own Q-function, so they are biased estimates of the policy's performance: useful for validation during training, but not much else. Instead, we fit a separate Q-function to the data (here, to a separate dataset) and use it to evaluate the trained policy.

Feel free to change the chunks and number of steps.

In [11]:
# Disabled cell, kept for reference: fits d3rlpy's FQE estimator for `model`
# on a dataset built from chunks 2000-2099, holding out 20% (test_size=0.2)
# for evaluation and logging to the 'runs' TensorBoard directory.
# NOTE(review): relies on `train_test_split`, `action_scaler` and the two
# value-estimation scorers being provided by earlier cells — confirm before
# re-enabling.
# from d3rlpy.ope import FQE
# # metrics to evaluate with
# from d3rlpy.metrics.scorer import soft_opc_scorer


# ope_dataset = get_dataset([i+2000 for i in range(100)]) #change if you'd prefer different chunks
# ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

# fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
# fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
#         tensorboard_dir='runs',
#         n_epochs=100, n_steps_per_epoch=10000, #change if overfitting/underfitting
#         scorers={
#            'init_value': initial_state_value_estimation_scorer,
#             'ave_value': average_value_estimation_scorer,
#            'soft_opc': soft_opc_scorer(return_threshold=0)
#         })

In [12]:
# Disabled cell, kept for reference: same FQE fit as the previous cell, but on
# the even-numbered chunks 0-198 and with path="collected_data/rl_stochastic.txt".
# NOTE(review): get_dataset, as defined at the top of this notebook, builds its
# DataSampler from the hard-coded "collected_data/rl_deterministic_coarse.txt"
# and never reads `path`, so as written this cell would still evaluate on the
# deterministic data — fix get_dataset before re-enabling.
# from d3rlpy.ope import FQE
# # metrics to evaluate with
# from d3rlpy.metrics.scorer import soft_opc_scorer


# ope_dataset = get_dataset([i*2 for i in range(100)], path="collected_data/rl_stochastic.txt") #change if you'd prefer different chunks
# ope_train_episodes, ope_test_episodes = train_test_split(ope_dataset, test_size=0.2)

# fqe = FQE(algo=model, action_scaler = action_scaler, use_gpu=False) #change this if you have one!
# fqe.fit(ope_train_episodes, eval_episodes=ope_test_episodes,
#         tensorboard_dir='runs',
#         n_epochs=100, n_steps_per_epoch=10000, #change if overfitting/underfitting
#         scorers={
#            'init_value': initial_state_value_estimation_scorer,
#             'ave_value': average_value_estimation_scorer,
#            'soft_opc': soft_opc_scorer(return_threshold=0)
#         })

In [13]:
# Disabled cell, kept for reference: calls d3rlpy's `to_cpu` on `model` and
# then saves the policy and the model parameters.
# NOTE(review): the file names say "Stochpid", unlike the "Det" files written by
# the active save cell above — presumably left over from a different
# experiment; confirm before re-enabling.
# from d3rlpy.torch_utility import to_cpu
# to_cpu(model)
# model.save_policy("cqlStochpid2000Ep40CPU.pt")
# model.save_model("cqlStochpid2000Ep40modelCPU.pt")

In [14]:
# Disabled scratch cell: assorted attempts at getting the model's torch modules
# onto the CPU before saving (attribute scans calling `.cpu()`, a `_device`
# override, a hand-written state-dict dump, and a `trace` run of save_policy).
# NOTE(review): this does not look runnable as-is if uncommented — `print(yes)`
# uses an unquoted name, and `torch`, `Any` and `Dict` are not imported in the
# visible import cell (they may come from the star import; verify).
# NOTE(review): save_policy below writes to "cqlStochpid2000Ep40modelCPU.pt",
# the same name the previous disabled cell passes to save_model, so one file
# would overwrite the other.
# for key in dir(model):
#     module = getattr(model, key)
#     if isinstance(module, (torch.nn.Module, torch.nn.Parameter)):
#         print(yes)
#         print(key)
# dir(model)
# type(model)
# model.cpu()
# from d3rlpy.algos.torch.base import TorchImplBase
# new_model = TorchImplBase()
# from d3rlpy.torch_utility import _get_attributes
# model._device = "cpu:0"
# print(model._device)


# def my_get_state_dict(impl: Any) -> Dict[str, Any]:
#     rets = {}
#     for key in _get_attributes(impl):
#         obj = getattr(impl, key)
#         if isinstance(obj, (torch.nn.Module, torch.optim.Optimizer)):
#             if isinstance(obj, (torch.nn.Module, torch.nn.Parameter)):
#                 obj.cpu()
#             rets[key] = obj.state_dict()
#     return rets

# torch.save(my_get_state_dict(model), "my_test_model.pt")

# for key in dir(model):
#     obj = getattr(model, key)
#     if isinstance(obj, (torch.nn.Module, torch.nn.Parameter)):
#         obj.cpu()
#         print("convert to cpu")
# model.save_policy("cqlStochpid2000Ep40modelCPU.pt")

# import trace
# tracer = trace.Trace()
# tracer.run('model.save_policy("cqlStochpid2000Ep40modelCPU.pt")')