In [1]:
import d3rlpy
from d3rlpy.algos import COMBO
from sklearn.model_selection import train_test_split
import gymnasium as gym
import numpy as np
import encoders



## Params

In [2]:
seed = 1
d3rlpy.seed(seed)
use_gpu = True
# prepare environment
env = gym.make("InvertedPendulum-v4")
eval_env = gym.make("InvertedPendulum-v4")
env.reset(seed=seed)
eval_env.reset(seed=seed)

(array([ 0.00023643,  0.00900927, -0.00711681,  0.00897299]), {})

## Create dataset

In [14]:
actor_encoder = d3rlpy.models.encoders.DefaultEncoderFactory(dropout_rate=0.2)
# setup algorithm
sac = d3rlpy.algos.SAC(
    batch_size=256,
    actor_encoder_factory=actor_encoder,
    actor_learning_rate=3e-4,
    critic_learning_rate=3e-4,
    temp_learning_rate=3e-4,
    use_gpu=use_gpu
)

# prepare utilities
buffer = d3rlpy.online.buffers.ReplayBuffer(maxlen=1000000, env=env)

# start training
sac.fit_online(
    env,
    buffer,
    eval_env=eval_env,
    n_steps=100000,
    n_steps_per_epoch=1000,
    update_interval=1,
    update_start_step=1000,
    tensorboard_dir='tensorboard_logs'
)

[2m2023-10-01 15:22:21[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/SAC_online_20231001152221[0m
[2m2023-10-01 15:22:21[0m [[32m[1mdebug    [0m] [1mBuilding model...[0m
[2m2023-10-01 15:22:21[0m [[32m[1mdebug    [0m] [1mModel has been built.[0m
[2m2023-10-01 15:22:21[0m [[32m[1minfo     [0m] [1mParameters are saved to d3rlpy_logs/SAC_online_20231001152221/params.json[0m [36mparams[0m=[35m{'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': 0.2}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-

  0%|          | 0/100000 [00:00<?, ?it/s]

[2m2023-10-01 15:22:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/SAC_online_20231001152221/model_1000.pt[0m
[2m2023-10-01 15:22:23[0m [[32m[1minfo     [0m] [1mSAC_online_20231001152221: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_inference': 0.0011901423931121827, 'time_environment_step': 6.751775741577148e-05, 'time_step': 0.0013019063472747804, 'rollout_return': 10.06060606060606, 'evaluation': 14.7}[0m [36mstep[0m=[35m1000[0m
[2m2023-10-01 15:22:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/SAC_online_20231001152221/model_2000.pt[0m
[2m2023-10-01 15:22:37[0m [[32m[1minfo     [0m] [1mSAC_online_20231001152221: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_inference': 0.0012940735816955567, 'time_environment_step': 0.0001036219596862793, 'time_sample_batch': 0.000309492826461792, 'time_algorithm_update': 0.011871881484985351, 'temp_loss'

In [15]:
# export replay buffer as MDPDataset
dataset = buffer.to_mdp_dataset()

# save MDPDataset
dataset.dump('d3rlpy_data/inverted_pendulum2.h5')

## Load the dataset

In [3]:
dataset = d3rlpy.dataset.MDPDataset.load('d3rlpy_data/inverted_pendulum2.h5')

In [4]:
train_episodes, test_episodes = train_test_split(dataset, random_state=seed)

## Dynamics learning

In [7]:
# encoder_factory = encoders.InvertedPendulumEncoderFactory()
dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(learning_rate=1e-4, use_gpu=False, encoder_factory="inverted_pendulum")
#dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(learning_rate=1e-4, use_gpu=True) # Baseline

Using InvertedPendulumEncoderFactory


In [8]:
# same as algorithms
dynamics.fit(train_episodes,
             eval_episodes=test_episodes,
             n_epochs=100,
             scorers={
                'observation_error': d3rlpy.metrics.scorer.dynamics_observation_prediction_error_scorer,
                'reward_error': d3rlpy.metrics.scorer.dynamics_reward_prediction_error_scorer,
                'variance': d3rlpy.metrics.scorer.dynamics_prediction_variance_scorer,
             },
            tensorboard_dir='tensorboard_logs/dynamics',
            #experiment_name=None,)

[2m2023-10-05 12:29:37[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-05 12:29:37[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937[0m
[2m2023-10-05 12:29:37[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-05 12:29:37[0m [[32m[1mdebug    [0m] [1mModels have been built.[0m
[2m2023-10-05 12:29:37[0m [[32m[1minfo     [0m] [1mParameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/params.json[0m [36mparams[0m=[35m{'action_scaler': None, 'batch_size': 100, 'discrete_action': False, 'encoder_factory': {'type': 'inverted_pendulum', 'params': {'hidden_units': [256, 256], 'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None, 'use_dense': False}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_ensembles': 5, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps':

Epoch 1/100:   0%|          | 0/714 [00:00<?, ?it/s]

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[2m2023-10-05 12:29:49[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=1 step=714[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023564370740361575, 'time_algorithm_update': 0.013745944373080042, 'loss': -22.885445690687735, 'time_step': 0.014056795117567902, 'observation_error': 0.03438636513912763, 'reward_error': 0.00039823976651985073, 'variance': 0.029693792782415095}[0m [36mstep[0m=[35m714[0m
[2m2023-10-05 12:29:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_714.pt[0m


Epoch 2/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:30:00[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=2 step=1428[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023534985817447095, 'time_algorithm_update': 0.014228190694536482, 'loss': -33.198769204422874, 'time_step': 0.014536314985665287, 'observation_error': 0.020555156495159713, 'reward_error': 0.0002807210454080229, 'variance': 0.015802806306843786}[0m [36mstep[0m=[35m1428[0m
[2m2023-10-05 12:30:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_1428.pt[0m


Epoch 3/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:30:13[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=3 step=2142[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002437102026632186, 'time_algorithm_update': 0.01521063418615432, 'loss': -41.05668649686819, 'time_step': 0.015527376273766953, 'observation_error': 0.013155272969279555, 'reward_error': 4.632182273780113e-05, 'variance': 0.006468071347963596}[0m [36mstep[0m=[35m2142[0m
[2m2023-10-05 12:30:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_2142.pt[0m


Epoch 4/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:30:25[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=4 step=2856[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000223257294556006, 'time_algorithm_update': 0.015178150823470257, 'loss': -47.99477893331138, 'time_step': 0.015466698077546448, 'observation_error': 0.010717851617716444, 'reward_error': 2.0268247567335142e-05, 'variance': 0.003646903202359238}[0m [36mstep[0m=[35m2856[0m
[2m2023-10-05 12:30:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_2856.pt[0m


Epoch 5/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:30:39[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=5 step=3570[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023138055614396638, 'time_algorithm_update': 0.015871588255510945, 'loss': -53.646257454607664, 'time_step': 0.01617125937250816, 'observation_error': 0.010566699674744008, 'reward_error': 1.1207291996522423e-05, 'variance': 0.0026037700370151657}[0m [36mstep[0m=[35m3570[0m
[2m2023-10-05 12:30:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_3570.pt[0m


Epoch 6/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:30:53[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=6 step=4284[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022709870538791687, 'time_algorithm_update': 0.01653363324013077, 'loss': -58.1743355645519, 'time_step': 0.016824859197066278, 'observation_error': 0.014924224404072439, 'reward_error': 1.1892970212741865e-05, 'variance': 0.008151400924133863}[0m [36mstep[0m=[35m4284[0m
[2m2023-10-05 12:30:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_4284.pt[0m


Epoch 7/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:31:07[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=7 step=4998[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022834055230063218, 'time_algorithm_update': 0.017458652247901725, 'loss': -60.91458752242123, 'time_step': 0.01775386086365088, 'observation_error': 0.017144245898000402, 'reward_error': 8.946861481960987e-06, 'variance': 0.016947886468242426}[0m [36mstep[0m=[35m4998[0m
[2m2023-10-05 12:31:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_4998.pt[0m


Epoch 8/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:31:22[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=8 step=5712[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023259802692744577, 'time_algorithm_update': 0.017714596596084722, 'loss': -62.971406507892766, 'time_step': 0.01801470948868439, 'observation_error': 0.039250602456001545, 'reward_error': 1.2063328553341461e-05, 'variance': 0.02733317392356016}[0m [36mstep[0m=[35m5712[0m
[2m2023-10-05 12:31:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_5712.pt[0m


Epoch 9/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:31:37[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=9 step=6426[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022745967245235495, 'time_algorithm_update': 0.01771002724057152, 'loss': -64.56523055122013, 'time_step': 0.018003874800118412, 'observation_error': 0.03422823660009942, 'reward_error': 2.2984145289778644e-05, 'variance': 0.036720250135848856}[0m [36mstep[0m=[35m6426[0m
[2m2023-10-05 12:31:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_6426.pt[0m


Epoch 10/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:31:52[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=10 step=7140[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022256073831510142, 'time_algorithm_update': 0.017792226553631097, 'loss': -66.25913888199322, 'time_step': 0.018079139939209326, 'observation_error': 0.030784338825355324, 'reward_error': 1.0011356663344068e-05, 'variance': 0.03924046670106515}[0m [36mstep[0m=[35m7140[0m
[2m2023-10-05 12:31:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_7140.pt[0m


Epoch 11/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:32:07[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=11 step=7854[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002272452960829107, 'time_algorithm_update': 0.017779339929254782, 'loss': -67.46987786947513, 'time_step': 0.01807247657401889, 'observation_error': 0.035229086810476644, 'reward_error': 7.088427800970694e-06, 'variance': 0.05062863933213344}[0m [36mstep[0m=[35m7854[0m
[2m2023-10-05 12:32:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_7854.pt[0m


Epoch 12/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:32:23[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=12 step=8568[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022158469138693074, 'time_algorithm_update': 0.018003529527274166, 'loss': -68.74006387032047, 'time_step': 0.01829015206889946, 'observation_error': 0.049919655489935, 'reward_error': 9.016736050207444e-06, 'variance': 0.04888705530565096}[0m [36mstep[0m=[35m8568[0m
[2m2023-10-05 12:32:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_8568.pt[0m


Epoch 13/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:32:38[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=13 step=9282[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023069769060578334, 'time_algorithm_update': 0.018125677976955554, 'loss': -69.82779564844127, 'time_step': 0.018424883610060235, 'observation_error': 0.04909038796329976, 'reward_error': 8.681723337927951e-06, 'variance': 0.04827032874855462}[0m [36mstep[0m=[35m9282[0m
[2m2023-10-05 12:32:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_9282.pt[0m


Epoch 14/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:32:54[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=14 step=9996[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002238419877380884, 'time_algorithm_update': 0.01818281128292992, 'loss': -70.2001247953634, 'time_step': 0.018474588541089652, 'observation_error': 0.054226037650360846, 'reward_error': 5.443108019608079e-06, 'variance': 0.05418400946345673}[0m [36mstep[0m=[35m9996[0m
[2m2023-10-05 12:32:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_9996.pt[0m


Epoch 15/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:33:09[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=15 step=10710[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023459186073110884, 'time_algorithm_update': 0.01841907474507137, 'loss': -71.02813982896778, 'time_step': 0.01872380541152313, 'observation_error': 0.03861407667508009, 'reward_error': 6.1236092443942364e-06, 'variance': 0.05364683615042192}[0m [36mstep[0m=[35m10710[0m
[2m2023-10-05 12:33:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_10710.pt[0m


Epoch 16/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:33:25[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=16 step=11424[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022103839895638432, 'time_algorithm_update': 0.018360796094942494, 'loss': -70.14504104435277, 'time_step': 0.018646660639124422, 'observation_error': 0.04874736888609789, 'reward_error': 8.573727483250033e-06, 'variance': 0.05800482703923251}[0m [36mstep[0m=[35m11424[0m
[2m2023-10-05 12:33:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_11424.pt[0m


Epoch 17/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:33:41[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=17 step=12138[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002289439449791147, 'time_algorithm_update': 0.0183391210411777, 'loss': -71.63706970081276, 'time_step': 0.01863511336617777, 'observation_error': 0.05609958078163641, 'reward_error': 4.25071086424277e-06, 'variance': 0.056458473101067726}[0m [36mstep[0m=[35m12138[0m
[2m2023-10-05 12:33:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_12138.pt[0m


Epoch 18/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:33:56[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=18 step=12852[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022175766172863188, 'time_algorithm_update': 0.01747185910115389, 'loss': -71.57995035668381, 'time_step': 0.01775696297653583, 'observation_error': 0.04544924543657633, 'reward_error': 4.387173313848222e-06, 'variance': 0.0547018677556376}[0m [36mstep[0m=[35m12852[0m
[2m2023-10-05 12:33:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_12852.pt[0m


Epoch 19/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:34:10[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=19 step=13566[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022637042678704783, 'time_algorithm_update': 0.017003613693707464, 'loss': -72.33470759805844, 'time_step': 0.01729642476688246, 'observation_error': 0.04012866238034057, 'reward_error': 4.260302571770667e-06, 'variance': 0.05198818944492348}[0m [36mstep[0m=[35m13566[0m
[2m2023-10-05 12:34:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_13566.pt[0m


Epoch 20/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:34:25[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=20 step=14280[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002261997938823967, 'time_algorithm_update': 0.016746024457680413, 'loss': -71.71944748320165, 'time_step': 0.017040297430770405, 'observation_error': 0.049252820492110286, 'reward_error': 5.029223848335946e-06, 'variance': 0.051687930988462964}[0m [36mstep[0m=[35m14280[0m
[2m2023-10-05 12:34:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_14280.pt[0m


Epoch 21/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:34:40[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=21 step=14994[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002335777469709808, 'time_algorithm_update': 0.01690368325102563, 'loss': -73.08708406229313, 'time_step': 0.017206001348522196, 'observation_error': 0.04701189917468812, 'reward_error': 3.1076656142566932e-06, 'variance': 0.05055155684346093}[0m [36mstep[0m=[35m14994[0m
[2m2023-10-05 12:34:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_14994.pt[0m


Epoch 22/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:34:54[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=22 step=15708[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022573397607028651, 'time_algorithm_update': 0.016996941312688404, 'loss': -72.66022231932781, 'time_step': 0.017289242156747344, 'observation_error': 0.04238573210357979, 'reward_error': 3.84341451406785e-06, 'variance': 0.050638568611213325}[0m [36mstep[0m=[35m15708[0m
[2m2023-10-05 12:34:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_15708.pt[0m


Epoch 23/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:35:09[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=23 step=16422[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023560263529545119, 'time_algorithm_update': 0.017074301129295713, 'loss': -72.7840761016397, 'time_step': 0.017377966592291825, 'observation_error': 0.040720321878622685, 'reward_error': 3.8087345103528406e-06, 'variance': 0.04679388107035337}[0m [36mstep[0m=[35m16422[0m
[2m2023-10-05 12:35:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_16422.pt[0m


Epoch 24/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:35:24[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=24 step=17136[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023304347564526297, 'time_algorithm_update': 0.017314573295977936, 'loss': -73.47641388174533, 'time_step': 0.01761361530848912, 'observation_error': 0.04079673984763863, 'reward_error': 4.245297407524299e-06, 'variance': 0.04637346206754161}[0m [36mstep[0m=[35m17136[0m
[2m2023-10-05 12:35:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_17136.pt[0m


Epoch 25/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:35:38[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=25 step=17850[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021596515879911536, 'time_algorithm_update': 0.01619674611826237, 'loss': -74.01504946155708, 'time_step': 0.016476344327632786, 'observation_error': 0.04814372663881641, 'reward_error': 3.978657718353848e-06, 'variance': 0.04170339333545961}[0m [36mstep[0m=[35m17850[0m
[2m2023-10-05 12:35:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_17850.pt[0m


Epoch 26/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:35:52[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=26 step=18564[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002080692964441636, 'time_algorithm_update': 0.016260836972575896, 'loss': -73.45025333570165, 'time_step': 0.01653365828409916, 'observation_error': 0.049729228083182205, 'reward_error': 2.769420720958432e-06, 'variance': 0.04117703171342766}[0m [36mstep[0m=[35m18564[0m
[2m2023-10-05 12:35:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_18564.pt[0m


Epoch 27/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:36:06[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=27 step=19278[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002149901136296804, 'time_algorithm_update': 0.01646064643432446, 'loss': -73.54408091056247, 'time_step': 0.016739038526177072, 'observation_error': 0.03646929194562691, 'reward_error': 6.330472198175123e-06, 'variance': 0.03802277356037146}[0m [36mstep[0m=[35m19278[0m
[2m2023-10-05 12:36:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_19278.pt[0m


Epoch 28/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:36:20[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=28 step=19992[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021306172806341775, 'time_algorithm_update': 0.01623387864323891, 'loss': -74.12603686436886, 'time_step': 0.01651029727038215, 'observation_error': 0.04189044531006651, 'reward_error': 3.155126625723106e-06, 'variance': 0.03485220578269261}[0m [36mstep[0m=[35m19992[0m
[2m2023-10-05 12:36:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_19992.pt[0m


Epoch 29/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:36:34[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=29 step=20706[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021558716183617, 'time_algorithm_update': 0.01616489319574265, 'loss': -74.32192659545012, 'time_step': 0.016443350401913085, 'observation_error': 0.042007223780223577, 'reward_error': 3.0531510231384502e-06, 'variance': 0.035017610895059864}[0m [36mstep[0m=[35m20706[0m
[2m2023-10-05 12:36:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_20706.pt[0m


Epoch 30/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:36:48[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=30 step=21420[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021374225616455078, 'time_algorithm_update': 0.01619528722362358, 'loss': -74.20032144594593, 'time_step': 0.01647251794318191, 'observation_error': 0.04004469983710231, 'reward_error': 5.054518034829914e-06, 'variance': 0.03504122219093074}[0m [36mstep[0m=[35m21420[0m
[2m2023-10-05 12:36:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_21420.pt[0m


Epoch 31/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:37:02[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=31 step=22134[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021768918558329094, 'time_algorithm_update': 0.016264143444242932, 'loss': -74.03444581873276, 'time_step': 0.016546641745153263, 'observation_error': 0.032388785282649726, 'reward_error': 1.21383368895198e-05, 'variance': 0.032357406238959295}[0m [36mstep[0m=[35m22134[0m
[2m2023-10-05 12:37:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_22134.pt[0m


Epoch 32/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:37:16[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=32 step=22848[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002163428218424821, 'time_algorithm_update': 0.016272422646274085, 'loss': -75.01475412171094, 'time_step': 0.016554031719346673, 'observation_error': 0.0385410821165983, 'reward_error': 2.875985054829015e-06, 'variance': 0.030019434817710126}[0m [36mstep[0m=[35m22848[0m
[2m2023-10-05 12:37:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_22848.pt[0m


Epoch 33/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:37:30[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=33 step=23562[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021072629453087387, 'time_algorithm_update': 0.016264699754260835, 'loss': -74.30530931702515, 'time_step': 0.016537836619785855, 'observation_error': 0.029406170489246482, 'reward_error': 3.142838558411659e-06, 'variance': 0.028172071624734146}[0m [36mstep[0m=[35m23562[0m
[2m2023-10-05 12:37:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_23562.pt[0m


Epoch 34/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:37:44[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=34 step=24276[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021302900394471754, 'time_algorithm_update': 0.01627450096173113, 'loss': -74.39636544486721, 'time_step': 0.01655160545968876, 'observation_error': 0.029997318925379518, 'reward_error': 6.312424091235011e-06, 'variance': 0.027808331965854322}[0m [36mstep[0m=[35m24276[0m
[2m2023-10-05 12:37:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_24276.pt[0m


Epoch 35/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:37:58[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=35 step=24990[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002144127666783266, 'time_algorithm_update': 0.01641297240217193, 'loss': -74.21868055808444, 'time_step': 0.016691316075685646, 'observation_error': 0.032318727836342744, 'reward_error': 3.274614424559911e-06, 'variance': 0.02649608679836469}[0m [36mstep[0m=[35m24990[0m
[2m2023-10-05 12:37:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_24990.pt[0m


Epoch 36/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:38:12[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=36 step=25704[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021441443627621948, 'time_algorithm_update': 0.01651479683670343, 'loss': -75.78410187889548, 'time_step': 0.016792328751721635, 'observation_error': 0.026930346197257753, 'reward_error': 3.11534796520015e-06, 'variance': 0.023248255926155224}[0m [36mstep[0m=[35m25704[0m
[2m2023-10-05 12:38:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_25704.pt[0m


Epoch 37/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:38:26[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=37 step=26418[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020660739652916832, 'time_algorithm_update': 0.01638871915534097, 'loss': -75.78281510644267, 'time_step': 0.01665739819449203, 'observation_error': 0.02723098026206033, 'reward_error': 2.873049714393601e-06, 'variance': 0.021908806452294077}[0m [36mstep[0m=[35m26418[0m
[2m2023-10-05 12:38:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_26418.pt[0m


Epoch 38/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:38:40[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=38 step=27132[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002129395134976598, 'time_algorithm_update': 0.016657880708283068, 'loss': -74.11380171575466, 'time_step': 0.01693870507034601, 'observation_error': 0.026192768334730318, 'reward_error': 2.062701623813166e-06, 'variance': 0.021139662555466277}[0m [36mstep[0m=[35m27132[0m
[2m2023-10-05 12:38:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_27132.pt[0m


Epoch 39/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:38:54[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=39 step=27846[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002157317490136924, 'time_algorithm_update': 0.016565525565160756, 'loss': -75.38013520227427, 'time_step': 0.0168438345110383, 'observation_error': 0.02611207030154128, 'reward_error': 2.8708947190322782e-06, 'variance': 0.02017000451486716}[0m [36mstep[0m=[35m27846[0m
[2m2023-10-05 12:38:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_27846.pt[0m


Epoch 40/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:39:08[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=40 step=28560[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021147327262814304, 'time_algorithm_update': 0.01640552065285648, 'loss': -75.55226380618012, 'time_step': 0.01667952737888368, 'observation_error': 0.02644846897253409, 'reward_error': 2.2158517453079494e-06, 'variance': 0.019291618314382867}[0m [36mstep[0m=[35m28560[0m
[2m2023-10-05 12:39:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_28560.pt[0m


Epoch 41/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:39:22[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=41 step=29274[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000215301326676911, 'time_algorithm_update': 0.01644575629247671, 'loss': -75.4419579225428, 'time_step': 0.016724736082787607, 'observation_error': 0.02663549863363847, 'reward_error': 2.8958399114055415e-06, 'variance': 0.01880939815681343}[0m [36mstep[0m=[35m29274[0m
[2m2023-10-05 12:39:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_29274.pt[0m


Epoch 42/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:39:36[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=42 step=29988[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021605865628111597, 'time_algorithm_update': 0.016572038666540834, 'loss': -75.61549258566036, 'time_step': 0.016853656421522468, 'observation_error': 0.022346574469734825, 'reward_error': 3.4954183126185034e-06, 'variance': 0.017382477575744293}[0m [36mstep[0m=[35m29988[0m
[2m2023-10-05 12:39:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_29988.pt[0m


Epoch 43/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:39:50[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=43 step=30702[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021578050127216413, 'time_algorithm_update': 0.016290479681405033, 'loss': -76.72717630295526, 'time_step': 0.01656977402395895, 'observation_error': 0.02629461394546749, 'reward_error': 4.329921103231683e-06, 'variance': 0.016362924097959322}[0m [36mstep[0m=[35m30702[0m
[2m2023-10-05 12:39:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_30702.pt[0m


Epoch 44/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:40:04[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=44 step=31416[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002255746964313069, 'time_algorithm_update': 0.016541683039411444, 'loss': -76.40255104727437, 'time_step': 0.016835982392148143, 'observation_error': 0.02568949414418305, 'reward_error': 1.8550724555803353e-06, 'variance': 0.016417505208798762}[0m [36mstep[0m=[35m31416[0m
[2m2023-10-05 12:40:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_31416.pt[0m


Epoch 45/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:40:18[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=45 step=32130[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002103108985751283, 'time_algorithm_update': 0.01620899862935897, 'loss': -75.29743326010825, 'time_step': 0.01648417908270486, 'observation_error': 0.022356621603438567, 'reward_error': 2.693971243965582e-06, 'variance': 0.015543689310296037}[0m [36mstep[0m=[35m32130[0m
[2m2023-10-05 12:40:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_32130.pt[0m


Epoch 46/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:40:31[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=46 step=32844[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021472431364513578, 'time_algorithm_update': 0.016005222536936526, 'loss': -76.51656133053349, 'time_step': 0.016283612291352088, 'observation_error': 0.02059857519079226, 'reward_error': 2.253576553199603e-06, 'variance': 0.01484842741781107}[0m [36mstep[0m=[35m32844[0m
[2m2023-10-05 12:40:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_32844.pt[0m


Epoch 47/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:40:45[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=47 step=33558[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021133002112893497, 'time_algorithm_update': 0.016318374655159916, 'loss': -76.06560798986953, 'time_step': 0.01659319147008474, 'observation_error': 0.021743190074323335, 'reward_error': 2.572737774921682e-06, 'variance': 0.015304662329173028}[0m [36mstep[0m=[35m33558[0m
[2m2023-10-05 12:40:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_33558.pt[0m


Epoch 48/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:41:00[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=48 step=34272[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002359365548740248, 'time_algorithm_update': 0.017493025929320092, 'loss': -76.10831816189763, 'time_step': 0.01779834462814972, 'observation_error': 0.02056339575824923, 'reward_error': 3.97787499082728e-06, 'variance': 0.015351990792291529}[0m [36mstep[0m=[35m34272[0m
[2m2023-10-05 12:41:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_34272.pt[0m


Epoch 49/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:41:14[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=49 step=34986[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022647761497176995, 'time_algorithm_update': 0.016964771834408202, 'loss': -76.47905989922062, 'time_step': 0.017258679833398814, 'observation_error': 0.021039403251736494, 'reward_error': 2.3362173672944225e-06, 'variance': 0.013589896175565297}[0m [36mstep[0m=[35m34986[0m
[2m2023-10-05 12:41:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_34986.pt[0m


Epoch 50/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:41:28[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=50 step=35700[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022311270737848362, 'time_algorithm_update': 0.016672806245606152, 'loss': -76.68448335984174, 'time_step': 0.016963750374417344, 'observation_error': 0.020393424915722026, 'reward_error': 1.983265340147757e-06, 'variance': 0.013707954754530606}[0m [36mstep[0m=[35m35700[0m
[2m2023-10-05 12:41:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_35700.pt[0m


Epoch 51/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:41:42[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=51 step=36414[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022217806647805606, 'time_algorithm_update': 0.01617853681580359, 'loss': -76.64626911441151, 'time_step': 0.016467573262062392, 'observation_error': 0.020278522340812018, 'reward_error': 2.370829265393658e-06, 'variance': 0.01284082849733064}[0m [36mstep[0m=[35m36414[0m
[2m2023-10-05 12:41:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_36414.pt[0m


Epoch 52/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:41:55[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=52 step=37128[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021750886901086117, 'time_algorithm_update': 0.016306746573675247, 'loss': -77.17091026092444, 'time_step': 0.01659021123784597, 'observation_error': 0.0196822195214882, 'reward_error': 1.5994261852138785e-06, 'variance': 0.013281140298757807}[0m [36mstep[0m=[35m37128[0m
[2m2023-10-05 12:41:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_37128.pt[0m


Epoch 53/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:42:09[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=53 step=37842[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022666260641829975, 'time_algorithm_update': 0.016452739552623417, 'loss': -76.80702762710614, 'time_step': 0.016745866513719745, 'observation_error': 0.019266720036326034, 'reward_error': 2.4139408526493052e-06, 'variance': 0.012870276261533467}[0m [36mstep[0m=[35m37842[0m
[2m2023-10-05 12:42:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_37842.pt[0m


Epoch 54/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:42:22[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=54 step=38556[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021566062414345622, 'time_algorithm_update': 0.01581868938371247, 'loss': -76.46605131005039, 'time_step': 0.016099246610112552, 'observation_error': 0.02161512328644021, 'reward_error': 3.0822212093431e-06, 'variance': 0.013021171031489684}[0m [36mstep[0m=[35m38556[0m
[2m2023-10-05 12:42:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_38556.pt[0m


Epoch 55/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:42:36[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=55 step=39270[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022976004442914862, 'time_algorithm_update': 0.01632099358641467, 'loss': -76.78960906758027, 'time_step': 0.016617190604116403, 'observation_error': 0.019493534011696668, 'reward_error': 2.513564979546373e-06, 'variance': 0.012627414997263362}[0m [36mstep[0m=[35m39270[0m
[2m2023-10-05 12:42:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_39270.pt[0m


Epoch 56/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:42:50[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=56 step=39984[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023377041856781776, 'time_algorithm_update': 0.016464915596136526, 'loss': -77.82413045899207, 'time_step': 0.016765013128435577, 'observation_error': 0.018522049127317405, 'reward_error': 2.274611093765323e-06, 'variance': 0.011263977479399313}[0m [36mstep[0m=[35m39984[0m
[2m2023-10-05 12:42:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_39984.pt[0m


Epoch 57/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:43:03[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=57 step=40698[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002266302162191781, 'time_algorithm_update': 0.016121474300779884, 'loss': -78.03053409779439, 'time_step': 0.016412965723780357, 'observation_error': 0.018938151401788467, 'reward_error': 1.5049825195794953e-06, 'variance': 0.01078500612779959}[0m [36mstep[0m=[35m40698[0m
[2m2023-10-05 12:43:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_40698.pt[0m


Epoch 58/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:43:17[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=58 step=41412[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023008327858120787, 'time_algorithm_update': 0.01641933023094797, 'loss': -77.52345533397686, 'time_step': 0.016717924457304283, 'observation_error': 0.015482305564596803, 'reward_error': 1.8234888972061768e-06, 'variance': 0.010929776902801329}[0m [36mstep[0m=[35m41412[0m
[2m2023-10-05 12:43:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_41412.pt[0m


Epoch 59/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:43:30[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=59 step=42126[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023631154656076297, 'time_algorithm_update': 0.016669830020402326, 'loss': -77.70688045726104, 'time_step': 0.016974506925801935, 'observation_error': 0.018251278624144978, 'reward_error': 2.084777153529339e-06, 'variance': 0.010267265763158312}[0m [36mstep[0m=[35m42126[0m
[2m2023-10-05 12:43:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_42126.pt[0m


Epoch 60/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:43:44[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=60 step=42840[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022799795081301563, 'time_algorithm_update': 0.016135342314797622, 'loss': -77.20893096389557, 'time_step': 0.016434562640363762, 'observation_error': 0.019167860474066326, 'reward_error': 3.1429740404025466e-06, 'variance': 0.009795847857992816}[0m [36mstep[0m=[35m42840[0m
[2m2023-10-05 12:43:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_42840.pt[0m


Epoch 61/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:43:58[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=61 step=43554[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024014026844868807, 'time_algorithm_update': 0.016577291555431375, 'loss': -77.04296860307538, 'time_step': 0.01688768282657912, 'observation_error': 0.01690671118732684, 'reward_error': 1.9462299591421068e-06, 'variance': 0.009622712160464163}[0m [36mstep[0m=[35m43554[0m
[2m2023-10-05 12:43:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_43554.pt[0m


Epoch 62/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:44:11[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=62 step=44268[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023419082331724194, 'time_algorithm_update': 0.01670129192309553, 'loss': -77.69781381943646, 'time_step': 0.017006293398325517, 'observation_error': 0.018537903609062833, 'reward_error': 2.8699874539973586e-06, 'variance': 0.010566379363251013}[0m [36mstep[0m=[35m44268[0m
[2m2023-10-05 12:44:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_44268.pt[0m


Epoch 63/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:44:25[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=63 step=44982[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000232066093038778, 'time_algorithm_update': 0.01620288389403613, 'loss': -78.5877655734535, 'time_step': 0.01650191254976417, 'observation_error': 0.019961272545732413, 'reward_error': 1.5773823997510905e-06, 'variance': 0.00943464585892073}[0m [36mstep[0m=[35m44982[0m
[2m2023-10-05 12:44:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_44982.pt[0m


Epoch 64/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:44:38[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=64 step=45696[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022413550304765462, 'time_algorithm_update': 0.016004155663882986, 'loss': -77.56028122995414, 'time_step': 0.016295965980080998, 'observation_error': 0.014882342747966643, 'reward_error': 2.8950212482144715e-06, 'variance': 0.010194716473794455}[0m [36mstep[0m=[35m45696[0m
[2m2023-10-05 12:44:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_45696.pt[0m


Epoch 65/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:44:51[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=65 step=46410[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022316713626979113, 'time_algorithm_update': 0.016324722132429022, 'loss': -76.41564191422877, 'time_step': 0.016612953832503462, 'observation_error': 0.019752353406870024, 'reward_error': 2.537831654999758e-06, 'variance': 0.010689084616775386}[0m [36mstep[0m=[35m46410[0m
[2m2023-10-05 12:44:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_46410.pt[0m


Epoch 66/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:45:05[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=66 step=47124[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021632946505933918, 'time_algorithm_update': 0.016376675677900555, 'loss': -78.91433167657932, 'time_step': 0.016658500463020903, 'observation_error': 0.018296415540203177, 'reward_error': 1.9420042265126206e-06, 'variance': 0.00905311269540896}[0m [36mstep[0m=[35m47124[0m
[2m2023-10-05 12:45:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_47124.pt[0m


Epoch 67/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:45:18[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=67 step=47838[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022352008926434342, 'time_algorithm_update': 0.01605958965312199, 'loss': -79.05060414439824, 'time_step': 0.01634905952699378, 'observation_error': 0.015048839439290949, 'reward_error': 1.5348983301985492e-06, 'variance': 0.008500221630536542}[0m [36mstep[0m=[35m47838[0m
[2m2023-10-05 12:45:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_47838.pt[0m


Epoch 68/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:45:32[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=68 step=48552[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023533349611512086, 'time_algorithm_update': 0.016507528743156197, 'loss': -78.30375722676766, 'time_step': 0.0168139263361442, 'observation_error': 0.01675109629936561, 'reward_error': 2.0474697413930492e-06, 'variance': 0.009000101695100595}[0m [36mstep[0m=[35m48552[0m
[2m2023-10-05 12:45:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_48552.pt[0m


Epoch 69/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:45:46[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=69 step=49266[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023703381460921772, 'time_algorithm_update': 0.01695728535745658, 'loss': -77.54072724670922, 'time_step': 0.01726441857527618, 'observation_error': 0.01782141255307053, 'reward_error': 2.890441270262535e-06, 'variance': 0.009288005055348532}[0m [36mstep[0m=[35m49266[0m
[2m2023-10-05 12:45:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_49266.pt[0m


Epoch 70/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:46:02[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=70 step=49980[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027451996041947055, 'time_algorithm_update': 0.02008881822687571, 'loss': -78.50018071727592, 'time_step': 0.020458640838537562, 'observation_error': 0.016199060753453065, 'reward_error': 2.518076181966211e-06, 'variance': 0.00873061809367534}[0m [36mstep[0m=[35m49980[0m
[2m2023-10-05 12:46:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_49980.pt[0m


Epoch 71/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:46:15[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=71 step=50694[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023603940210422547, 'time_algorithm_update': 0.016377193253247345, 'loss': -78.2824741331469, 'time_step': 0.016683150740230784, 'observation_error': 0.01619817007854234, 'reward_error': 8.103917418296696e-06, 'variance': 0.00854030706713959}[0m [36mstep[0m=[35m50694[0m
[2m2023-10-05 12:46:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_50694.pt[0m


Epoch 72/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:46:30[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=72 step=51408[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002458723319344828, 'time_algorithm_update': 0.01719213934505687, 'loss': -78.39700589460485, 'time_step': 0.017510574071013293, 'observation_error': 0.0163221517918744, 'reward_error': 1.4225600439535162e-06, 'variance': 0.008563098296475611}[0m [36mstep[0m=[35m51408[0m
[2m2023-10-05 12:46:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_51408.pt[0m


Epoch 73/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:46:44[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=73 step=52122[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024126090255438112, 'time_algorithm_update': 0.01709004243214925, 'loss': -78.47899282212352, 'time_step': 0.01740172129719197, 'observation_error': 0.017684245628561246, 'reward_error': 2.1209398822120457e-06, 'variance': 0.008012601092357517}[0m [36mstep[0m=[35m52122[0m
[2m2023-10-05 12:46:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_52122.pt[0m


Epoch 74/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:46:57[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=74 step=52836[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023687587064855238, 'time_algorithm_update': 0.016838916209565492, 'loss': -78.78852708533364, 'time_step': 0.017144798564643755, 'observation_error': 0.01624529582813717, 'reward_error': 2.41500740907448e-06, 'variance': 0.00835009856851653}[0m [36mstep[0m=[35m52836[0m
[2m2023-10-05 12:46:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_52836.pt[0m


Epoch 75/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:47:11[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=75 step=53550[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002408662096125071, 'time_algorithm_update': 0.016862872601891097, 'loss': -78.12212290884067, 'time_step': 0.017174381167948748, 'observation_error': 0.016728301128113097, 'reward_error': 2.026293624769897e-06, 'variance': 0.008645790272731272}[0m [36mstep[0m=[35m53550[0m
[2m2023-10-05 12:47:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_53550.pt[0m


Epoch 76/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:47:25[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=76 step=54264[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023701344551492473, 'time_algorithm_update': 0.01673784776895988, 'loss': -78.73977236387108, 'time_step': 0.017043178822813917, 'observation_error': 0.017508267385460356, 'reward_error': 2.882786798840963e-06, 'variance': 0.008896990948616837}[0m [36mstep[0m=[35m54264[0m
[2m2023-10-05 12:47:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_54264.pt[0m


Epoch 77/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:47:38[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=77 step=54978[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002423194276184595, 'time_algorithm_update': 0.016778624692216974, 'loss': -78.78789460959555, 'time_step': 0.017092736829228763, 'observation_error': 0.01517903804367111, 'reward_error': 2.4455945477296987e-06, 'variance': 0.007984540676240609}[0m [36mstep[0m=[35m54978[0m
[2m2023-10-05 12:47:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_54978.pt[0m


Epoch 78/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:47:52[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=78 step=55692[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024170100855894116, 'time_algorithm_update': 0.01718287140715356, 'loss': -78.93760275106136, 'time_step': 0.01749613004572251, 'observation_error': 0.015336705419129579, 'reward_error': 2.469719698351893e-06, 'variance': 0.007695476338529171}[0m [36mstep[0m=[35m55692[0m
[2m2023-10-05 12:47:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_55692.pt[0m


Epoch 79/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:48:06[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=79 step=56406[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024723004894096313, 'time_algorithm_update': 0.01709637287951985, 'loss': -79.07082426915316, 'time_step': 0.017415881490840966, 'observation_error': 0.015900312423261016, 'reward_error': 1.430960771997785e-06, 'variance': 0.008086038100376744}[0m [36mstep[0m=[35m56406[0m
[2m2023-10-05 12:48:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_56406.pt[0m


Epoch 80/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:48:20[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=80 step=57120[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024175911056561295, 'time_algorithm_update': 0.01686070646558489, 'loss': -78.56341090322543, 'time_step': 0.017171605294492066, 'observation_error': 0.015684285351670435, 'reward_error': 2.122548374688458e-06, 'variance': 0.00846329004954683}[0m [36mstep[0m=[35m57120[0m
[2m2023-10-05 12:48:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_57120.pt[0m


Epoch 81/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:48:34[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=81 step=57834[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002313982538816308, 'time_algorithm_update': 0.016288508554132712, 'loss': -79.66959989104285, 'time_step': 0.016589258899207877, 'observation_error': 0.014511607890643463, 'reward_error': 1.7678251515762267e-06, 'variance': 0.007388464638054373}[0m [36mstep[0m=[35m57834[0m
[2m2023-10-05 12:48:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_57834.pt[0m


Epoch 82/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:48:47[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=82 step=58548[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023497252905068278, 'time_algorithm_update': 0.016267012815181614, 'loss': -79.74715666677437, 'time_step': 0.016566527991735636, 'observation_error': 0.015591239481739138, 'reward_error': 2.975359745506832e-06, 'variance': 0.007325885432354761}[0m [36mstep[0m=[35m58548[0m
[2m2023-10-05 12:48:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_58548.pt[0m


Epoch 83/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:49:00[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=83 step=59262[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023550947173302914, 'time_algorithm_update': 0.016180071176267137, 'loss': -79.89872944054483, 'time_step': 0.016482294774522968, 'observation_error': 0.015802272085047545, 'reward_error': 1.775768903740763e-06, 'variance': 0.007909351852846167}[0m [36mstep[0m=[35m59262[0m
[2m2023-10-05 12:49:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_59262.pt[0m


Epoch 84/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:49:14[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=84 step=59976[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002510778042448669, 'time_algorithm_update': 0.017126402935060132, 'loss': -79.19671884905391, 'time_step': 0.017452708479403115, 'observation_error': 0.014685318419531366, 'reward_error': 1.8960864776582123e-06, 'variance': 0.0076150486623353065}[0m [36mstep[0m=[35m59976[0m
[2m2023-10-05 12:49:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_59976.pt[0m


Epoch 85/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:49:27[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=85 step=60690[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002365352726784073, 'time_algorithm_update': 0.016119910889313, 'loss': -79.22092560893681, 'time_step': 0.016425359482858694, 'observation_error': 0.0171335628405639, 'reward_error': 2.1422085584822175e-06, 'variance': 0.00826891757610872}[0m [36mstep[0m=[35m60690[0m
[2m2023-10-05 12:49:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_60690.pt[0m


Epoch 86/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:49:40[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=86 step=61404[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022047440878817347, 'time_algorithm_update': 0.015580297184257614, 'loss': -79.72745299138943, 'time_step': 0.015866620200020925, 'observation_error': 0.016029088936606272, 'reward_error': 1.2596908093059839e-06, 'variance': 0.007562115165665633}[0m [36mstep[0m=[35m61404[0m
[2m2023-10-05 12:49:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_61404.pt[0m


Epoch 87/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:49:54[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=87 step=62118[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024353623056278176, 'time_algorithm_update': 0.017320152090377166, 'loss': -79.13505112891103, 'time_step': 0.017637880576424906, 'observation_error': 0.015179924858485886, 'reward_error': 1.7666978998525832e-06, 'variance': 0.007154306780747073}[0m [36mstep[0m=[35m62118[0m
[2m2023-10-05 12:49:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_62118.pt[0m


Epoch 88/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:50:08[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=88 step=62832[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002442554933350293, 'time_algorithm_update': 0.01709284936012674, 'loss': -79.33413584492787, 'time_step': 0.017406699370269348, 'observation_error': 0.014047592756916031, 'reward_error': 3.3799284256084603e-06, 'variance': 0.007247884643488739}[0m [36mstep[0m=[35m62832[0m
[2m2023-10-05 12:50:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_62832.pt[0m


Epoch 89/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:50:22[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=89 step=63546[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002452388864939286, 'time_algorithm_update': 0.01747983009541402, 'loss': -80.4624749739297, 'time_step': 0.017797386946798374, 'observation_error': 0.015826736389888223, 'reward_error': 2.0053997058323712e-06, 'variance': 0.006874074856681215}[0m [36mstep[0m=[35m63546[0m
[2m2023-10-05 12:50:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_63546.pt[0m


Epoch 90/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:50:35[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=90 step=64260[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002262912878469259, 'time_algorithm_update': 0.015939226003588082, 'loss': -80.30811104654263, 'time_step': 0.016234556833902996, 'observation_error': 0.015568827838140238, 'reward_error': 2.664093284017878e-06, 'variance': 0.007215649545478027}[0m [36mstep[0m=[35m64260[0m
[2m2023-10-05 12:50:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_64260.pt[0m


Epoch 91/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:50:48[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=91 step=64974[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022546016201585613, 'time_algorithm_update': 0.015708894288840414, 'loss': -79.11042346740638, 'time_step': 0.01600364476692777, 'observation_error': 0.015549438328627223, 'reward_error': 3.4215434692173446e-06, 'variance': 0.007347517610973296}[0m [36mstep[0m=[35m64974[0m
[2m2023-10-05 12:50:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_64974.pt[0m


Epoch 92/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:51:02[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=92 step=65688[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023148240161543132, 'time_algorithm_update': 0.016370334545103442, 'loss': -80.60691196644673, 'time_step': 0.016668501354399182, 'observation_error': 0.014919289639788115, 'reward_error': 2.267097398635688e-06, 'variance': 0.0070639550831040005}[0m [36mstep[0m=[35m65688[0m
[2m2023-10-05 12:51:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_65688.pt[0m


Epoch 93/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:51:15[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=93 step=66402[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002480291184924898, 'time_algorithm_update': 0.017036486072700564, 'loss': -80.40199200846568, 'time_step': 0.01735639705711386, 'observation_error': 0.01519796407351046, 'reward_error': 1.3399123942597614e-06, 'variance': 0.006946460988844214}[0m [36mstep[0m=[35m66402[0m
[2m2023-10-05 12:51:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_66402.pt[0m


Epoch 94/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:51:29[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=94 step=67116[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002443927342818231, 'time_algorithm_update': 0.017069567151430275, 'loss': -80.41878695581474, 'time_step': 0.017385719537067145, 'observation_error': 0.016472419438035752, 'reward_error': 1.3614000374989156e-06, 'variance': 0.007594378778274058}[0m [36mstep[0m=[35m67116[0m
[2m2023-10-05 12:51:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_67116.pt[0m


Epoch 95/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:51:44[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=95 step=67830[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025253402752702645, 'time_algorithm_update': 0.01749642055575587, 'loss': -80.03199697809727, 'time_step': 0.017825508985866687, 'observation_error': 0.015566311467913131, 'reward_error': 1.2588770264304564e-06, 'variance': 0.006579969513873669}[0m [36mstep[0m=[35m67830[0m
[2m2023-10-05 12:51:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_67830.pt[0m


Epoch 96/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:51:57[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=96 step=68544[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023655898096848601, 'time_algorithm_update': 0.01677144575519722, 'loss': -80.30141677375602, 'time_step': 0.017077144454507268, 'observation_error': 0.01591429850310576, 'reward_error': 1.4405688354719485e-06, 'variance': 0.006579039434034816}[0m [36mstep[0m=[35m68544[0m
[2m2023-10-05 12:51:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_68544.pt[0m


Epoch 97/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:52:11[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=97 step=69258[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002484188026406852, 'time_algorithm_update': 0.01718059374170811, 'loss': -79.82956415865601, 'time_step': 0.017502328594859576, 'observation_error': 0.013910792822967854, 'reward_error': 1.4580885698478044e-06, 'variance': 0.006450631550044991}[0m [36mstep[0m=[35m69258[0m
[2m2023-10-05 12:52:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_69258.pt[0m


Epoch 98/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:52:25[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=98 step=69972[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023033438610429524, 'time_algorithm_update': 0.016273682190924465, 'loss': -81.58023232104732, 'time_step': 0.016572417999182094, 'observation_error': 0.016851665525462366, 'reward_error': 4.756159831007257e-06, 'variance': 0.006372198585084434}[0m [36mstep[0m=[35m69972[0m
[2m2023-10-05 12:52:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_69972.pt[0m


Epoch 99/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:52:39[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=99 step=70686[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023693998320763852, 'time_algorithm_update': 0.016946705449529056, 'loss': -80.75887435891715, 'time_step': 0.017254880162514225, 'observation_error': 0.014954102141352175, 'reward_error': 1.7022978805562742e-06, 'variance': 0.006158355667577105}[0m [36mstep[0m=[35m70686[0m
[2m2023-10-05 12:52:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_70686.pt[0m


Epoch 100/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-05 12:52:53[0m [[32m[1minfo     [0m] [1mProbabilisticEnsembleDynamics_20231005122937: epoch=100 step=71400[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025468380177388336, 'time_algorithm_update': 0.017397066792186236, 'loss': -80.16371313196605, 'time_step': 0.01772602132054604, 'observation_error': 0.016184192456031955, 'reward_error': 1.3152433183148368e-06, 'variance': 0.006597765055720612}[0m [36mstep[0m=[35m71400[0m
[2m2023-10-05 12:52:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/ProbabilisticEnsembleDynamics_20231005122937/model_71400.pt[0m


[(1,
  {'time_sample_batch': 0.00023564370740361575,
   'time_algorithm_update': 0.013745944373080042,
   'loss': -22.885445690687735,
   'time_step': 0.014056795117567902,
   'observation_error': 0.03438636513912763,
   'reward_error': 0.00039823976651985073,
   'variance': 0.029693792782415095}),
 (2,
  {'time_sample_batch': 0.00023534985817447095,
   'time_algorithm_update': 0.014228190694536482,
   'loss': -33.198769204422874,
   'time_step': 0.014536314985665287,
   'observation_error': 0.020555156495159713,
   'reward_error': 0.0002807210454080229,
   'variance': 0.015802806306843786}),
 (3,
  {'time_sample_batch': 0.0002437102026632186,
   'time_algorithm_update': 0.01521063418615432,
   'loss': -41.05668649686819,
   'time_step': 0.015527376273766953,
   'observation_error': 0.013155272969279555,
   'reward_error': 4.632182273780113e-05,
   'variance': 0.006468071347963596}),
 (4,
  {'time_sample_batch': 0.000223257294556006,
   'time_algorithm_update': 0.015178150823470257,
  

In [8]:
def experiment_dynamics_training(dataset, n_runs, experiment_name, seed=1, use_gpu=True):
    for i in range(n_runs):
        for encoder_factory in ['default', 'inverted_pendulum']:
            # use the same seeds for default and symmetric runs
            train_episodes, test_episodes = train_test_split(dataset, random_state=seed+i)
            dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(learning_rate=1e-4, use_gpu=use_gpu, encoder_factory=encoder_factory)
            dynamics.fit(train_episodes,
                 eval_episodes=test_episodes,
                 n_epochs=100,
                 scorers={
                    'observation_error': d3rlpy.metrics.scorer.dynamics_observation_prediction_error_scorer,
                    'reward_error': d3rlpy.metrics.scorer.dynamics_reward_prediction_error_scorer,
                    'variance': d3rlpy.metrics.scorer.dynamics_prediction_variance_scorer,
                 },
                tensorboard_dir='tensorboard_logs/dynamics',
                experiment_name=experiment_name)

In [None]:
experiment_dynamics_training(dataset=dataset, n_runs=3, experiment_name="exp_0", use_gpu=False)

[2m2023-10-09 13:56:25[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-09 13:56:25[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_0_20231009135625[0m
[2m2023-10-09 13:56:25[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-09 13:56:25[0m [[32m[1mdebug    [0m] [1mModels have been built.[0m
[2m2023-10-09 13:56:25[0m [[32m[1minfo     [0m] [1mParameters are saved to d3rlpy_logs/exp_0_20231009135625/params.json[0m [36mparams[0m=[35m{'action_scaler': None, 'batch_size': 100, 'discrete_action': False, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_ensembles': 5, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0.0001, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': Non

Epoch 1/100:   0%|          | 0/714 [00:00<?, ?it/s]

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[2m2023-10-09 13:56:36[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=1 step=714[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022399592466381082, 'time_algorithm_update': 0.012846483569853111, 'loss': -22.27442910194303, 'time_step': 0.013141517545662675, 'observation_error': 0.04477599714067544, 'reward_error': 0.0010903494126543256, 'variance': 0.04688352553434187}[0m [36mstep[0m=[35m714[0m
[2m2023-10-09 13:56:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_714.pt[0m


Epoch 2/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:56:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=2 step=1428[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022749940888220523, 'time_algorithm_update': 0.013856105777729793, 'loss': -34.00801589575802, 'time_step': 0.014152349544172528, 'observation_error': 0.02015534837504541, 'reward_error': 0.0001963458263788712, 'variance': 0.01797597975578639}[0m [36mstep[0m=[35m1428[0m
[2m2023-10-09 13:56:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_1428.pt[0m


Epoch 3/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:56:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=3 step=2142[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024242494620528874, 'time_algorithm_update': 0.01456127881335945, 'loss': -42.65997984496151, 'time_step': 0.014875620019202139, 'observation_error': 0.011608545848916229, 'reward_error': 2.816027656768668e-05, 'variance': 0.004355415163728301}[0m [36mstep[0m=[35m2142[0m
[2m2023-10-09 13:56:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_2142.pt[0m


Epoch 4/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:57:11[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=4 step=2856[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023726755831421926, 'time_algorithm_update': 0.01499322816437366, 'loss': -50.69236901093598, 'time_step': 0.015301583861770416, 'observation_error': 0.010060698007802165, 'reward_error': 1.861307149818435e-05, 'variance': 0.002342244876274044}[0m [36mstep[0m=[35m2856[0m
[2m2023-10-09 13:57:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_2856.pt[0m


Epoch 5/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:57:24[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=5 step=3570[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023892513510225867, 'time_algorithm_update': 0.015399594266875451, 'loss': -56.07048288146321, 'time_step': 0.015708596098656748, 'observation_error': 0.012082262957869266, 'reward_error': 1.4934183954732225e-05, 'variance': 0.003969089079643925}[0m [36mstep[0m=[35m3570[0m
[2m2023-10-09 13:57:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_3570.pt[0m


Epoch 6/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:57:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=6 step=4284[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023479889086982448, 'time_algorithm_update': 0.01604017323138667, 'loss': -59.85219403308313, 'time_step': 0.01634269401806743, 'observation_error': 0.017879773170735806, 'reward_error': 1.041486175184003e-05, 'variance': 0.009514247955471108}[0m [36mstep[0m=[35m4284[0m
[2m2023-10-09 13:57:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_4284.pt[0m


Epoch 7/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:57:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=7 step=4998[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023047830544266047, 'time_algorithm_update': 0.015628106119919893, 'loss': -61.80359278206064, 'time_step': 0.015926124335003167, 'observation_error': 0.014597579048174691, 'reward_error': 1.9191491860198617e-05, 'variance': 0.01576276852720975}[0m [36mstep[0m=[35m4998[0m
[2m2023-10-09 13:57:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_4998.pt[0m


Epoch 8/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:58:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=8 step=5712[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021996117439590583, 'time_algorithm_update': 0.015323562114512553, 'loss': -63.34492040314928, 'time_step': 0.015608834619281673, 'observation_error': 0.021390189746724124, 'reward_error': 1.2725330763912053e-05, 'variance': 0.0285213724781985}[0m [36mstep[0m=[35m5712[0m
[2m2023-10-09 13:58:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_5712.pt[0m


Epoch 9/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:58:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=9 step=6426[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023716203972739, 'time_algorithm_update': 0.016376434254045245, 'loss': -64.65186327445407, 'time_step': 0.01668297810380866, 'observation_error': 0.03368095140220222, 'reward_error': 1.4445533674654202e-05, 'variance': 0.036122980199385195}[0m [36mstep[0m=[35m6426[0m
[2m2023-10-09 13:58:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_6426.pt[0m


Epoch 10/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:58:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=10 step=7140[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022944081731203223, 'time_algorithm_update': 0.015858665901739726, 'loss': -65.43879239398892, 'time_step': 0.01615519483550256, 'observation_error': 0.03597069747149486, 'reward_error': 1.5669491901734777e-05, 'variance': 0.03775839096958422}[0m [36mstep[0m=[35m7140[0m
[2m2023-10-09 13:58:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_7140.pt[0m


Epoch 11/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:58:45[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=11 step=7854[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023861559165292093, 'time_algorithm_update': 0.01661566459164232, 'loss': -67.0212342291653, 'time_step': 0.016921785365299685, 'observation_error': 0.043681328279701434, 'reward_error': 7.043809463626996e-06, 'variance': 0.04428326739980991}[0m [36mstep[0m=[35m7854[0m
[2m2023-10-09 13:58:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_7854.pt[0m


Epoch 12/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:59:01[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=12 step=8568[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024429489584530103, 'time_algorithm_update': 0.018942167778976827, 'loss': -67.688446173147, 'time_step': 0.019267276889469775, 'observation_error': 0.049970863176304045, 'reward_error': 8.524272029740313e-06, 'variance': 0.04234904102936556}[0m [36mstep[0m=[35m8568[0m
[2m2023-10-09 13:59:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_8568.pt[0m


Epoch 13/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:59:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=13 step=9282[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002561079687764999, 'time_algorithm_update': 0.018456107762013497, 'loss': -67.97904120707044, 'time_step': 0.01879073057521959, 'observation_error': 0.04655038333999118, 'reward_error': 6.649279032004356e-06, 'variance': 0.04662226816289871}[0m [36mstep[0m=[35m9282[0m
[2m2023-10-09 13:59:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_9282.pt[0m


Epoch 14/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:59:32[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=14 step=9996[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024478976466074713, 'time_algorithm_update': 0.017383767777130382, 'loss': -69.28673110555867, 'time_step': 0.01770209431314335, 'observation_error': 0.056769088929921906, 'reward_error': 7.131728335635265e-06, 'variance': 0.04630315553363551}[0m [36mstep[0m=[35m9996[0m
[2m2023-10-09 13:59:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_9996.pt[0m


Epoch 15/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:59:45[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=15 step=10710[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002105122520810082, 'time_algorithm_update': 0.015626930055164155, 'loss': -69.43786182991263, 'time_step': 0.01590111442640716, 'observation_error': 0.034416143415169555, 'reward_error': 9.525245538668519e-06, 'variance': 0.04918734987764011}[0m [36mstep[0m=[35m10710[0m
[2m2023-10-09 13:59:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_10710.pt[0m


Epoch 16/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 13:59:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=16 step=11424[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000210504571930701, 'time_algorithm_update': 0.015440045618543438, 'loss': -70.0244282968238, 'time_step': 0.01571454888298398, 'observation_error': 0.04305967016123047, 'reward_error': 7.085440269540006e-06, 'variance': 0.04863309326623917}[0m [36mstep[0m=[35m11424[0m
[2m2023-10-09 13:59:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_11424.pt[0m


Epoch 17/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:00:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=17 step=12138[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022012145579362115, 'time_algorithm_update': 0.01599181466409806, 'loss': -70.07763307769092, 'time_step': 0.016278033830872438, 'observation_error': 0.04428900012168534, 'reward_error': 4.783003066244105e-06, 'variance': 0.048760274419384166}[0m [36mstep[0m=[35m12138[0m
[2m2023-10-09 14:00:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_12138.pt[0m


Epoch 18/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:00:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=18 step=12852[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021612911331219499, 'time_algorithm_update': 0.015122113441552768, 'loss': -70.66980172806427, 'time_step': 0.01540243158153459, 'observation_error': 0.04847481630989906, 'reward_error': 7.434772776676333e-06, 'variance': 0.04593059806890754}[0m [36mstep[0m=[35m12852[0m
[2m2023-10-09 14:00:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_12852.pt[0m


Epoch 19/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:00:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=19 step=13566[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022305694280886184, 'time_algorithm_update': 0.015177381138841645, 'loss': -70.83106797883491, 'time_step': 0.015467503491569968, 'observation_error': 0.04872505834102689, 'reward_error': 4.740616921449752e-06, 'variance': 0.04606395110606358}[0m [36mstep[0m=[35m13566[0m
[2m2023-10-09 14:00:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_13566.pt[0m


Epoch 20/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:00:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=20 step=14280[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021029353475704245, 'time_algorithm_update': 0.014982636235341304, 'loss': -71.56529060064578, 'time_step': 0.0152564202370096, 'observation_error': 0.04250573514688289, 'reward_error': 6.381669962149387e-06, 'variance': 0.041964801885809214}[0m [36mstep[0m=[35m14280[0m
[2m2023-10-09 14:00:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_14280.pt[0m


Epoch 21/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:01:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=21 step=14994[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021466020108604965, 'time_algorithm_update': 0.015078843140802463, 'loss': -70.97611077993858, 'time_step': 0.01535915126319693, 'observation_error': 0.03984085601238983, 'reward_error': 4.0662976740938125e-06, 'variance': 0.04388349261587355}[0m [36mstep[0m=[35m14994[0m
[2m2023-10-09 14:01:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_14994.pt[0m


Epoch 22/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:01:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=22 step=15708[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020738542914724485, 'time_algorithm_update': 0.015139394113663532, 'loss': -72.33454024424405, 'time_step': 0.015408728303027754, 'observation_error': 0.040289087984603955, 'reward_error': 4.721243643516641e-06, 'variance': 0.042213592345642446}[0m [36mstep[0m=[35m15708[0m
[2m2023-10-09 14:01:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_15708.pt[0m


Epoch 23/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:01:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=23 step=16422[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020716737966243627, 'time_algorithm_update': 0.014979243946342575, 'loss': -72.25114109803314, 'time_step': 0.015248346061599688, 'observation_error': 0.03523804459252888, 'reward_error': 4.1522694543318705e-06, 'variance': 0.03658682777797292}[0m [36mstep[0m=[35m16422[0m
[2m2023-10-09 14:01:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_16422.pt[0m


Epoch 24/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:01:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=24 step=17136[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022038725577816577, 'time_algorithm_update': 0.015335369510810916, 'loss': -72.73067277085548, 'time_step': 0.015621219362531389, 'observation_error': 0.0460092940585487, 'reward_error': 3.799534952761279e-06, 'variance': 0.033736781690306014}[0m [36mstep[0m=[35m17136[0m
[2m2023-10-09 14:01:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_17136.pt[0m


Epoch 25/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:01:57[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=25 step=17850[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021155474900531502, 'time_algorithm_update': 0.01467057870549648, 'loss': -73.10769509200622, 'time_step': 0.01494544327092104, 'observation_error': 0.03699497051287756, 'reward_error': 4.014588407349156e-06, 'variance': 0.03342130406909657}[0m [36mstep[0m=[35m17850[0m
[2m2023-10-09 14:01:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_17850.pt[0m


Epoch 26/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:02:10[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=26 step=18564[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021494737192362298, 'time_algorithm_update': 0.014984566624425039, 'loss': -72.00165270824058, 'time_step': 0.01526269391805184, 'observation_error': 0.03270104812969221, 'reward_error': 3.417674860151748e-06, 'variance': 0.03698911510402973}[0m [36mstep[0m=[35m18564[0m
[2m2023-10-09 14:02:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_18564.pt[0m


Epoch 27/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:02:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=27 step=19278[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020899458759639108, 'time_algorithm_update': 0.015044951973175135, 'loss': -73.31711727564408, 'time_step': 0.015316333089556013, 'observation_error': 0.032915165666496486, 'reward_error': 3.8076169394829175e-06, 'variance': 0.031186268915906624}[0m [36mstep[0m=[35m19278[0m
[2m2023-10-09 14:02:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_19278.pt[0m


Epoch 28/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:02:36[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=28 step=19992[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002149914493079947, 'time_algorithm_update': 0.015471392986821193, 'loss': -73.80295423833596, 'time_step': 0.01574898801263975, 'observation_error': 0.035061865926354935, 'reward_error': 2.727100991553905e-06, 'variance': 0.029034676960923256}[0m [36mstep[0m=[35m19992[0m
[2m2023-10-09 14:02:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_19992.pt[0m


Epoch 29/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:02:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=29 step=20706[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020657266889299666, 'time_algorithm_update': 0.015152264709900073, 'loss': -72.99393965884083, 'time_step': 0.015420179073216201, 'observation_error': 0.02946970915636969, 'reward_error': 5.59840363166084e-06, 'variance': 0.02918916054915348}[0m [36mstep[0m=[35m20706[0m
[2m2023-10-09 14:02:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_20706.pt[0m


Epoch 30/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:03:02[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=30 step=21420[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002131017984128466, 'time_algorithm_update': 0.015164713565708877, 'loss': -74.21664049645432, 'time_step': 0.015440475373041062, 'observation_error': 0.027047042695207062, 'reward_error': 2.634368658142951e-06, 'variance': 0.027260560391858778}[0m [36mstep[0m=[35m21420[0m
[2m2023-10-09 14:03:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_21420.pt[0m


Epoch 31/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:03:15[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=31 step=22134[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021251343211539986, 'time_algorithm_update': 0.01539444422521511, 'loss': -73.37539184060083, 'time_step': 0.01566972485443457, 'observation_error': 0.03139067591147839, 'reward_error': 6.35865054105044e-06, 'variance': 0.027628228843379787}[0m [36mstep[0m=[35m22134[0m
[2m2023-10-09 14:03:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_22134.pt[0m


Epoch 32/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:03:29[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=32 step=22848[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020499489888423631, 'time_algorithm_update': 0.015487632497685965, 'loss': -74.56713773289314, 'time_step': 0.015753429476954357, 'observation_error': 0.03150251516303287, 'reward_error': 5.150071194669229e-06, 'variance': 0.02605022267192262}[0m [36mstep[0m=[35m22848[0m
[2m2023-10-09 14:03:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_22848.pt[0m


Epoch 33/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:03:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=33 step=23562[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021512768849605272, 'time_algorithm_update': 0.015979634279630433, 'loss': -73.94308010670318, 'time_step': 0.01626080792157256, 'observation_error': 0.03365787497171542, 'reward_error': 3.899060257960151e-06, 'variance': 0.024677757019616522}[0m [36mstep[0m=[35m23562[0m
[2m2023-10-09 14:03:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_23562.pt[0m


Epoch 34/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:03:55[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=34 step=24276[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020502461772672935, 'time_algorithm_update': 0.015304128662878726, 'loss': -74.28342509603634, 'time_step': 0.015571591566924621, 'observation_error': 0.03105592956435207, 'reward_error': 4.654525752600703e-06, 'variance': 0.02354262891145493}[0m [36mstep[0m=[35m24276[0m
[2m2023-10-09 14:03:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_24276.pt[0m


Epoch 35/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:04:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=35 step=24990[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002110779118471119, 'time_algorithm_update': 0.015782722238065146, 'loss': -73.75250649385426, 'time_step': 0.01605960100638766, 'observation_error': 0.02658883368730411, 'reward_error': 2.8918164656909886e-06, 'variance': 0.02254785999128962}[0m [36mstep[0m=[35m24990[0m
[2m2023-10-09 14:04:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_24990.pt[0m


Epoch 36/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:04:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=36 step=25704[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002152088309536461, 'time_algorithm_update': 0.015528915643024178, 'loss': -75.15209761520728, 'time_step': 0.015807287031862915, 'observation_error': 0.024188661114372755, 'reward_error': 9.549035134346119e-06, 'variance': 0.02265350426106992}[0m [36mstep[0m=[35m25704[0m
[2m2023-10-09 14:04:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_25704.pt[0m


Epoch 37/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:04:35[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=37 step=26418[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021403109660001696, 'time_algorithm_update': 0.01601280985760088, 'loss': -75.49143652555321, 'time_step': 0.016290745815309156, 'observation_error': 0.026829488045984045, 'reward_error': 2.863713735870597e-06, 'variance': 0.020466779919790054}[0m [36mstep[0m=[35m26418[0m
[2m2023-10-09 14:04:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_26418.pt[0m


Epoch 38/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:04:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=38 step=27132[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021460310083811355, 'time_algorithm_update': 0.015682472568266197, 'loss': -75.31326157171853, 'time_step': 0.015960646610634, 'observation_error': 0.021949474877592853, 'reward_error': 5.481367777804205e-06, 'variance': 0.019990807253731754}[0m [36mstep[0m=[35m27132[0m
[2m2023-10-09 14:04:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_27132.pt[0m


Epoch 39/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:05:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=39 step=27846[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021578951710078562, 'time_algorithm_update': 0.01610938808163341, 'loss': -75.23878288803314, 'time_step': 0.016390212109776772, 'observation_error': 0.02327515283843504, 'reward_error': 3.7201691303746215e-06, 'variance': 0.018281281529248152}[0m [36mstep[0m=[35m27846[0m
[2m2023-10-09 14:05:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_27846.pt[0m


Epoch 40/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:05:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=40 step=28560[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021377063932872953, 'time_algorithm_update': 0.0161003351879387, 'loss': -75.57748170526756, 'time_step': 0.01637722230425068, 'observation_error': 0.02385919884960956, 'reward_error': 2.866231921938299e-06, 'variance': 0.016958014195673603}[0m [36mstep[0m=[35m28560[0m
[2m2023-10-09 14:05:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_28560.pt[0m


Epoch 41/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:05:29[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=41 step=29274[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021602993919735863, 'time_algorithm_update': 0.0156491076578947, 'loss': -75.62790056971275, 'time_step': 0.015929425463956946, 'observation_error': 0.024425195667201617, 'reward_error': 3.826858948340766e-06, 'variance': 0.016399840448140145}[0m [36mstep[0m=[35m29274[0m
[2m2023-10-09 14:05:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_29274.pt[0m


Epoch 42/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:05:43[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=42 step=29988[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002161581643155309, 'time_algorithm_update': 0.01589799795498033, 'loss': -76.57733829882966, 'time_step': 0.01617946611399076, 'observation_error': 0.020096473803255233, 'reward_error': 3.205084263007637e-06, 'variance': 0.015475714408053357}[0m [36mstep[0m=[35m29988[0m
[2m2023-10-09 14:05:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_29988.pt[0m


Epoch 43/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:05:56[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=43 step=30702[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020423623360171705, 'time_algorithm_update': 0.01554694563066926, 'loss': -76.00919896614651, 'time_step': 0.015812631080798407, 'observation_error': 0.02313721479272605, 'reward_error': 2.4109143050312104e-06, 'variance': 0.015800062212863807}[0m [36mstep[0m=[35m30702[0m
[2m2023-10-09 14:05:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_30702.pt[0m


Epoch 44/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:06:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=44 step=31416[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021121782415053423, 'time_algorithm_update': 0.015473007821903176, 'loss': -76.09290053998055, 'time_step': 0.015747378854190603, 'observation_error': 0.02061414102315309, 'reward_error': 3.4182311498940708e-06, 'variance': 0.014220099080669476}[0m [36mstep[0m=[35m31416[0m
[2m2023-10-09 14:06:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_31416.pt[0m


Epoch 45/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:06:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=45 step=32130[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002052306461067093, 'time_algorithm_update': 0.015378060127172818, 'loss': -76.27341969807942, 'time_step': 0.01564483315336938, 'observation_error': 0.02000652673220262, 'reward_error': 2.563864159311043e-06, 'variance': 0.013861426655421312}[0m [36mstep[0m=[35m32130[0m
[2m2023-10-09 14:06:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_32130.pt[0m


Epoch 46/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:06:35[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=46 step=32844[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021806350943087195, 'time_algorithm_update': 0.015662303157881193, 'loss': -76.08452933709495, 'time_step': 0.015945632584622595, 'observation_error': 0.018475075386445897, 'reward_error': 2.3324575363445944e-06, 'variance': 0.013273618225589126}[0m [36mstep[0m=[35m32844[0m
[2m2023-10-09 14:06:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_32844.pt[0m


Epoch 47/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:06:48[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=47 step=33558[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020793673037146988, 'time_algorithm_update': 0.015151081298913608, 'loss': -76.1662950275325, 'time_step': 0.015421077984721721, 'observation_error': 0.01971849776632753, 'reward_error': 3.3151435662656013e-06, 'variance': 0.013261033420779344}[0m [36mstep[0m=[35m33558[0m
[2m2023-10-09 14:06:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_33558.pt[0m


Epoch 48/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:07:01[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=48 step=34272[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020890142403396907, 'time_algorithm_update': 0.015207682337079729, 'loss': -77.49938517124379, 'time_step': 0.015479234420284838, 'observation_error': 0.020336572637686522, 'reward_error': 3.1642997891033574e-06, 'variance': 0.012458494372738303}[0m [36mstep[0m=[35m34272[0m
[2m2023-10-09 14:07:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_34272.pt[0m


Epoch 49/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:07:14[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=49 step=34986[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002134253664844844, 'time_algorithm_update': 0.015200645315880869, 'loss': -77.03755572129364, 'time_step': 0.015480168393346108, 'observation_error': 0.019619945484259924, 'reward_error': 2.7424326285327426e-06, 'variance': 0.01177895152161948}[0m [36mstep[0m=[35m34986[0m
[2m2023-10-09 14:07:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_34986.pt[0m


Epoch 50/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:07:26[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=50 step=35700[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021563057138138458, 'time_algorithm_update': 0.014984592670152168, 'loss': -77.51134171472545, 'time_step': 0.015263631564228474, 'observation_error': 0.01981751925116375, 'reward_error': 3.3431674271094927e-06, 'variance': 0.012182941560621587}[0m [36mstep[0m=[35m35700[0m
[2m2023-10-09 14:07:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_35700.pt[0m


Epoch 51/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:07:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=51 step=36414[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023023654766777317, 'time_algorithm_update': 0.015849954941693473, 'loss': -77.45528309819412, 'time_step': 0.016147178762099323, 'observation_error': 0.02150066618403732, 'reward_error': 3.331612752055045e-06, 'variance': 0.012311466766582154}[0m [36mstep[0m=[35m36414[0m
[2m2023-10-09 14:07:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_36414.pt[0m


Epoch 52/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:07:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=52 step=37128[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021724841173957377, 'time_algorithm_update': 0.0147662142745587, 'loss': -78.10641890346837, 'time_step': 0.01504925586262337, 'observation_error': 0.020281671363900754, 'reward_error': 9.610472934341906e-06, 'variance': 0.01162407113662278}[0m [36mstep[0m=[35m37128[0m
[2m2023-10-09 14:07:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_37128.pt[0m


Epoch 53/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:08:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=53 step=37842[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021044045937161486, 'time_algorithm_update': 0.014853644103897052, 'loss': -76.64205711695995, 'time_step': 0.015129709110206583, 'observation_error': 0.01929877417358142, 'reward_error': 4.101725788074521e-06, 'variance': 0.01091347872281648}[0m [36mstep[0m=[35m37842[0m
[2m2023-10-09 14:08:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_37842.pt[0m


Epoch 54/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:08:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=54 step=38556[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020880224991913268, 'time_algorithm_update': 0.014525754111153739, 'loss': -78.03445072334354, 'time_step': 0.014796597617013114, 'observation_error': 0.01916327786717114, 'reward_error': 2.2270953050584957e-06, 'variance': 0.010774953163643831}[0m [36mstep[0m=[35m38556[0m
[2m2023-10-09 14:08:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_38556.pt[0m


Epoch 55/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:08:28[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=55 step=39270[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021195044370592476, 'time_algorithm_update': 0.014559849637563155, 'loss': -77.59274880625621, 'time_step': 0.014834980002972259, 'observation_error': 0.017954835741350043, 'reward_error': 2.4313801179971347e-06, 'variance': 0.010152844677597674}[0m [36mstep[0m=[35m39270[0m
[2m2023-10-09 14:08:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_39270.pt[0m


Epoch 56/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:08:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=56 step=39984[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021309678961916798, 'time_algorithm_update': 0.014740307791894223, 'loss': -78.68404703247113, 'time_step': 0.015016000477873645, 'observation_error': 0.021531760104723564, 'reward_error': 4.171690983006095e-06, 'variance': 0.009869423420906045}[0m [36mstep[0m=[35m39984[0m
[2m2023-10-09 14:08:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_39984.pt[0m


Epoch 57/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:08:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=57 step=40698[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020818950749245012, 'time_algorithm_update': 0.01438494122662798, 'loss': -77.75751477730374, 'time_step': 0.01465849248635001, 'observation_error': 0.0185960181893758, 'reward_error': 2.1755570430906284e-06, 'variance': 0.009459947002388273}[0m [36mstep[0m=[35m40698[0m
[2m2023-10-09 14:08:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_40698.pt[0m


Epoch 58/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:09:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=58 step=41412[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020871643258743927, 'time_algorithm_update': 0.014435052871704102, 'loss': -77.736771949533, 'time_step': 0.014706676747618603, 'observation_error': 0.01747377929400922, 'reward_error': 4.350337779662183e-06, 'variance': 0.009988117389319264}[0m [36mstep[0m=[35m41412[0m
[2m2023-10-09 14:09:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_41412.pt[0m


Epoch 59/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:09:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=59 step=42126[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021167462613402293, 'time_algorithm_update': 0.01450377619233118, 'loss': -78.0737279122617, 'time_step': 0.014778712550465133, 'observation_error': 0.017316657796097253, 'reward_error': 4.669588604354135e-06, 'variance': 0.008945771351477276}[0m [36mstep[0m=[35m42126[0m
[2m2023-10-09 14:09:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_42126.pt[0m


Epoch 60/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:09:29[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=60 step=42840[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021196179697159626, 'time_algorithm_update': 0.014669323167881044, 'loss': -77.97187458166555, 'time_step': 0.014943844797898407, 'observation_error': 0.017733532005384894, 'reward_error': 1.8952197695540662e-06, 'variance': 0.008946940122977916}[0m [36mstep[0m=[35m42840[0m
[2m2023-10-09 14:09:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_42840.pt[0m


Epoch 61/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:09:41[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=61 step=43554[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021206931907589695, 'time_algorithm_update': 0.014824153996315323, 'loss': -78.01397676761745, 'time_step': 0.015100406331508434, 'observation_error': 0.016059973685447183, 'reward_error': 2.311072381435391e-06, 'variance': 0.010168190149778192}[0m [36mstep[0m=[35m43554[0m
[2m2023-10-09 14:09:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_43554.pt[0m


Epoch 62/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:09:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=62 step=44268[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020814376051018552, 'time_algorithm_update': 0.014553138187953405, 'loss': -78.72996571484734, 'time_step': 0.014822127772312538, 'observation_error': 0.01833122876352012, 'reward_error': 2.095863805044849e-06, 'variance': 0.010008616073468158}[0m [36mstep[0m=[35m44268[0m
[2m2023-10-09 14:09:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_44268.pt[0m


Epoch 63/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:10:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=63 step=44982[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021426183502881133, 'time_algorithm_update': 0.014869232471583603, 'loss': -77.92097171815503, 'time_step': 0.015149188976661832, 'observation_error': 0.016238310700048338, 'reward_error': 2.1956943462564206e-06, 'variance': 0.00831548170251247}[0m [36mstep[0m=[35m44982[0m
[2m2023-10-09 14:10:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_44982.pt[0m


Epoch 64/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:10:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=64 step=45696[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020743317964698086, 'time_algorithm_update': 0.0144915794458042, 'loss': -78.43379451313606, 'time_step': 0.014761385797452526, 'observation_error': 0.01646256564396327, 'reward_error': 1.8269129697632203e-06, 'variance': 0.009051075170261083}[0m [36mstep[0m=[35m45696[0m
[2m2023-10-09 14:10:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_45696.pt[0m


Epoch 65/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:10:29[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=65 step=46410[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002122396180609695, 'time_algorithm_update': 0.014578537446778027, 'loss': -78.41138115182979, 'time_step': 0.014853642768218737, 'observation_error': 0.016619349835168926, 'reward_error': 2.694757438805645e-06, 'variance': 0.00897465295971817}[0m [36mstep[0m=[35m46410[0m
[2m2023-10-09 14:10:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_46410.pt[0m


Epoch 66/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:10:41[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=66 step=47124[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021425014784356126, 'time_algorithm_update': 0.014963499972132407, 'loss': -78.06566832245899, 'time_step': 0.015243566670671565, 'observation_error': 0.017051003796589886, 'reward_error': 1.9166893095477282e-06, 'variance': 0.008502340633206422}[0m [36mstep[0m=[35m47124[0m
[2m2023-10-09 14:10:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_47124.pt[0m


Epoch 67/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:10:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=67 step=47838[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020854947279815247, 'time_algorithm_update': 0.014693178716493921, 'loss': -78.92358880617371, 'time_step': 0.014964443628861457, 'observation_error': 0.014902686024524821, 'reward_error': 2.5575435619663346e-06, 'variance': 0.008638659617486473}[0m [36mstep[0m=[35m47838[0m
[2m2023-10-09 14:10:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_47838.pt[0m


Epoch 68/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:11:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=68 step=48552[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022130319718219319, 'time_algorithm_update': 0.014940902632491595, 'loss': -78.73045725915946, 'time_step': 0.015227122801024708, 'observation_error': 0.016482613446658433, 'reward_error': 1.6870579315477022e-06, 'variance': 0.008194898860451038}[0m [36mstep[0m=[35m48552[0m
[2m2023-10-09 14:11:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_48552.pt[0m


Epoch 69/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:11:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=69 step=49266[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020842325119745163, 'time_algorithm_update': 0.01454055643215233, 'loss': -79.42582311950812, 'time_step': 0.014811029955118644, 'observation_error': 0.015731744438526304, 'reward_error': 1.9494816445584713e-06, 'variance': 0.008542064185673066}[0m [36mstep[0m=[35m49266[0m
[2m2023-10-09 14:11:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_49266.pt[0m


Epoch 70/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:11:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=70 step=49980[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021234146353243445, 'time_algorithm_update': 0.01437914271314605, 'loss': -78.56650474559025, 'time_step': 0.014654552569242418, 'observation_error': 0.018121523466271664, 'reward_error': 1.843314797320831e-06, 'variance': 0.007822987150499786}[0m [36mstep[0m=[35m49980[0m
[2m2023-10-09 14:11:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_49980.pt[0m


Epoch 71/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:11:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=71 step=50694[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022029976884857947, 'time_algorithm_update': 0.014831462160211984, 'loss': -80.19951915473831, 'time_step': 0.015115506842690689, 'observation_error': 0.014326725926681138, 'reward_error': 1.7459465228750952e-06, 'variance': 0.007549035514369408}[0m [36mstep[0m=[35m50694[0m
[2m2023-10-09 14:11:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_50694.pt[0m


Epoch 72/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:11:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=72 step=51408[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021460443651642787, 'time_algorithm_update': 0.01455012857079172, 'loss': -78.75375603427406, 'time_step': 0.014829251612601829, 'observation_error': 0.01652331369145319, 'reward_error': 2.0300123401527835e-06, 'variance': 0.007702514014438543}[0m [36mstep[0m=[35m51408[0m
[2m2023-10-09 14:11:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_51408.pt[0m


Epoch 73/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:12:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=73 step=52122[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020989917573474703, 'time_algorithm_update': 0.014656586807315089, 'loss': -78.96153867010976, 'time_step': 0.014930206520550725, 'observation_error': 0.017527478794399913, 'reward_error': 3.145589426486759e-06, 'variance': 0.007194091838594498}[0m [36mstep[0m=[35m52122[0m
[2m2023-10-09 14:12:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_52122.pt[0m


Epoch 74/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:12:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=74 step=52836[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002099412496016473, 'time_algorithm_update': 0.01444467743571733, 'loss': -79.90369937533424, 'time_step': 0.014717711787931725, 'observation_error': 0.017442140181504154, 'reward_error': 1.9715075147959688e-06, 'variance': 0.007636255520243661}[0m [36mstep[0m=[35m52836[0m
[2m2023-10-09 14:12:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_52836.pt[0m


Epoch 75/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:12:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=75 step=53550[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021098474828468985, 'time_algorithm_update': 0.014422829411610836, 'loss': -80.09725469701431, 'time_step': 0.014696981392654717, 'observation_error': 0.014849134741142266, 'reward_error': 2.1441711940589574e-06, 'variance': 0.007357404611289368}[0m [36mstep[0m=[35m53550[0m
[2m2023-10-09 14:12:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_53550.pt[0m


Epoch 76/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:12:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=76 step=54264[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021180652436755953, 'time_algorithm_update': 0.01463661273988355, 'loss': -79.40414496563396, 'time_step': 0.01491236920450248, 'observation_error': 0.01702115326514315, 'reward_error': 2.0568259684023226e-06, 'variance': 0.007390724113648722}[0m [36mstep[0m=[35m54264[0m
[2m2023-10-09 14:12:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_54264.pt[0m


Epoch 77/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:12:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=77 step=54978[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002099512671890045, 'time_algorithm_update': 0.014404883237779975, 'loss': -79.8539142768924, 'time_step': 0.01467786917165548, 'observation_error': 0.01740179097373034, 'reward_error': 2.385841534840785e-06, 'variance': 0.006834036541111861}[0m [36mstep[0m=[35m54978[0m
[2m2023-10-09 14:12:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_54978.pt[0m


Epoch 78/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:13:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=78 step=55692[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021089993271173216, 'time_algorithm_update': 0.01464987502378576, 'loss': -78.25249753011708, 'time_step': 0.014926740101405553, 'observation_error': 0.015025805965438052, 'reward_error': 2.7373128095100627e-06, 'variance': 0.007677000158059534}[0m [36mstep[0m=[35m55692[0m
[2m2023-10-09 14:13:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_55692.pt[0m


Epoch 79/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:13:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=79 step=56406[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020837383109982273, 'time_algorithm_update': 0.014429353866256586, 'loss': -79.94017048042362, 'time_step': 0.014700428778383913, 'observation_error': 0.01690062615617843, 'reward_error': 1.7181551888862997e-06, 'variance': 0.006573447156938419}[0m [36mstep[0m=[35m56406[0m
[2m2023-10-09 14:13:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_56406.pt[0m


Epoch 80/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:13:29[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=80 step=57120[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020929544913668593, 'time_algorithm_update': 0.014369406619993579, 'loss': -80.67717831074691, 'time_step': 0.014640572692165855, 'observation_error': 0.016144039250415562, 'reward_error': 2.0747735047119266e-06, 'variance': 0.006439856403576377}[0m [36mstep[0m=[35m57120[0m
[2m2023-10-09 14:13:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_57120.pt[0m


Epoch 81/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:13:41[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=81 step=57834[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021259123537720754, 'time_algorithm_update': 0.014758115723019554, 'loss': -79.14347351565749, 'time_step': 0.015036563245522208, 'observation_error': 0.01645017787086405, 'reward_error': 1.9089969798835168e-06, 'variance': 0.007064454736405622}[0m [36mstep[0m=[35m57834[0m
[2m2023-10-09 14:13:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_57834.pt[0m


Epoch 82/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:13:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=82 step=58548[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002120132205866966, 'time_algorithm_update': 0.014438337304678951, 'loss': -80.50356947206983, 'time_step': 0.014711938986257344, 'observation_error': 0.01615651528158408, 'reward_error': 1.6605967887132738e-06, 'variance': 0.006787111204015794}[0m [36mstep[0m=[35m58548[0m
[2m2023-10-09 14:13:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_58548.pt[0m


Epoch 83/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:14:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=83 step=59262[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021195812385623196, 'time_algorithm_update': 0.0143738447451124, 'loss': -80.4394633068758, 'time_step': 0.01464978820469533, 'observation_error': 0.015338727937647671, 'reward_error': 1.9853829151937343e-06, 'variance': 0.006914636087313269}[0m [36mstep[0m=[35m59262[0m
[2m2023-10-09 14:14:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_59262.pt[0m


Epoch 84/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:14:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=84 step=59976[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021378032299650818, 'time_algorithm_update': 0.014286903106197924, 'loss': -80.65963200828274, 'time_step': 0.014563021539640026, 'observation_error': 0.013829791119332899, 'reward_error': 1.870771572412974e-06, 'variance': 0.007086696789292775}[0m [36mstep[0m=[35m59976[0m
[2m2023-10-09 14:14:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_59976.pt[0m


Epoch 85/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:14:28[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=85 step=60690[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002163010818951604, 'time_algorithm_update': 0.014226447300416748, 'loss': -80.49482953047551, 'time_step': 0.014505131905820189, 'observation_error': 0.014307876587251226, 'reward_error': 2.016072839009505e-06, 'variance': 0.006813135487049269}[0m [36mstep[0m=[35m60690[0m
[2m2023-10-09 14:14:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_60690.pt[0m


Epoch 86/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:14:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=86 step=61404[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021502083423090916, 'time_algorithm_update': 0.014480418517809956, 'loss': -80.32025144213722, 'time_step': 0.014760330277664655, 'observation_error': 0.013413798153985572, 'reward_error': 1.5450128674060352e-06, 'variance': 0.007109341861158512}[0m [36mstep[0m=[35m61404[0m
[2m2023-10-09 14:14:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_61404.pt[0m


Epoch 87/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:14:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=87 step=62118[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020682945304891976, 'time_algorithm_update': 0.014086281885953844, 'loss': -80.01001286172733, 'time_step': 0.014354459711817466, 'observation_error': 0.014735399817997345, 'reward_error': 1.6151065916655696e-06, 'variance': 0.006520717425224766}[0m [36mstep[0m=[35m62118[0m
[2m2023-10-09 14:14:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_62118.pt[0m


Epoch 88/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:15:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=88 step=62832[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021258054995069317, 'time_algorithm_update': 0.01424341976475649, 'loss': -80.21379751851913, 'time_step': 0.01451837014751274, 'observation_error': 0.014716315094614345, 'reward_error': 3.043427574903426e-06, 'variance': 0.006513654599539357}[0m [36mstep[0m=[35m62832[0m
[2m2023-10-09 14:15:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_62832.pt[0m


Epoch 89/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:15:15[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=89 step=63546[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021328912729642638, 'time_algorithm_update': 0.014418959617614746, 'loss': -80.87021452372147, 'time_step': 0.014696882218539882, 'observation_error': 0.01495031952889092, 'reward_error': 2.253431379649202e-06, 'variance': 0.006472849949963009}[0m [36mstep[0m=[35m63546[0m
[2m2023-10-09 14:15:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_63546.pt[0m


Epoch 90/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:15:27[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=90 step=64260[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021094167265905386, 'time_algorithm_update': 0.014271417251821994, 'loss': -80.94701258341472, 'time_step': 0.014546537265724161, 'observation_error': 0.01518701437226938, 'reward_error': 1.7316981546970126e-06, 'variance': 0.006086004066898945}[0m [36mstep[0m=[35m64260[0m
[2m2023-10-09 14:15:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_64260.pt[0m


Epoch 91/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:15:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=91 step=64974[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021564726736031327, 'time_algorithm_update': 0.014620348518969966, 'loss': -80.91680886030865, 'time_step': 0.014901275728263106, 'observation_error': 0.014203592499476335, 'reward_error': 1.8647741273540998e-06, 'variance': 0.005557116344698021}[0m [36mstep[0m=[35m64974[0m
[2m2023-10-09 14:15:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_64974.pt[0m


Epoch 92/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:15:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=92 step=65688[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021129028469908472, 'time_algorithm_update': 0.014385006340945802, 'loss': -81.5229007763689, 'time_step': 0.014661340486435663, 'observation_error': 0.013490346206361465, 'reward_error': 1.6726225846907535e-06, 'variance': 0.006042411582605036}[0m [36mstep[0m=[35m65688[0m
[2m2023-10-09 14:15:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_65688.pt[0m


Epoch 93/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:16:02[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=93 step=66402[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021162286859934402, 'time_algorithm_update': 0.01461401239496653, 'loss': -81.38818030263863, 'time_step': 0.014890014958314868, 'observation_error': 0.01633619783690839, 'reward_error': 1.305951008463146e-06, 'variance': 0.006030500847035085}[0m [36mstep[0m=[35m66402[0m
[2m2023-10-09 14:16:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_66402.pt[0m


Epoch 94/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:16:14[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=94 step=67116[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002193033528261158, 'time_algorithm_update': 0.014872070120162322, 'loss': -80.5466953966798, 'time_step': 0.015157899268868924, 'observation_error': 0.01462281164745007, 'reward_error': 2.546639138509954e-06, 'variance': 0.005766071394894718}[0m [36mstep[0m=[35m67116[0m
[2m2023-10-09 14:16:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_67116.pt[0m


Epoch 95/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:16:26[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=95 step=67830[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021574377011852105, 'time_algorithm_update': 0.014576047408480603, 'loss': -80.46914626236389, 'time_step': 0.014857623423514914, 'observation_error': 0.015755373311055112, 'reward_error': 1.6124095761039432e-06, 'variance': 0.006017404596103253}[0m [36mstep[0m=[35m67830[0m
[2m2023-10-09 14:16:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_67830.pt[0m


Epoch 96/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:16:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=96 step=68544[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021930068146948722, 'time_algorithm_update': 0.014738495610341304, 'loss': -80.37777314065885, 'time_step': 0.01502440323014887, 'observation_error': 0.014054186139520813, 'reward_error': 4.401478157297201e-06, 'variance': 0.005620696187408743}[0m [36mstep[0m=[35m68544[0m
[2m2023-10-09 14:16:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_68544.pt[0m


Epoch 97/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:16:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=97 step=69258[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021482949831238648, 'time_algorithm_update': 0.014541421617780412, 'loss': -79.34974115435817, 'time_step': 0.014820677225663215, 'observation_error': 0.015408878145320918, 'reward_error': 2.4652492469822468e-06, 'variance': 0.007447562566568507}[0m [36mstep[0m=[35m69258[0m
[2m2023-10-09 14:16:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_69258.pt[0m


Epoch 98/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:17:02[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=98 step=69972[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002150395337273093, 'time_algorithm_update': 0.01472629752813601, 'loss': -82.24541119701054, 'time_step': 0.01500421211499126, 'observation_error': 0.0146447760374555, 'reward_error': 1.4894125860536515e-06, 'variance': 0.005719171204395526}[0m [36mstep[0m=[35m69972[0m
[2m2023-10-09 14:17:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_69972.pt[0m


Epoch 99/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:17:14[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=99 step=70686[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022170890947016014, 'time_algorithm_update': 0.014773189854555103, 'loss': -81.87600783072934, 'time_step': 0.01505628119663698, 'observation_error': 0.017585534630231874, 'reward_error': 1.815348664310834e-06, 'variance': 0.005869929262789571}[0m [36mstep[0m=[35m70686[0m
[2m2023-10-09 14:17:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_70686.pt[0m


Epoch 100/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:17:26[0m [[32m[1minfo     [0m] [1mexp_0_20231009135625: epoch=100 step=71400[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021734458057820296, 'time_algorithm_update': 0.014566708345707058, 'loss': -81.86326325707743, 'time_step': 0.014847554746462183, 'observation_error': 0.015845998313490685, 'reward_error': 6.271410781992436e-06, 'variance': 0.0058532596414191595}[0m [36mstep[0m=[35m71400[0m
[2m2023-10-09 14:17:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009135625/model_71400.pt[0m
Using InvertedPendulumEncoderFactory
[2m2023-10-09 14:17:26[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-09 14:17:26[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_0_20231009141726[0m
[2m2023-10-09 14:17:26[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-09 14:17:26[0m [[32m[1mdebug    [0m] [1mModels have been buil

Epoch 1/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:17:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=1 step=714[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002256064187912714, 'time_algorithm_update': 0.012807161534199862, 'loss': -21.841717275386515, 'time_step': 0.013104870873672956, 'observation_error': 0.03778056283344044, 'reward_error': 0.0007296648183929386, 'variance': 0.043947844644393066}[0m [36mstep[0m=[35m714[0m
[2m2023-10-09 14:17:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_714.pt[0m


Epoch 2/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:17:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=2 step=1428[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021573508820947813, 'time_algorithm_update': 0.012546257478516308, 'loss': -31.499347008910835, 'time_step': 0.012827121577009099, 'observation_error': 0.025875636979822057, 'reward_error': 0.000644891134066919, 'variance': 0.024246110829693104}[0m [36mstep[0m=[35m1428[0m
[2m2023-10-09 14:17:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_1428.pt[0m


Epoch 3/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:17:58[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=3 step=2142[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021630575676926043, 'time_algorithm_update': 0.01340440374796464, 'loss': -38.5481392061677, 'time_step': 0.01368534731931713, 'observation_error': 0.016330546873064712, 'reward_error': 9.359498604590196e-05, 'variance': 0.009924142810789134}[0m [36mstep[0m=[35m2142[0m
[2m2023-10-09 14:17:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_2142.pt[0m


Epoch 4/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:18:10[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=4 step=2856[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022122672959869983, 'time_algorithm_update': 0.014604025528210551, 'loss': -44.745254901944755, 'time_step': 0.014889132074949121, 'observation_error': 0.011349294920989711, 'reward_error': 4.037828398072611e-05, 'variance': 0.0034623273987586553}[0m [36mstep[0m=[35m2856[0m
[2m2023-10-09 14:18:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_2856.pt[0m


Epoch 5/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:18:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=5 step=3570[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022297546643168986, 'time_algorithm_update': 0.015165804814891656, 'loss': -51.241601059082846, 'time_step': 0.0154518249655972, 'observation_error': 0.01097301442284152, 'reward_error': 2.4818691332643535e-05, 'variance': 0.002604008295165193}[0m [36mstep[0m=[35m3570[0m
[2m2023-10-09 14:18:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_3570.pt[0m


Epoch 6/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:18:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=6 step=4284[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021784412426774908, 'time_algorithm_update': 0.015689192365865412, 'loss': -55.95607730320522, 'time_step': 0.015969768291761895, 'observation_error': 0.012809016253268395, 'reward_error': 9.492611939190357e-06, 'variance': 0.004898327317497148}[0m [36mstep[0m=[35m4284[0m
[2m2023-10-09 14:18:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_4284.pt[0m


Epoch 7/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:18:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=7 step=4998[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021610841029832343, 'time_algorithm_update': 0.016362800651571665, 'loss': -58.91342167026188, 'time_step': 0.016643517491530303, 'observation_error': 0.020923520412896586, 'reward_error': 2.143786315382894e-05, 'variance': 0.013194666477573596}[0m [36mstep[0m=[35m4998[0m
[2m2023-10-09 14:18:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_4998.pt[0m


Epoch 8/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:19:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=8 step=5712[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021665203137224127, 'time_algorithm_update': 0.01666252319218398, 'loss': -61.784606045033755, 'time_step': 0.016943330190428832, 'observation_error': 0.02830314882253907, 'reward_error': 1.16753263311306e-05, 'variance': 0.02127800777729436}[0m [36mstep[0m=[35m5712[0m
[2m2023-10-09 14:19:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_5712.pt[0m


Epoch 9/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:19:19[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=9 step=6426[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021398701921564525, 'time_algorithm_update': 0.01654307681973241, 'loss': -64.04068742746732, 'time_step': 0.016818611895670743, 'observation_error': 0.02994277905707382, 'reward_error': 9.762886039775498e-06, 'variance': 0.031999359578580445}[0m [36mstep[0m=[35m6426[0m
[2m2023-10-09 14:19:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_6426.pt[0m


Epoch 10/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:19:33[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=10 step=7140[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021684704040612828, 'time_algorithm_update': 0.016726470794998297, 'loss': -65.1789113720592, 'time_step': 0.017005169425024037, 'observation_error': 0.04068847478517718, 'reward_error': 1.264668397206118e-05, 'variance': 0.04417895117718011}[0m [36mstep[0m=[35m7140[0m
[2m2023-10-09 14:19:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_7140.pt[0m


Epoch 11/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:19:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=11 step=7854[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021974980330266873, 'time_algorithm_update': 0.01675052836495621, 'loss': -66.76282297930463, 'time_step': 0.017035203487599262, 'observation_error': 0.04426220540176707, 'reward_error': 1.2149778719196216e-05, 'variance': 0.050996626433658265}[0m [36mstep[0m=[35m7854[0m
[2m2023-10-09 14:19:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_7854.pt[0m


Epoch 12/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:20:02[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=12 step=8568[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022108481377780605, 'time_algorithm_update': 0.01670925623896409, 'loss': -67.61511248633975, 'time_step': 0.01699614391273477, 'observation_error': 0.04896817856768935, 'reward_error': 1.2930045262075566e-05, 'variance': 0.06019634844399739}[0m [36mstep[0m=[35m8568[0m
[2m2023-10-09 14:20:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_8568.pt[0m


Epoch 13/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:20:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=13 step=9282[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022174163358886035, 'time_algorithm_update': 0.0172071296627782, 'loss': -68.90578571654835, 'time_step': 0.017494558286266167, 'observation_error': 0.04523313296554563, 'reward_error': 8.20246073084139e-06, 'variance': 0.06060268941866827}[0m [36mstep[0m=[35m9282[0m
[2m2023-10-09 14:20:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_9282.pt[0m


Epoch 14/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:20:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=14 step=9996[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021608703944529472, 'time_algorithm_update': 0.01691626834602249, 'loss': -69.17689861436519, 'time_step': 0.017196717382479115, 'observation_error': 0.04072284602216088, 'reward_error': 6.414086547225542e-06, 'variance': 0.07174265188051211}[0m [36mstep[0m=[35m9996[0m
[2m2023-10-09 14:20:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_9996.pt[0m


Epoch 15/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:20:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=15 step=10710[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022450047714703557, 'time_algorithm_update': 0.01711363551997337, 'loss': -70.0109861865431, 'time_step': 0.017402417519513297, 'observation_error': 0.07889875241253914, 'reward_error': 6.042484630938162e-06, 'variance': 0.0699090614683263}[0m [36mstep[0m=[35m10710[0m
[2m2023-10-09 14:20:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_10710.pt[0m


Epoch 16/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:21:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=16 step=11424[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002183907506178741, 'time_algorithm_update': 0.0169178437785942, 'loss': -70.16200696520444, 'time_step': 0.017200387158647638, 'observation_error': 0.06304014215583992, 'reward_error': 8.781067398279794e-06, 'variance': 0.07748196555415514}[0m [36mstep[0m=[35m11424[0m
[2m2023-10-09 14:21:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_11424.pt[0m


Epoch 17/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:21:15[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=17 step=12138[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002236736922704873, 'time_algorithm_update': 0.017409961430632434, 'loss': -71.0997248604184, 'time_step': 0.0176970581380593, 'observation_error': 0.0620101275953538, 'reward_error': 7.2498477263187e-06, 'variance': 0.07577431650485877}[0m [36mstep[0m=[35m12138[0m
[2m2023-10-09 14:21:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_12138.pt[0m


Epoch 18/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:21:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=18 step=12852[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021880347521699108, 'time_algorithm_update': 0.01685719229593998, 'loss': -71.53247265588669, 'time_step': 0.01713929757350633, 'observation_error': 0.05799048035115837, 'reward_error': 6.4454952041084595e-06, 'variance': 0.06669480647421748}[0m [36mstep[0m=[35m12852[0m
[2m2023-10-09 14:21:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_12852.pt[0m


Epoch 19/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:21:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=19 step=13566[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022451717312596425, 'time_algorithm_update': 0.016260535109276865, 'loss': -70.82642027243179, 'time_step': 0.016553571244247823, 'observation_error': 0.054089998019409086, 'reward_error': 5.611886308161442e-06, 'variance': 0.06156029462790088}[0m [36mstep[0m=[35m13566[0m
[2m2023-10-09 14:21:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_13566.pt[0m


Epoch 20/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:21:58[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=20 step=14280[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002144952448142343, 'time_algorithm_update': 0.0154951109605677, 'loss': -71.80916153215894, 'time_step': 0.015772345019321815, 'observation_error': 0.05100152313159361, 'reward_error': 5.453802087180598e-06, 'variance': 0.06173878629337127}[0m [36mstep[0m=[35m14280[0m
[2m2023-10-09 14:21:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_14280.pt[0m


Epoch 21/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:22:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=21 step=14994[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021922554956430815, 'time_algorithm_update': 0.016034287230975153, 'loss': -72.16487496640502, 'time_step': 0.016319507644290014, 'observation_error': 0.05608844113107088, 'reward_error': 4.6366205587670375e-06, 'variance': 0.05806978637441349}[0m [36mstep[0m=[35m14994[0m
[2m2023-10-09 14:22:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_14994.pt[0m


Epoch 22/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:22:26[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=22 step=15708[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022054787109545968, 'time_algorithm_update': 0.01578006657565675, 'loss': -72.73132107571728, 'time_step': 0.016065581839959493, 'observation_error': 0.043924992144548425, 'reward_error': 4.8344627186487795e-06, 'variance': 0.05204213530192001}[0m [36mstep[0m=[35m15708[0m
[2m2023-10-09 14:22:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_15708.pt[0m


Epoch 23/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:22:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=23 step=16422[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022119433939957818, 'time_algorithm_update': 0.01620010434746408, 'loss': -72.5908254308193, 'time_step': 0.016485441632631447, 'observation_error': 0.049523637596040525, 'reward_error': 7.970654582476291e-06, 'variance': 0.050025414392855856}[0m [36mstep[0m=[35m16422[0m
[2m2023-10-09 14:22:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_16422.pt[0m


Epoch 24/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:22:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=24 step=17136[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002139509559011593, 'time_algorithm_update': 0.016200258952228964, 'loss': -73.42209458417919, 'time_step': 0.016476664890428216, 'observation_error': 0.048840320839143164, 'reward_error': 6.055598290171099e-06, 'variance': 0.047643160414762555}[0m [36mstep[0m=[35m17136[0m
[2m2023-10-09 14:22:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_17136.pt[0m


Epoch 25/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:23:08[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=25 step=17850[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021905959153375705, 'time_algorithm_update': 0.016437057353535286, 'loss': -72.47939020178231, 'time_step': 0.01672045890690566, 'observation_error': 0.04846236809314397, 'reward_error': 3.6700962584516032e-06, 'variance': 0.05095873252454056}[0m [36mstep[0m=[35m17850[0m
[2m2023-10-09 14:23:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_17850.pt[0m


Epoch 26/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:23:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=26 step=18564[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002211970107562068, 'time_algorithm_update': 0.01670480375530339, 'loss': -73.47669423394511, 'time_step': 0.016991168844933603, 'observation_error': 0.04751892300576428, 'reward_error': 3.486075025882942e-06, 'variance': 0.04495312764478753}[0m [36mstep[0m=[35m18564[0m
[2m2023-10-09 14:23:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_18564.pt[0m


Epoch 27/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:23:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=27 step=19278[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021661630197733391, 'time_algorithm_update': 0.016745690538101838, 'loss': -73.8293921687022, 'time_step': 0.01702640370494511, 'observation_error': 0.04117739473869541, 'reward_error': 3.5545229237452016e-06, 'variance': 0.04390078204117206}[0m [36mstep[0m=[35m19278[0m
[2m2023-10-09 14:23:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_19278.pt[0m


Epoch 28/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:23:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=28 step=19992[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000222973796833797, 'time_algorithm_update': 0.01665056052328158, 'loss': -73.26223322724094, 'time_step': 0.01693963002757866, 'observation_error': 0.03312063244738408, 'reward_error': 5.4192126561483535e-06, 'variance': 0.040335643172770004}[0m [36mstep[0m=[35m19992[0m
[2m2023-10-09 14:23:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_19992.pt[0m


Epoch 29/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:24:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=29 step=20706[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022005400403874928, 'time_algorithm_update': 0.01665964313581878, 'loss': -73.95884313636802, 'time_step': 0.016943725551209862, 'observation_error': 0.04103762657138098, 'reward_error': 3.693322308972833e-06, 'variance': 0.03741244126619363}[0m [36mstep[0m=[35m20706[0m
[2m2023-10-09 14:24:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_20706.pt[0m


Epoch 30/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:24:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=30 step=21420[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021996451359169157, 'time_algorithm_update': 0.01668179435890262, 'loss': -74.17946868650719, 'time_step': 0.016965969603936544, 'observation_error': 0.037039268009450704, 'reward_error': 3.5881007410806007e-06, 'variance': 0.03452560794590831}[0m [36mstep[0m=[35m21420[0m
[2m2023-10-09 14:24:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_21420.pt[0m


Epoch 31/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:24:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=31 step=22134[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002158860198589934, 'time_algorithm_update': 0.016635091030965, 'loss': -74.2455909085207, 'time_step': 0.01691271210251068, 'observation_error': 0.038131931914889165, 'reward_error': 4.303593656974647e-06, 'variance': 0.03380779957178706}[0m [36mstep[0m=[35m22134[0m
[2m2023-10-09 14:24:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_22134.pt[0m


Epoch 32/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:24:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=32 step=22848[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022058393440994563, 'time_algorithm_update': 0.01669847898456563, 'loss': -74.75730161105885, 'time_step': 0.016982162699979896, 'observation_error': 0.03411215085685449, 'reward_error': 6.178884318999235e-06, 'variance': 0.03321850525290486}[0m [36mstep[0m=[35m22848[0m
[2m2023-10-09 14:24:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_22848.pt[0m


Epoch 33/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:25:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=33 step=23562[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022021795855182893, 'time_algorithm_update': 0.01627500417853604, 'loss': -74.47537154510242, 'time_step': 0.01656179134251357, 'observation_error': 0.03819832164768275, 'reward_error': 3.3043493850997324e-06, 'variance': 0.03013309632993327}[0m [36mstep[0m=[35m23562[0m
[2m2023-10-09 14:25:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_23562.pt[0m


Epoch 34/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:25:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=34 step=24276[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002128800758126737, 'time_algorithm_update': 0.016386020083387357, 'loss': -74.3734937168303, 'time_step': 0.01666260466856115, 'observation_error': 0.03130562543739575, 'reward_error': 5.4583741306855665e-06, 'variance': 0.02806834615150411}[0m [36mstep[0m=[35m24276[0m
[2m2023-10-09 14:25:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_24276.pt[0m


Epoch 35/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:25:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=35 step=24990[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021361737024216425, 'time_algorithm_update': 0.01612238322987276, 'loss': -75.25164205487035, 'time_step': 0.01639853305175525, 'observation_error': 0.02995236092515133, 'reward_error': 2.7947826924832596e-06, 'variance': 0.02642652570552394}[0m [36mstep[0m=[35m24990[0m
[2m2023-10-09 14:25:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_24990.pt[0m


Epoch 36/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:25:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=36 step=25704[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021580420956224285, 'time_algorithm_update': 0.01661920714445141, 'loss': -74.83223158638684, 'time_step': 0.01689768104660077, 'observation_error': 0.030539808688491314, 'reward_error': 3.2226052643556034e-06, 'variance': 0.02731358755310015}[0m [36mstep[0m=[35m25704[0m
[2m2023-10-09 14:25:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_25704.pt[0m


Epoch 37/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:25:58[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=37 step=26418[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021321466323040447, 'time_algorithm_update': 0.01621170437970415, 'loss': -74.97572293001063, 'time_step': 0.016488470617128687, 'observation_error': 0.035158363202307835, 'reward_error': 4.420384462705745e-06, 'variance': 0.025020476713217412}[0m [36mstep[0m=[35m26418[0m
[2m2023-10-09 14:25:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_26418.pt[0m


Epoch 38/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:26:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=38 step=27132[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021436835537437632, 'time_algorithm_update': 0.016342551434407382, 'loss': -75.83434172237621, 'time_step': 0.016618648163076875, 'observation_error': 0.03067147796038905, 'reward_error': 2.5226656923988272e-06, 'variance': 0.024039916355679408}[0m [36mstep[0m=[35m27132[0m
[2m2023-10-09 14:26:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_27132.pt[0m


Epoch 39/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:26:26[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=39 step=27846[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021210137535544003, 'time_algorithm_update': 0.01626365692341695, 'loss': -75.85508594860215, 'time_step': 0.016538149502430977, 'observation_error': 0.028941452748240483, 'reward_error': 2.740149557201877e-06, 'variance': 0.024781162980646837}[0m [36mstep[0m=[35m27846[0m
[2m2023-10-09 14:26:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_27846.pt[0m


Epoch 40/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:26:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=40 step=28560[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021862082120751133, 'time_algorithm_update': 0.016189246284527604, 'loss': -74.76908367087528, 'time_step': 0.016472445482633363, 'observation_error': 0.032324265564906, 'reward_error': 2.5256525082023014e-06, 'variance': 0.0244710363916078}[0m [36mstep[0m=[35m28560[0m
[2m2023-10-09 14:26:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_28560.pt[0m


Epoch 41/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:26:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=41 step=29274[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021863417799065427, 'time_algorithm_update': 0.015927911138668115, 'loss': -74.71584039041642, 'time_step': 0.01621232246484409, 'observation_error': 0.027162275046371396, 'reward_error': 4.716031942312234e-06, 'variance': 0.02044638152625917}[0m [36mstep[0m=[35m29274[0m
[2m2023-10-09 14:26:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_29274.pt[0m


Epoch 42/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:27:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=42 step=29988[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002151938045726103, 'time_algorithm_update': 0.016045535645898983, 'loss': -74.36973610111312, 'time_step': 0.016325993030345074, 'observation_error': 0.022045385084733606, 'reward_error': 3.082838419410702e-06, 'variance': 0.020425898158392592}[0m [36mstep[0m=[35m29988[0m
[2m2023-10-09 14:27:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_29988.pt[0m


Epoch 43/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:27:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=43 step=30702[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002168099753329066, 'time_algorithm_update': 0.01582842347334747, 'loss': -76.09072258893181, 'time_step': 0.016109520647706103, 'observation_error': 0.022503806621188394, 'reward_error': 2.867737967568659e-06, 'variance': 0.01794555728230211}[0m [36mstep[0m=[35m30702[0m
[2m2023-10-09 14:27:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_30702.pt[0m


Epoch 44/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:27:33[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=44 step=31416[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021501115056313052, 'time_algorithm_update': 0.015849117137470833, 'loss': -76.4718951938533, 'time_step': 0.016123550278799876, 'observation_error': 0.02152166204384241, 'reward_error': 2.584886845779016e-06, 'variance': 0.018390339254126337}[0m [36mstep[0m=[35m31416[0m
[2m2023-10-09 14:27:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_31416.pt[0m


Epoch 45/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:27:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=45 step=32130[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022000124474533465, 'time_algorithm_update': 0.01604359390354958, 'loss': -76.8617271281758, 'time_step': 0.01632642011348607, 'observation_error': 0.020003752334077935, 'reward_error': 3.7571967762318756e-06, 'variance': 0.01557597136446622}[0m [36mstep[0m=[35m32130[0m
[2m2023-10-09 14:27:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_32130.pt[0m


Epoch 46/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:28:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=46 step=32844[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002146698847538283, 'time_algorithm_update': 0.01592510087149484, 'loss': -74.68620949299061, 'time_step': 0.016203044509353424, 'observation_error': 0.01988487822083355, 'reward_error': 5.575957655827625e-06, 'variance': 0.016410183935399163}[0m [36mstep[0m=[35m32844[0m
[2m2023-10-09 14:28:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_32844.pt[0m


Epoch 47/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:28:13[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=47 step=33558[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021756763885669015, 'time_algorithm_update': 0.01610487916556393, 'loss': -74.85497194177964, 'time_step': 0.01638865303926441, 'observation_error': 0.02224674418193957, 'reward_error': 2.3599647714966362e-06, 'variance': 0.016974098827190736}[0m [36mstep[0m=[35m33558[0m
[2m2023-10-09 14:28:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_33558.pt[0m


Epoch 48/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:28:27[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=48 step=34272[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002186184837704613, 'time_algorithm_update': 0.01605233291284043, 'loss': -76.2649225801313, 'time_step': 0.016335486030044342, 'observation_error': 0.01942859094473077, 'reward_error': 2.7146715971162534e-06, 'variance': 0.015043733192560041}[0m [36mstep[0m=[35m34272[0m
[2m2023-10-09 14:28:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_34272.pt[0m


Epoch 49/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:28:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=49 step=34986[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002165468467049906, 'time_algorithm_update': 0.01589336816002341, 'loss': -76.9166685179168, 'time_step': 0.01617375475351884, 'observation_error': 0.01966918363837739, 'reward_error': 3.3463669562334654e-06, 'variance': 0.014125965026800108}[0m [36mstep[0m=[35m34986[0m
[2m2023-10-09 14:28:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_34986.pt[0m


Epoch 50/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:28:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=50 step=35700[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021670946553975593, 'time_algorithm_update': 0.015625494200976288, 'loss': -76.34768157220688, 'time_step': 0.015905738878650825, 'observation_error': 0.01741786112468102, 'reward_error': 3.2604119338276902e-06, 'variance': 0.013658830793141659}[0m [36mstep[0m=[35m35700[0m
[2m2023-10-09 14:28:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_35700.pt[0m


Epoch 51/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:29:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=51 step=36414[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022079330198571128, 'time_algorithm_update': 0.015966469834164745, 'loss': -76.65972272221114, 'time_step': 0.016253490741847277, 'observation_error': 0.022421565311181824, 'reward_error': 2.2900096015400653e-06, 'variance': 0.013166034969807233}[0m [36mstep[0m=[35m36414[0m
[2m2023-10-09 14:29:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_36414.pt[0m


Epoch 52/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:29:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=52 step=37128[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002163454931991107, 'time_algorithm_update': 0.015482881489922018, 'loss': -75.8503636873069, 'time_step': 0.01576412224969944, 'observation_error': 0.02063457440775088, 'reward_error': 2.9222880246609853e-06, 'variance': 0.014047315053901619}[0m [36mstep[0m=[35m37128[0m
[2m2023-10-09 14:29:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_37128.pt[0m


Epoch 53/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:29:33[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=53 step=37842[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002240787367192971, 'time_algorithm_update': 0.015709736100097998, 'loss': -77.15028539224834, 'time_step': 0.016000440808571353, 'observation_error': 0.019644232328267335, 'reward_error': 2.649117047756873e-06, 'variance': 0.012639391046660375}[0m [36mstep[0m=[35m37842[0m
[2m2023-10-09 14:29:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_37842.pt[0m


Epoch 54/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:29:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=54 step=38556[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002248808115470309, 'time_algorithm_update': 0.01599581067969485, 'loss': -75.04315538299518, 'time_step': 0.016291918874788685, 'observation_error': 0.02058075465941236, 'reward_error': 2.7336561025629694e-06, 'variance': 0.012528095439215707}[0m [36mstep[0m=[35m38556[0m
[2m2023-10-09 14:29:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_38556.pt[0m


Epoch 55/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:29:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=55 step=39270[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021332619236964805, 'time_algorithm_update': 0.01555023507243779, 'loss': -76.98601984243099, 'time_step': 0.01582745644224792, 'observation_error': 0.01949998339880731, 'reward_error': 2.692882806752057e-06, 'variance': 0.011881364601894389}[0m [36mstep[0m=[35m39270[0m
[2m2023-10-09 14:29:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_39270.pt[0m


Epoch 56/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:30:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=56 step=39984[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022469081130682252, 'time_algorithm_update': 0.016037193666987058, 'loss': -77.24812147129818, 'time_step': 0.016327631573717135, 'observation_error': 0.021697531982467284, 'reward_error': 2.215902593692583e-06, 'variance': 0.010969760000542181}[0m [36mstep[0m=[35m39984[0m
[2m2023-10-09 14:30:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_39984.pt[0m


Epoch 57/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:30:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=57 step=40698[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002163909062617967, 'time_algorithm_update': 0.015577195405292244, 'loss': -77.03731651199298, 'time_step': 0.01585691582922842, 'observation_error': 0.021787597514210288, 'reward_error': 1.750998936545304e-06, 'variance': 0.011433502426059521}[0m [36mstep[0m=[35m40698[0m
[2m2023-10-09 14:30:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_40698.pt[0m


Epoch 58/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:30:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=58 step=41412[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022106210724646304, 'time_algorithm_update': 0.015826685421940993, 'loss': -77.17092950270623, 'time_step': 0.016112753323146274, 'observation_error': 0.01984348364313661, 'reward_error': 2.8584964761419832e-06, 'variance': 0.011318178756402522}[0m [36mstep[0m=[35m41412[0m
[2m2023-10-09 14:30:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_41412.pt[0m


Epoch 59/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:30:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=59 step=42126[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002208564127860617, 'time_algorithm_update': 0.015575490411924048, 'loss': -77.64275148803112, 'time_step': 0.01586056723982012, 'observation_error': 0.018434305473623513, 'reward_error': 2.417205348440051e-06, 'variance': 0.011068284534270344}[0m [36mstep[0m=[35m42126[0m
[2m2023-10-09 14:30:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_42126.pt[0m


Epoch 60/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:31:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=60 step=42840[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021453765260071313, 'time_algorithm_update': 0.015152588945810869, 'loss': -77.06214465213424, 'time_step': 0.015428626570714955, 'observation_error': 0.02138518233174706, 'reward_error': 2.5884466788438487e-06, 'variance': 0.011367841376977026}[0m [36mstep[0m=[35m42840[0m
[2m2023-10-09 14:31:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_42840.pt[0m


Epoch 61/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:31:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=61 step=43554[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022740056868694744, 'time_algorithm_update': 0.01569139590116442, 'loss': -77.7352271761213, 'time_step': 0.015985952371976624, 'observation_error': 0.021682579785406792, 'reward_error': 2.20953592182014e-06, 'variance': 0.010892397394887227}[0m [36mstep[0m=[35m43554[0m
[2m2023-10-09 14:31:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_43554.pt[0m


Epoch 62/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:31:29[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=62 step=44268[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020973522122166738, 'time_algorithm_update': 0.015351896860352418, 'loss': -75.79844413952333, 'time_step': 0.01562669263834379, 'observation_error': 0.01816706124888554, 'reward_error': 3.6177831171705424e-06, 'variance': 0.010763199115834905}[0m [36mstep[0m=[35m44268[0m
[2m2023-10-09 14:31:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_44268.pt[0m


Epoch 63/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:31:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=63 step=44982[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021491097468955843, 'time_algorithm_update': 0.01596932685007902, 'loss': -77.45006667131803, 'time_step': 0.01624862052479378, 'observation_error': 0.017886305800894814, 'reward_error': 2.6745743533801575e-06, 'variance': 0.01154676540085326}[0m [36mstep[0m=[35m44982[0m
[2m2023-10-09 14:31:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_44982.pt[0m


Epoch 64/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:31:55[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=64 step=45696[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002097589295117461, 'time_algorithm_update': 0.015412250486742548, 'loss': -78.43997608580175, 'time_step': 0.015684068036012622, 'observation_error': 0.017683113725735132, 'reward_error': 2.0976117477984e-06, 'variance': 0.009967241608699702}[0m [36mstep[0m=[35m45696[0m
[2m2023-10-09 14:31:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_45696.pt[0m


Epoch 65/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:32:08[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=65 step=46410[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022090483112495487, 'time_algorithm_update': 0.015763702512789173, 'loss': -78.45398763715386, 'time_step': 0.016050957164176705, 'observation_error': 0.01913019244769703, 'reward_error': 1.856814888775456e-06, 'variance': 0.009191110334188868}[0m [36mstep[0m=[35m46410[0m
[2m2023-10-09 14:32:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_46410.pt[0m


Epoch 66/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:32:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=66 step=47124[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002616032832810859, 'time_algorithm_update': 0.018026312192281086, 'loss': -76.61251545620232, 'time_step': 0.018364662549742797, 'observation_error': 0.016249596120070417, 'reward_error': 3.6198003056321544e-06, 'variance': 0.009348126055445242}[0m [36mstep[0m=[35m47124[0m
[2m2023-10-09 14:32:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_47124.pt[0m


Epoch 67/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:32:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=67 step=47838[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002706872314966026, 'time_algorithm_update': 0.018901963527796984, 'loss': -77.85269419352214, 'time_step': 0.019256180741873775, 'observation_error': 0.01844470588551856, 'reward_error': 2.202730375373139e-06, 'variance': 0.009369720052882445}[0m [36mstep[0m=[35m47838[0m
[2m2023-10-09 14:32:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_47838.pt[0m


Epoch 68/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:32:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=68 step=48552[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024483016892975454, 'time_algorithm_update': 0.01704154595607469, 'loss': -77.09420624366996, 'time_step': 0.017361553109326616, 'observation_error': 0.018987866027262965, 'reward_error': 2.219211626312916e-06, 'variance': 0.009679752314833518}[0m [36mstep[0m=[35m48552[0m
[2m2023-10-09 14:32:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_48552.pt[0m


Epoch 69/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:33:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=69 step=49266[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022131221301081467, 'time_algorithm_update': 0.015862044166116154, 'loss': -78.03544658105247, 'time_step': 0.016150173352879972, 'observation_error': 0.016523570001008084, 'reward_error': 2.002063244025298e-06, 'variance': 0.008549231830723664}[0m [36mstep[0m=[35m49266[0m
[2m2023-10-09 14:33:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_49266.pt[0m


Epoch 70/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:33:21[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=70 step=49980[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024063647294244846, 'time_algorithm_update': 0.017136632895269312, 'loss': -78.37915227660278, 'time_step': 0.01745036870491605, 'observation_error': 0.016482382622584668, 'reward_error': 2.6201017388978798e-06, 'variance': 0.009483225421512136}[0m [36mstep[0m=[35m49980[0m
[2m2023-10-09 14:33:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_49980.pt[0m


Epoch 71/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:33:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=71 step=50694[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002397095121923281, 'time_algorithm_update': 0.017002681056324506, 'loss': -79.32997630891346, 'time_step': 0.017311324592397995, 'observation_error': 0.01683281938698615, 'reward_error': 2.0159250147793147e-06, 'variance': 0.008755914436841726}[0m [36mstep[0m=[35m50694[0m
[2m2023-10-09 14:33:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_50694.pt[0m


Epoch 72/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:33:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=72 step=51408[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00029350562589843063, 'time_algorithm_update': 0.01982116966354413, 'loss': -77.57125955469468, 'time_step': 0.020206473788627388, 'observation_error': 0.01775837510933747, 'reward_error': 2.168575656837078e-06, 'variance': 0.00883976211451904}[0m [36mstep[0m=[35m51408[0m
[2m2023-10-09 14:33:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_51408.pt[0m


Epoch 73/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:34:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=73 step=52122[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000266634115651876, 'time_algorithm_update': 0.01824700899150859, 'loss': -78.55766540548714, 'time_step': 0.018593554069348078, 'observation_error': 0.018472347484710713, 'reward_error': 3.0690182032654777e-06, 'variance': 0.008510314280911202}[0m [36mstep[0m=[35m52122[0m
[2m2023-10-09 14:34:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_52122.pt[0m


Epoch 74/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:34:21[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=74 step=52836[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026897055094315557, 'time_algorithm_update': 0.01863094638375675, 'loss': -78.91300733015984, 'time_step': 0.01898046334584554, 'observation_error': 0.017121927034248943, 'reward_error': 2.8658987798885904e-06, 'variance': 0.009042763614997019}[0m [36mstep[0m=[35m52836[0m
[2m2023-10-09 14:34:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_52836.pt[0m


Epoch 75/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:34:36[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=75 step=53550[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002672458563198228, 'time_algorithm_update': 0.01841689925901696, 'loss': -78.96969761300822, 'time_step': 0.018759982926504954, 'observation_error': 0.018763003097055538, 'reward_error': 3.1368047553481117e-06, 'variance': 0.008335740223657597}[0m [36mstep[0m=[35m53550[0m
[2m2023-10-09 14:34:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_53550.pt[0m


Epoch 76/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:34:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=76 step=54264[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028675711121545786, 'time_algorithm_update': 0.01928566283538562, 'loss': -79.51584118220653, 'time_step': 0.019653787132070846, 'observation_error': 0.016446054225942395, 'reward_error': 2.2140081310474595e-06, 'variance': 0.008388305277614087}[0m [36mstep[0m=[35m54264[0m
[2m2023-10-09 14:34:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_54264.pt[0m


Epoch 77/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:35:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=77 step=54978[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025023966610264713, 'time_algorithm_update': 0.017654840685740237, 'loss': -79.5075496972776, 'time_step': 0.01797941185179211, 'observation_error': 0.018660787655961176, 'reward_error': 2.170619231137529e-06, 'variance': 0.008702528831172384}[0m [36mstep[0m=[35m54978[0m
[2m2023-10-09 14:35:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_54978.pt[0m


Epoch 78/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:35:21[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=78 step=55692[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002524258375835686, 'time_algorithm_update': 0.01742567301464348, 'loss': -77.94987632847634, 'time_step': 0.01775358704959645, 'observation_error': 0.019064927834546733, 'reward_error': 4.720079325927649e-06, 'variance': 0.008313402336891089}[0m [36mstep[0m=[35m55692[0m
[2m2023-10-09 14:35:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_55692.pt[0m


Epoch 79/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:35:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=79 step=56406[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022426673344203403, 'time_algorithm_update': 0.01577648094722203, 'loss': -78.56870833030936, 'time_step': 0.016067318555687656, 'observation_error': 0.016145223148693894, 'reward_error': 2.9712107860265244e-06, 'variance': 0.008308223994817215}[0m [36mstep[0m=[35m56406[0m
[2m2023-10-09 14:35:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_56406.pt[0m


Epoch 80/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:35:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=80 step=57120[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021884922219925568, 'time_algorithm_update': 0.015411282453884264, 'loss': -79.35563443555218, 'time_step': 0.015695847383066387, 'observation_error': 0.015298669958348209, 'reward_error': 1.8614668609891167e-06, 'variance': 0.00801239841279141}[0m [36mstep[0m=[35m57120[0m
[2m2023-10-09 14:35:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_57120.pt[0m


Epoch 81/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:35:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=81 step=57834[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002182111018846015, 'time_algorithm_update': 0.015165530666917646, 'loss': -79.37695139930362, 'time_step': 0.0154483231509767, 'observation_error': 0.015234470129515437, 'reward_error': 1.9586713956539426e-06, 'variance': 0.00774204005076278}[0m [36mstep[0m=[35m57834[0m
[2m2023-10-09 14:35:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_57834.pt[0m


Epoch 82/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:36:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=82 step=58548[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022856828545321937, 'time_algorithm_update': 0.015756976704637545, 'loss': -79.45975772339423, 'time_step': 0.016056051775187003, 'observation_error': 0.016969919257330376, 'reward_error': 1.4924656885853213e-06, 'variance': 0.0074508612662118594}[0m [36mstep[0m=[35m58548[0m
[2m2023-10-09 14:36:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_58548.pt[0m


Epoch 83/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:36:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=83 step=59262[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022024266860064338, 'time_algorithm_update': 0.015359956677220449, 'loss': -79.76956250754391, 'time_step': 0.01564558046538623, 'observation_error': 0.01576711053800631, 'reward_error': 1.834902488010447e-06, 'variance': 0.007838415951824846}[0m [36mstep[0m=[35m59262[0m
[2m2023-10-09 14:36:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_59262.pt[0m


Epoch 84/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:36:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=84 step=59976[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022541975774684875, 'time_algorithm_update': 0.015938438955141382, 'loss': -79.69351170911175, 'time_step': 0.01623289090912549, 'observation_error': 0.014826495802731732, 'reward_error': 2.0543765243156394e-06, 'variance': 0.007951174912679042}[0m [36mstep[0m=[35m59976[0m
[2m2023-10-09 14:36:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_59976.pt[0m


Epoch 85/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:36:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=85 step=60690[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022273604609385257, 'time_algorithm_update': 0.015581206781189648, 'loss': -80.00466639254273, 'time_step': 0.015868015649939785, 'observation_error': 0.016733663080009103, 'reward_error': 2.6235270934001963e-06, 'variance': 0.007886578611798376}[0m [36mstep[0m=[35m60690[0m
[2m2023-10-09 14:36:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_60690.pt[0m


Epoch 86/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:37:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=86 step=61404[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021961990858660358, 'time_algorithm_update': 0.015458270615222407, 'loss': -79.50467385898452, 'time_step': 0.01574298580821489, 'observation_error': 0.017104833967978326, 'reward_error': 1.6623971326571967e-06, 'variance': 0.007565434139350225}[0m [36mstep[0m=[35m61404[0m
[2m2023-10-09 14:37:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_61404.pt[0m


Epoch 87/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:37:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=87 step=62118[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002288264052874568, 'time_algorithm_update': 0.01602480458278282, 'loss': -79.59201155189706, 'time_step': 0.016320767522859974, 'observation_error': 0.0165153584558483, 'reward_error': 3.929436798001698e-06, 'variance': 0.00755931384316728}[0m [36mstep[0m=[35m62118[0m
[2m2023-10-09 14:37:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_62118.pt[0m


Epoch 88/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:37:28[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=88 step=62832[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002160329444735658, 'time_algorithm_update': 0.015399857395503367, 'loss': -80.31895729406875, 'time_step': 0.015679660631495028, 'observation_error': 0.016681306820193545, 'reward_error': 1.7467882808432764e-06, 'variance': 0.007699949183941803}[0m [36mstep[0m=[35m62832[0m
[2m2023-10-09 14:37:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_62832.pt[0m


Epoch 89/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:37:41[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=89 step=63546[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022465508191191517, 'time_algorithm_update': 0.015652934710184734, 'loss': -79.47608038290542, 'time_step': 0.0159451353783701, 'observation_error': 0.01607131263919438, 'reward_error': 2.37059743555262e-06, 'variance': 0.0072786722044564075}[0m [36mstep[0m=[35m63546[0m
[2m2023-10-09 14:37:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_63546.pt[0m


Epoch 90/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:37:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=90 step=64260[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022133926049667914, 'time_algorithm_update': 0.015288809768292083, 'loss': -81.14329152147309, 'time_step': 0.015573510268823105, 'observation_error': 0.01687997535402818, 'reward_error': 1.6830461378978475e-06, 'variance': 0.007130337713882264}[0m [36mstep[0m=[35m64260[0m
[2m2023-10-09 14:37:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_64260.pt[0m


Epoch 91/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:38:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=91 step=64974[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022210360241203415, 'time_algorithm_update': 0.015474734186124401, 'loss': -79.24797176313, 'time_step': 0.015764592074546495, 'observation_error': 0.017214858228788654, 'reward_error': 1.8717527426840974e-06, 'variance': 0.007096909811713707}[0m [36mstep[0m=[35m64974[0m
[2m2023-10-09 14:38:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_64974.pt[0m


Epoch 92/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:38:19[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=92 step=65688[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021866389683314731, 'time_algorithm_update': 0.015363875557394588, 'loss': -78.60821741435375, 'time_step': 0.015647133191426594, 'observation_error': 0.0157454005799621, 'reward_error': 3.459360889220461e-06, 'variance': 0.007663981027440124}[0m [36mstep[0m=[35m65688[0m
[2m2023-10-09 14:38:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_65688.pt[0m


Epoch 93/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:38:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=93 step=66402[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021605331356785878, 'time_algorithm_update': 0.015157416087238728, 'loss': -80.12527782242505, 'time_step': 0.01543811823473591, 'observation_error': 0.015618281440032545, 'reward_error': 2.490214698947975e-06, 'variance': 0.007144535978265008}[0m [36mstep[0m=[35m66402[0m
[2m2023-10-09 14:38:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_66402.pt[0m


Epoch 94/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:38:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=94 step=67116[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022163644892160965, 'time_algorithm_update': 0.015553194935582266, 'loss': -79.61563927252419, 'time_step': 0.015840354085970326, 'observation_error': 0.01372334345560797, 'reward_error': 1.9487540459103397e-06, 'variance': 0.006950174288997395}[0m [36mstep[0m=[35m67116[0m
[2m2023-10-09 14:38:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_67116.pt[0m


Epoch 95/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:38:56[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=95 step=67830[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021690814768900724, 'time_algorithm_update': 0.015267599530580664, 'loss': -79.76066259576493, 'time_step': 0.015551102929422501, 'observation_error': 0.015787799572394845, 'reward_error': 1.8732211372960605e-06, 'variance': 0.006987132669798096}[0m [36mstep[0m=[35m67830[0m
[2m2023-10-09 14:38:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_67830.pt[0m


Epoch 96/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:39:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=96 step=68544[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022648262376544857, 'time_algorithm_update': 0.015658651747289493, 'loss': -80.06048699119846, 'time_step': 0.01595187955209855, 'observation_error': 0.01587897888670696, 'reward_error': 1.7161553912889511e-06, 'variance': 0.006562768514811601}[0m [36mstep[0m=[35m68544[0m
[2m2023-10-09 14:39:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_68544.pt[0m


Epoch 97/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:39:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=97 step=69258[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022503107535738905, 'time_algorithm_update': 0.015422136843705378, 'loss': -79.47644974337238, 'time_step': 0.01571442767017696, 'observation_error': 0.015824926785692608, 'reward_error': 2.8226312268129266e-06, 'variance': 0.006877789905172625}[0m [36mstep[0m=[35m69258[0m
[2m2023-10-09 14:39:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_69258.pt[0m


Epoch 98/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:39:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=98 step=69972[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020974023001534597, 'time_algorithm_update': 0.015061587178740515, 'loss': -80.69826565803933, 'time_step': 0.015334935415358771, 'observation_error': 0.015027368027164634, 'reward_error': 1.7996723774879121e-06, 'variance': 0.007171505459560974}[0m [36mstep[0m=[35m69972[0m
[2m2023-10-09 14:39:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_69972.pt[0m


Epoch 99/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:39:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=99 step=70686[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022246557123520796, 'time_algorithm_update': 0.01573978719257173, 'loss': -80.53323298008168, 'time_step': 0.016032600603183778, 'observation_error': 0.015922655767322633, 'reward_error': 1.398510908182727e-06, 'variance': 0.006328641652237477}[0m [36mstep[0m=[35m70686[0m
[2m2023-10-09 14:39:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_70686.pt[0m


Epoch 100/100:   0%|          | 0/714 [00:00<?, ?it/s]

[2m2023-10-09 14:39:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009141726: epoch=100 step=71400[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022018022563945013, 'time_algorithm_update': 0.015079978801289192, 'loss': -80.25595284309708, 'time_step': 0.015364176752854461, 'observation_error': 0.016848533697439848, 'reward_error': 1.6056682575868717e-06, 'variance': 0.007439189335735175}[0m [36mstep[0m=[35m71400[0m
[2m2023-10-09 14:39:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009141726/model_71400.pt[0m
[2m2023-10-09 14:39:59[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-09 14:39:59[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_0_20231009143959[0m
[2m2023-10-09 14:39:59[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-09 14:39:59[0m [[32m[1mdebug    [0m] [1mModels have been built.[0m
[2m2023-10-09 14:39:59[0m [

Epoch 1/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:40:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=1 step=818[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002482803060256473, 'time_algorithm_update': 0.014357844599884704, 'loss': -20.832126195736066, 'time_step': 0.01469026626176531, 'observation_error': 0.053155011672498344, 'reward_error': 0.0023659467202989523, 'variance': 0.06753279589454672}[0m [36mstep[0m=[35m818[0m
[2m2023-10-09 14:40:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_818.pt[0m


Epoch 2/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:40:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=2 step=1636[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002360378908936144, 'time_algorithm_update': 0.013899184089418145, 'loss': -30.825870121749514, 'time_step': 0.014204331890003606, 'observation_error': 0.039151513491114925, 'reward_error': 0.0005994327826360122, 'variance': 0.041417921560214455}[0m [36mstep[0m=[35m1636[0m
[2m2023-10-09 14:40:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_1636.pt[0m


Epoch 3/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:40:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=3 step=2454[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000249004888651132, 'time_algorithm_update': 0.014908224562852481, 'loss': -37.631660712231636, 'time_step': 0.015233947187297793, 'observation_error': 0.02323709749397985, 'reward_error': 0.00025317312071859385, 'variance': 0.01685703530382389}[0m [36mstep[0m=[35m2454[0m
[2m2023-10-09 14:40:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_2454.pt[0m


Epoch 4/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:40:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=4 step=3272[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021756249127003266, 'time_algorithm_update': 0.013856098820935834, 'loss': -45.61129559514575, 'time_step': 0.014136454295412543, 'observation_error': 0.018552354286030477, 'reward_error': 5.045274727640145e-05, 'variance': 0.007801945948554035}[0m [36mstep[0m=[35m3272[0m
[2m2023-10-09 14:40:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_3272.pt[0m


Epoch 5/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:41:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=5 step=4090[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002253484026435535, 'time_algorithm_update': 0.014735043777521781, 'loss': -53.32681165872401, 'time_step': 0.015025703131715359, 'observation_error': 0.017561547731461357, 'reward_error': 2.275654038701649e-05, 'variance': 0.007397643260805382}[0m [36mstep[0m=[35m4090[0m
[2m2023-10-09 14:41:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_4090.pt[0m


Epoch 6/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:41:19[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=6 step=4908[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021601568515842876, 'time_algorithm_update': 0.015084466607763015, 'loss': -58.110099949288774, 'time_step': 0.015365713674456683, 'observation_error': 0.028000289960036586, 'reward_error': 1.3679670154226987e-05, 'variance': 0.013724652966363585}[0m [36mstep[0m=[35m4908[0m
[2m2023-10-09 14:41:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_4908.pt[0m


Epoch 7/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:41:33[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=7 step=5726[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022372552410023137, 'time_algorithm_update': 0.015165157014991368, 'loss': -60.97312417531655, 'time_step': 0.015452900259302415, 'observation_error': 0.02569078576525845, 'reward_error': 1.4333302607721317e-05, 'variance': 0.02264672709662979}[0m [36mstep[0m=[35m5726[0m
[2m2023-10-09 14:41:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_5726.pt[0m


Epoch 8/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:41:48[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=8 step=6544[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022361855635141684, 'time_algorithm_update': 0.01549395576374455, 'loss': -62.66415348845764, 'time_step': 0.01578296921363961, 'observation_error': 0.02368719193059644, 'reward_error': 1.4207044012547562e-05, 'variance': 0.037965103044298315}[0m [36mstep[0m=[35m6544[0m
[2m2023-10-09 14:41:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_6544.pt[0m


Epoch 9/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:42:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=9 step=7362[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024507243942223437, 'time_algorithm_update': 0.01675808866915901, 'loss': -63.990509294355995, 'time_step': 0.017076225035930905, 'observation_error': 0.044142428990940785, 'reward_error': 1.4688941813078452e-05, 'variance': 0.049668767156468493}[0m [36mstep[0m=[35m7362[0m
[2m2023-10-09 14:42:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_7362.pt[0m


Epoch 10/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:42:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=10 step=8180[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022852974590870453, 'time_algorithm_update': 0.015607960066760374, 'loss': -64.73847318919772, 'time_step': 0.015907998189949465, 'observation_error': 0.04601757117356801, 'reward_error': 1.2076739857291295e-05, 'variance': 0.05854375939397773}[0m [36mstep[0m=[35m8180[0m
[2m2023-10-09 14:42:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_8180.pt[0m


Epoch 11/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:42:32[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=11 step=8998[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021840715758083502, 'time_algorithm_update': 0.01518292094792597, 'loss': -66.25695973328682, 'time_step': 0.015464125752157571, 'observation_error': 0.04932574107641146, 'reward_error': 1.0024674850308491e-05, 'variance': 0.04998882448296414}[0m [36mstep[0m=[35m8998[0m
[2m2023-10-09 14:42:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_8998.pt[0m


Epoch 12/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:42:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=12 step=9816[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022030780251276814, 'time_algorithm_update': 0.015572702389183138, 'loss': -67.21459557447574, 'time_step': 0.015856965538342017, 'observation_error': 0.04272566811694361, 'reward_error': 1.2584810829414893e-05, 'variance': 0.05604491363947785}[0m [36mstep[0m=[35m9816[0m
[2m2023-10-09 14:42:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_9816.pt[0m


Epoch 13/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:43:01[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=13 step=10634[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021970213771157858, 'time_algorithm_update': 0.01557015585724474, 'loss': -68.32190392711051, 'time_step': 0.015856079483964915, 'observation_error': 0.04056650619332616, 'reward_error': 1.0004660514236118e-05, 'variance': 0.05659469226347678}[0m [36mstep[0m=[35m10634[0m
[2m2023-10-09 14:43:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_10634.pt[0m


Epoch 14/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:43:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=14 step=11452[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002224526953289153, 'time_algorithm_update': 0.015881070297912747, 'loss': -68.00975417158132, 'time_step': 0.01617222164545782, 'observation_error': 0.05104027928129233, 'reward_error': 8.199265399628968e-06, 'variance': 0.06708732408771163}[0m [36mstep[0m=[35m11452[0m
[2m2023-10-09 14:43:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_11452.pt[0m


Epoch 15/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:43:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=15 step=12270[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021911570961726033, 'time_algorithm_update': 0.015706203094613, 'loss': -68.8360883449284, 'time_step': 0.01599151551869213, 'observation_error': 0.06560470107362802, 'reward_error': 7.887697884016245e-06, 'variance': 0.061103405628930796}[0m [36mstep[0m=[35m12270[0m
[2m2023-10-09 14:43:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_12270.pt[0m


Epoch 16/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:43:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=16 step=13088[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023135171251366949, 'time_algorithm_update': 0.015894312438871575, 'loss': -69.89461037931932, 'time_step': 0.01619634039536082, 'observation_error': 0.06930853947924659, 'reward_error': 7.567790860021472e-06, 'variance': 0.07180743893664916}[0m [36mstep[0m=[35m13088[0m
[2m2023-10-09 14:43:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_13088.pt[0m


Epoch 17/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:44:01[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=17 step=13906[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021967444851229417, 'time_algorithm_update': 0.01544078725474388, 'loss': -69.55576367949507, 'time_step': 0.01572566539559213, 'observation_error': 0.07590964931847556, 'reward_error': 9.152179485825118e-06, 'variance': 0.07295977153081755}[0m [36mstep[0m=[35m13906[0m
[2m2023-10-09 14:44:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_13906.pt[0m


Epoch 18/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:44:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=18 step=14724[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022242792078218715, 'time_algorithm_update': 0.015862322378275156, 'loss': -70.31400053716814, 'time_step': 0.01614959548912888, 'observation_error': 0.07055196631016522, 'reward_error': 7.1737579902211045e-06, 'variance': 0.06440033503513107}[0m [36mstep[0m=[35m14724[0m
[2m2023-10-09 14:44:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_14724.pt[0m


Epoch 19/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:44:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=19 step=15542[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021697635464097, 'time_algorithm_update': 0.015364639916454958, 'loss': -70.69926702641041, 'time_step': 0.01564700941584804, 'observation_error': 0.060959822739850275, 'reward_error': 6.4488993510266045e-06, 'variance': 0.06475740123506599}[0m [36mstep[0m=[35m15542[0m
[2m2023-10-09 14:44:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_15542.pt[0m


Epoch 20/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:44:45[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=20 step=16360[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022046286202876084, 'time_algorithm_update': 0.015539964137275528, 'loss': -70.95276763823912, 'time_step': 0.015826027084387892, 'observation_error': 0.06357866875641732, 'reward_error': 5.218364418113168e-06, 'variance': 0.06646704582748864}[0m [36mstep[0m=[35m16360[0m
[2m2023-10-09 14:44:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_16360.pt[0m


Epoch 21/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:44:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=21 step=17178[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021627683802746912, 'time_algorithm_update': 0.014969261467894016, 'loss': -70.6803637098858, 'time_step': 0.015250167811703857, 'observation_error': 0.07479787653294112, 'reward_error': 7.2917317940926e-06, 'variance': 0.06320108796003325}[0m [36mstep[0m=[35m17178[0m
[2m2023-10-09 14:44:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_17178.pt[0m


Epoch 22/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:45:13[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=22 step=17996[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002228222732730483, 'time_algorithm_update': 0.015311115819842425, 'loss': -72.02635656417436, 'time_step': 0.015600535572303828, 'observation_error': 0.06831437146828942, 'reward_error': 7.565534281780192e-06, 'variance': 0.06022489152446807}[0m [36mstep[0m=[35m17996[0m
[2m2023-10-09 14:45:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_17996.pt[0m


Epoch 23/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:45:27[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=23 step=18814[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021592620532495178, 'time_algorithm_update': 0.01502260631337434, 'loss': -73.56445017070817, 'time_step': 0.015299422233786735, 'observation_error': 0.06440095643896203, 'reward_error': 4.240992334535104e-06, 'variance': 0.053456014716461234}[0m [36mstep[0m=[35m18814[0m
[2m2023-10-09 14:45:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_18814.pt[0m


Epoch 24/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:45:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=24 step=19632[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021910988031214782, 'time_algorithm_update': 0.015638603557875802, 'loss': -72.27851833224588, 'time_step': 0.015923061697290696, 'observation_error': 0.06497572435290627, 'reward_error': 6.640695199586728e-06, 'variance': 0.0605819256452753}[0m [36mstep[0m=[35m19632[0m
[2m2023-10-09 14:45:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_19632.pt[0m


Epoch 25/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:45:56[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=25 step=20450[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000216588122920477, 'time_algorithm_update': 0.01536069085370649, 'loss': -73.4185649713269, 'time_step': 0.01564036604827657, 'observation_error': 0.06598092059502547, 'reward_error': 7.134538211127609e-06, 'variance': 0.05618678151639184}[0m [36mstep[0m=[35m20450[0m
[2m2023-10-09 14:45:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_20450.pt[0m


Epoch 26/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:46:11[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=26 step=21268[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002203762968478401, 'time_algorithm_update': 0.015691165819144773, 'loss': -72.46063030144988, 'time_step': 0.015978419110361113, 'observation_error': 0.06147733264041288, 'reward_error': 8.408873163864383e-06, 'variance': 0.057669625209896595}[0m [36mstep[0m=[35m21268[0m
[2m2023-10-09 14:46:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_21268.pt[0m


Epoch 27/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:46:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=27 step=22086[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021237849023347962, 'time_algorithm_update': 0.01519621351237402, 'loss': -73.72959198054008, 'time_step': 0.01547022874314511, 'observation_error': 0.06115136827621448, 'reward_error': 4.649348133425149e-06, 'variance': 0.052957528334029495}[0m [36mstep[0m=[35m22086[0m
[2m2023-10-09 14:46:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_22086.pt[0m


Epoch 28/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:46:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=28 step=22904[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000218009598972162, 'time_algorithm_update': 0.015513035662308298, 'loss': -73.35693244654276, 'time_step': 0.015794929198938944, 'observation_error': 0.05847837866494807, 'reward_error': 5.924243439545207e-06, 'variance': 0.049414711626940604}[0m [36mstep[0m=[35m22904[0m
[2m2023-10-09 14:46:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_22904.pt[0m


Epoch 29/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:46:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=29 step=23722[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021741209519812998, 'time_algorithm_update': 0.015142637534945984, 'loss': -73.61635200784958, 'time_step': 0.015423821936609693, 'observation_error': 0.05840007003992267, 'reward_error': 4.005257849450906e-06, 'variance': 0.04746555984954542}[0m [36mstep[0m=[35m23722[0m
[2m2023-10-09 14:46:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_23722.pt[0m


Epoch 30/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:47:08[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=30 step=24540[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021863683220226782, 'time_algorithm_update': 0.015063154376806432, 'loss': -74.68046411966637, 'time_step': 0.01534502372182086, 'observation_error': 0.046528524966164664, 'reward_error': 5.849292531507663e-06, 'variance': 0.04384800999723189}[0m [36mstep[0m=[35m24540[0m
[2m2023-10-09 14:47:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_24540.pt[0m


Epoch 31/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:47:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=31 step=25358[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021893354383249446, 'time_algorithm_update': 0.014936946423537574, 'loss': -74.12721862186721, 'time_step': 0.015220370444225508, 'observation_error': 0.048192873769561, 'reward_error': 4.363928337354739e-06, 'variance': 0.04413172007118826}[0m [36mstep[0m=[35m25358[0m
[2m2023-10-09 14:47:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_25358.pt[0m


Epoch 32/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:47:36[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=32 step=26176[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002191431073512891, 'time_algorithm_update': 0.015081731206338971, 'loss': -74.70487814719053, 'time_step': 0.01536405290543013, 'observation_error': 0.05315350465675275, 'reward_error': 5.0231815218856126e-06, 'variance': 0.04074516983533363}[0m [36mstep[0m=[35m26176[0m
[2m2023-10-09 14:47:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_26176.pt[0m


Epoch 33/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:47:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=33 step=26994[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022226440877378133, 'time_algorithm_update': 0.015528594077653581, 'loss': -74.37770028918763, 'time_step': 0.015818235635174517, 'observation_error': 0.04627935180027489, 'reward_error': 3.4152791545555983e-06, 'variance': 0.04381816856008207}[0m [36mstep[0m=[35m26994[0m
[2m2023-10-09 14:47:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_26994.pt[0m


Epoch 34/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:48:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=34 step=27812[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002116754760369112, 'time_algorithm_update': 0.015212632041688653, 'loss': -73.91773940881482, 'time_step': 0.01548708388741267, 'observation_error': 0.04849470450079957, 'reward_error': 5.526661095612986e-06, 'variance': 0.038819683179287114}[0m [36mstep[0m=[35m27812[0m
[2m2023-10-09 14:48:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_27812.pt[0m


Epoch 35/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:48:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=35 step=28630[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021852665833564143, 'time_algorithm_update': 0.015637577891641257, 'loss': -75.57645570386593, 'time_step': 0.015919668458784706, 'observation_error': 0.0529332503821675, 'reward_error': 4.1450914968894125e-06, 'variance': 0.039179762711239206}[0m [36mstep[0m=[35m28630[0m
[2m2023-10-09 14:48:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_28630.pt[0m


Epoch 36/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:48:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=36 step=29448[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002155027063085281, 'time_algorithm_update': 0.015328288369773362, 'loss': -75.12338365727356, 'time_step': 0.015607670058831026, 'observation_error': 0.0514213332808232, 'reward_error': 3.3821911246821728e-06, 'variance': 0.0373956882361203}[0m [36mstep[0m=[35m29448[0m
[2m2023-10-09 14:48:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_29448.pt[0m


Epoch 37/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:48:48[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=37 step=30266[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022078055915739252, 'time_algorithm_update': 0.015675466043150513, 'loss': -75.31500477196244, 'time_step': 0.015963497255134115, 'observation_error': 0.050095095100290754, 'reward_error': 5.053777044804439e-06, 'variance': 0.035009543429536855}[0m [36mstep[0m=[35m30266[0m
[2m2023-10-09 14:48:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_30266.pt[0m


Epoch 38/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:49:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=38 step=31084[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002209318296250621, 'time_algorithm_update': 0.015341562280445052, 'loss': -75.34931142639122, 'time_step': 0.01562726730822351, 'observation_error': 0.04024190409893291, 'reward_error': 5.124197511599412e-06, 'variance': 0.031827427797476555}[0m [36mstep[0m=[35m31084[0m
[2m2023-10-09 14:49:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_31084.pt[0m


Epoch 39/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:49:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=39 step=31902[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022442912122731104, 'time_algorithm_update': 0.01553193222922626, 'loss': -76.02111677785375, 'time_step': 0.015822331304946563, 'observation_error': 0.048496750697433005, 'reward_error': 3.1705577733855264e-06, 'variance': 0.030941938958905853}[0m [36mstep[0m=[35m31902[0m
[2m2023-10-09 14:49:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_31902.pt[0m


Epoch 40/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:49:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=40 step=32720[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021505180955807562, 'time_algorithm_update': 0.01534325744237177, 'loss': -76.36415304123335, 'time_step': 0.015620806397902061, 'observation_error': 0.03697978415349132, 'reward_error': 3.671009043638792e-06, 'variance': 0.030242675071637186}[0m [36mstep[0m=[35m32720[0m
[2m2023-10-09 14:49:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_32720.pt[0m


Epoch 41/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:49:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=41 step=33538[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002295294717354996, 'time_algorithm_update': 0.015625418835572332, 'loss': -75.50344588762391, 'time_step': 0.015922652771537052, 'observation_error': 0.04475499857105756, 'reward_error': 4.219450555112899e-06, 'variance': 0.03495717617676415}[0m [36mstep[0m=[35m33538[0m
[2m2023-10-09 14:49:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_33538.pt[0m


Epoch 42/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:50:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=42 step=34356[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021239743547509527, 'time_algorithm_update': 0.015090286877452598, 'loss': -76.4193704367267, 'time_step': 0.015364574336772442, 'observation_error': 0.043617109058775236, 'reward_error': 3.6653293303535166e-06, 'variance': 0.0271642301449103}[0m [36mstep[0m=[35m34356[0m
[2m2023-10-09 14:50:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_34356.pt[0m


Epoch 43/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:50:14[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=43 step=35174[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022326413460057638, 'time_algorithm_update': 0.015486619291795204, 'loss': -76.02855749410055, 'time_step': 0.01577643456260849, 'observation_error': 0.03932731106990635, 'reward_error': 5.343086232971882e-06, 'variance': 0.026115193203074646}[0m [36mstep[0m=[35m35174[0m
[2m2023-10-09 14:50:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_35174.pt[0m


Epoch 44/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:50:28[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=44 step=35992[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021785861996974804, 'time_algorithm_update': 0.0150547668811453, 'loss': -77.56904991215191, 'time_step': 0.015336398390511137, 'observation_error': 0.038150177177664744, 'reward_error': 3.1112822727347293e-06, 'variance': 0.025623892597898938}[0m [36mstep[0m=[35m35992[0m
[2m2023-10-09 14:50:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_35992.pt[0m


Epoch 45/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:50:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=45 step=36810[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022271909457255692, 'time_algorithm_update': 0.015204846421780388, 'loss': -76.39597844377998, 'time_step': 0.015491910843511083, 'observation_error': 0.03744820829205662, 'reward_error': 3.505814643840841e-06, 'variance': 0.02512276436682103}[0m [36mstep[0m=[35m36810[0m
[2m2023-10-09 14:50:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_36810.pt[0m


Epoch 46/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:50:55[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=46 step=37628[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021269968494517879, 'time_algorithm_update': 0.014748886628372454, 'loss': -77.2756316131368, 'time_step': 0.015022009101065563, 'observation_error': 0.038688476338295676, 'reward_error': 3.6871083947971927e-06, 'variance': 0.023618014831487087}[0m [36mstep[0m=[35m37628[0m
[2m2023-10-09 14:50:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_37628.pt[0m


Epoch 47/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:51:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=47 step=38446[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021832292412195928, 'time_algorithm_update': 0.01515604843428782, 'loss': -75.95309432386478, 'time_step': 0.015441406035481572, 'observation_error': 0.037603820326902196, 'reward_error': 3.984736134845748e-06, 'variance': 0.022564991809565057}[0m [36mstep[0m=[35m38446[0m
[2m2023-10-09 14:51:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_38446.pt[0m


Epoch 48/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:51:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=48 step=39264[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021937365636848878, 'time_algorithm_update': 0.01486080724627581, 'loss': -75.93601017301415, 'time_step': 0.015145483984632422, 'observation_error': 0.038198956610233495, 'reward_error': 3.2549821203967563e-06, 'variance': 0.02213477121056027}[0m [36mstep[0m=[35m39264[0m
[2m2023-10-09 14:51:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_39264.pt[0m


Epoch 49/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:51:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=49 step=40082[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022233465190038705, 'time_algorithm_update': 0.014886774760295244, 'loss': -77.20714872682007, 'time_step': 0.015174540155965716, 'observation_error': 0.042176495155406675, 'reward_error': 4.846250727831513e-06, 'variance': 0.021579833778518336}[0m [36mstep[0m=[35m40082[0m
[2m2023-10-09 14:51:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_40082.pt[0m


Epoch 50/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:51:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=50 step=40900[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022787977838865993, 'time_algorithm_update': 0.015003954577271104, 'loss': -77.5151701113414, 'time_step': 0.015295403219376917, 'observation_error': 0.0362718191967521, 'reward_error': 3.7874918521825185e-06, 'variance': 0.020993746798365322}[0m [36mstep[0m=[35m40900[0m
[2m2023-10-09 14:51:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_40900.pt[0m


Epoch 51/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:52:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=51 step=41718[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022121047040943995, 'time_algorithm_update': 0.014703533177270866, 'loss': -77.8768772554281, 'time_step': 0.014989500523779387, 'observation_error': 0.03634032056595774, 'reward_error': 2.692205050033856e-06, 'variance': 0.018846238848897977}[0m [36mstep[0m=[35m41718[0m
[2m2023-10-09 14:52:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_41718.pt[0m


Epoch 52/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:52:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=52 step=42536[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023190957701293646, 'time_algorithm_update': 0.015124791408809297, 'loss': -76.96976389336994, 'time_step': 0.015426168523382732, 'observation_error': 0.02983076669022784, 'reward_error': 2.8373231889284148e-06, 'variance': 0.01852782842433711}[0m [36mstep[0m=[35m42536[0m
[2m2023-10-09 14:52:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_42536.pt[0m


Epoch 53/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:52:32[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=53 step=43354[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021157521198897608, 'time_algorithm_update': 0.014590609627422901, 'loss': -77.7985048573874, 'time_step': 0.01486485686453747, 'observation_error': 0.03084935501827452, 'reward_error': 4.5618991194436955e-06, 'variance': 0.01803926480244316}[0m [36mstep[0m=[35m43354[0m
[2m2023-10-09 14:52:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_43354.pt[0m


Epoch 54/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:52:45[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=54 step=44172[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021944419096035013, 'time_algorithm_update': 0.014926205345937266, 'loss': -78.13204206872395, 'time_step': 0.015210263303556185, 'observation_error': 0.03437610495749326, 'reward_error': 3.7431735626985333e-06, 'variance': 0.017588121441479473}[0m [36mstep[0m=[35m44172[0m
[2m2023-10-09 14:52:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_44172.pt[0m


Epoch 55/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:52:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=55 step=44990[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022158238007561793, 'time_algorithm_update': 0.014678182986662848, 'loss': -77.39011340152955, 'time_step': 0.014964084170558343, 'observation_error': 0.034361653021636525, 'reward_error': 4.437073729739867e-06, 'variance': 0.019082804813394097}[0m [36mstep[0m=[35m44990[0m
[2m2023-10-09 14:52:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_44990.pt[0m


Epoch 56/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:53:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=56 step=45808[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022189599669067083, 'time_algorithm_update': 0.014852393810206928, 'loss': -78.63993914844355, 'time_step': 0.015138784947197129, 'observation_error': 0.02906062672703301, 'reward_error': 3.6454035753611247e-06, 'variance': 0.01851561764674268}[0m [36mstep[0m=[35m45808[0m
[2m2023-10-09 14:53:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_45808.pt[0m


Epoch 57/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:53:26[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=57 step=46626[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022168031240150807, 'time_algorithm_update': 0.01470120757599623, 'loss': -77.29206995276192, 'time_step': 0.014990116098399267, 'observation_error': 0.03403982352670885, 'reward_error': 4.0497376242267885e-06, 'variance': 0.018310943766761355}[0m [36mstep[0m=[35m46626[0m
[2m2023-10-09 14:53:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_46626.pt[0m


Epoch 58/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:53:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=58 step=47444[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022073275885546993, 'time_algorithm_update': 0.01495327489008822, 'loss': -78.86584976424798, 'time_step': 0.015237646172856934, 'observation_error': 0.030387392132235368, 'reward_error': 2.555223782072515e-06, 'variance': 0.017727835339771045}[0m [36mstep[0m=[35m47444[0m
[2m2023-10-09 14:53:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_47444.pt[0m


Epoch 59/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:53:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=59 step=48262[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022247105764001973, 'time_algorithm_update': 0.014896980707686221, 'loss': -77.93045247271475, 'time_step': 0.01518594169383527, 'observation_error': 0.030641470766963046, 'reward_error': 2.8687281440702683e-06, 'variance': 0.015471856941973994}[0m [36mstep[0m=[35m48262[0m
[2m2023-10-09 14:53:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_48262.pt[0m


Epoch 60/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:54:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=60 step=49080[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021805156996897206, 'time_algorithm_update': 0.014751718213330853, 'loss': -77.71151802184238, 'time_step': 0.015034931213173714, 'observation_error': 0.03134401337187206, 'reward_error': 4.338253927451949e-06, 'variance': 0.015302304713372558}[0m [36mstep[0m=[35m49080[0m
[2m2023-10-09 14:54:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_49080.pt[0m


Epoch 61/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:54:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=61 step=49898[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002207219746410118, 'time_algorithm_update': 0.015244258645111308, 'loss': -77.95499974416346, 'time_step': 0.01553252915006978, 'observation_error': 0.030199434633123818, 'reward_error': 2.600015425238628e-06, 'variance': 0.014836366478908349}[0m [36mstep[0m=[35m49898[0m
[2m2023-10-09 14:54:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_49898.pt[0m


Epoch 62/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:54:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=62 step=50716[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021159736334840358, 'time_algorithm_update': 0.014407926200363047, 'loss': -78.53714395443792, 'time_step': 0.014681206647224707, 'observation_error': 0.0287231294806385, 'reward_error': 2.5705866935276667e-06, 'variance': 0.015248411095262315}[0m [36mstep[0m=[35m50716[0m
[2m2023-10-09 14:54:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_50716.pt[0m


Epoch 63/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:54:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=63 step=51534[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002215211723719366, 'time_algorithm_update': 0.014666512426362353, 'loss': -79.44530436462179, 'time_step': 0.014951325570458597, 'observation_error': 0.03266287838885661, 'reward_error': 2.201361248800683e-06, 'variance': 0.014031783665584361}[0m [36mstep[0m=[35m51534[0m
[2m2023-10-09 14:54:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_51534.pt[0m


Epoch 64/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:55:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=64 step=52352[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021756598885310017, 'time_algorithm_update': 0.014357929707739348, 'loss': -76.69433598646616, 'time_step': 0.01464043298968476, 'observation_error': 0.029263593263392434, 'reward_error': 4.40777967252377e-06, 'variance': 0.016686693191934363}[0m [36mstep[0m=[35m52352[0m
[2m2023-10-09 14:55:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_52352.pt[0m


Epoch 65/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:55:14[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=65 step=53170[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022236729600901708, 'time_algorithm_update': 0.014860490423542946, 'loss': -79.51384817300624, 'time_step': 0.015147738468384684, 'observation_error': 0.025926696687782077, 'reward_error': 2.381256036583759e-06, 'variance': 0.013936255544144151}[0m [36mstep[0m=[35m53170[0m
[2m2023-10-09 14:55:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_53170.pt[0m


Epoch 66/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:55:27[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=66 step=53988[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021765984066541153, 'time_algorithm_update': 0.014624489548736797, 'loss': -79.30992656232092, 'time_step': 0.014908428879996676, 'observation_error': 0.03036504185493789, 'reward_error': 2.5987530656267505e-06, 'variance': 0.014593105779807025}[0m [36mstep[0m=[35m53988[0m
[2m2023-10-09 14:55:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_53988.pt[0m


Epoch 67/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:55:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=67 step=54806[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002452930786207428, 'time_algorithm_update': 0.016100131503527206, 'loss': -78.86836526329768, 'time_step': 0.01641653627521543, 'observation_error': 0.029927373203726588, 'reward_error': 2.961448525449965e-06, 'variance': 0.013439069228604867}[0m [36mstep[0m=[35m54806[0m
[2m2023-10-09 14:55:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_54806.pt[0m


Epoch 68/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:55:57[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=68 step=55624[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002536948560794, 'time_algorithm_update': 0.01650865270339481, 'loss': -79.00480710498279, 'time_step': 0.016837853674200754, 'observation_error': 0.028934470428580737, 'reward_error': 2.792317316358357e-06, 'variance': 0.012705679737527847}[0m [36mstep[0m=[35m55624[0m
[2m2023-10-09 14:55:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_55624.pt[0m


Epoch 69/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:56:10[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=69 step=56442[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002243343950192328, 'time_algorithm_update': 0.014848961223891429, 'loss': -79.67155399765537, 'time_step': 0.015139957803385765, 'observation_error': 0.028892280320234998, 'reward_error': 2.5632211106512154e-06, 'variance': 0.012822227619290793}[0m [36mstep[0m=[35m56442[0m
[2m2023-10-09 14:56:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_56442.pt[0m


Epoch 70/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:56:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=70 step=57260[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002187193168696098, 'time_algorithm_update': 0.014460111014125983, 'loss': -79.62056163820486, 'time_step': 0.014743357823938496, 'observation_error': 0.027543584374782402, 'reward_error': 3.844471579292278e-06, 'variance': 0.012659555175742689}[0m [36mstep[0m=[35m57260[0m
[2m2023-10-09 14:56:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_57260.pt[0m


Epoch 71/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:56:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=71 step=58078[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022091405024446894, 'time_algorithm_update': 0.014736102087864958, 'loss': -79.1165954629483, 'time_step': 0.015020533703941588, 'observation_error': 0.02856751148984723, 'reward_error': 2.712193834042651e-06, 'variance': 0.01402898982371358}[0m [36mstep[0m=[35m58078[0m
[2m2023-10-09 14:56:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_58078.pt[0m


Epoch 72/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:56:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=72 step=58896[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022297791571955226, 'time_algorithm_update': 0.014745733557237099, 'loss': -80.43479118487072, 'time_step': 0.015034078385835755, 'observation_error': 0.030473346791168456, 'reward_error': 2.076226094771375e-06, 'variance': 0.012275970783050094}[0m [36mstep[0m=[35m58896[0m
[2m2023-10-09 14:56:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_58896.pt[0m


Epoch 73/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:57:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=73 step=59714[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022033432585103006, 'time_algorithm_update': 0.014531452090349641, 'loss': -78.84425269945326, 'time_step': 0.01481802189554154, 'observation_error': 0.030735723180160747, 'reward_error': 3.0177247269728453e-06, 'variance': 0.013304106398090121}[0m [36mstep[0m=[35m59714[0m
[2m2023-10-09 14:57:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_59714.pt[0m


Epoch 74/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:57:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=74 step=60532[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021850159232365764, 'time_algorithm_update': 0.014989766631557773, 'loss': -79.14525535112489, 'time_step': 0.015276535798984518, 'observation_error': 0.031732650628847316, 'reward_error': 2.941370220058554e-06, 'variance': 0.012331412140387894}[0m [36mstep[0m=[35m60532[0m
[2m2023-10-09 14:57:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_60532.pt[0m


Epoch 75/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:57:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=75 step=61350[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021790438001488124, 'time_algorithm_update': 0.014510565690131526, 'loss': -79.56946899255506, 'time_step': 0.01479227210606806, 'observation_error': 0.02867808109503753, 'reward_error': 5.040587095379736e-06, 'variance': 0.012942908421747397}[0m [36mstep[0m=[35m61350[0m
[2m2023-10-09 14:57:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_61350.pt[0m


Epoch 76/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:57:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=76 step=62168[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022563024954574325, 'time_algorithm_update': 0.015061546071525891, 'loss': -78.93387866661426, 'time_step': 0.015354182726013632, 'observation_error': 0.02485799616749372, 'reward_error': 3.2090006241756296e-06, 'variance': 0.011811169851828085}[0m [36mstep[0m=[35m62168[0m
[2m2023-10-09 14:57:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_62168.pt[0m


Epoch 77/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:57:57[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=77 step=62986[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002137906393969846, 'time_algorithm_update': 0.014316336158435328, 'loss': -80.47788976282246, 'time_step': 0.014593900853089424, 'observation_error': 0.023672210022805274, 'reward_error': 3.096255465110908e-06, 'variance': 0.01102389020630477}[0m [36mstep[0m=[35m62986[0m
[2m2023-10-09 14:57:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_62986.pt[0m


Epoch 78/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:58:11[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=78 step=63804[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022591967454457925, 'time_algorithm_update': 0.01478063302401517, 'loss': -80.11011308562784, 'time_step': 0.015073859604180296, 'observation_error': 0.0266667886363465, 'reward_error': 2.6001044210138695e-06, 'variance': 0.011622629386444329}[0m [36mstep[0m=[35m63804[0m
[2m2023-10-09 14:58:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_63804.pt[0m


Epoch 79/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:58:24[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=79 step=64622[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021695624353833186, 'time_algorithm_update': 0.014367963107698995, 'loss': -80.97303184845045, 'time_step': 0.01464874995075403, 'observation_error': 0.029979742851738527, 'reward_error': 2.6231308845384806e-06, 'variance': 0.011259429872973495}[0m [36mstep[0m=[35m64622[0m
[2m2023-10-09 14:58:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_64622.pt[0m


Epoch 80/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:58:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=80 step=65440[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002203098427695575, 'time_algorithm_update': 0.01444661267520746, 'loss': -79.82722175616799, 'time_step': 0.014732784921790684, 'observation_error': 0.027928211841981715, 'reward_error': 2.339963895848007e-06, 'variance': 0.01130857653545973}[0m [36mstep[0m=[35m65440[0m
[2m2023-10-09 14:58:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_65440.pt[0m


Epoch 81/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:58:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=81 step=66258[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002269975130598819, 'time_algorithm_update': 0.014896926203683419, 'loss': -80.67112231779215, 'time_step': 0.015192138245169866, 'observation_error': 0.025766124396084195, 'reward_error': 3.0828701818747977e-06, 'variance': 0.010221519955633794}[0m [36mstep[0m=[35m66258[0m
[2m2023-10-09 14:58:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_66258.pt[0m


Epoch 82/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:59:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=82 step=67076[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022161473271899236, 'time_algorithm_update': 0.014550570170861876, 'loss': -80.34592939763897, 'time_step': 0.014836466399848024, 'observation_error': 0.02801721791205474, 'reward_error': 2.6260896942206235e-06, 'variance': 0.009759137738736708}[0m [36mstep[0m=[35m67076[0m
[2m2023-10-09 14:59:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_67076.pt[0m


Epoch 83/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:59:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=83 step=67894[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002199571698102508, 'time_algorithm_update': 0.014662351468373044, 'loss': -80.61334882447072, 'time_step': 0.014949425508457174, 'observation_error': 0.027458316907222252, 'reward_error': 2.146752626127571e-06, 'variance': 0.01077390448866539}[0m [36mstep[0m=[35m67894[0m
[2m2023-10-09 14:59:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_67894.pt[0m


Epoch 84/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:59:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=84 step=68712[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021246709567118973, 'time_algorithm_update': 0.01426287822443582, 'loss': -80.80892885693127, 'time_step': 0.014538965773174407, 'observation_error': 0.02604139068463694, 'reward_error': 2.314297180592554e-06, 'variance': 0.01098818791247543}[0m [36mstep[0m=[35m68712[0m
[2m2023-10-09 14:59:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_68712.pt[0m


Epoch 85/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:59:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=85 step=69530[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002251382561942476, 'time_algorithm_update': 0.014841665848543125, 'loss': -81.00968369295079, 'time_step': 0.015132182384761446, 'observation_error': 0.027257837086985117, 'reward_error': 2.4061760483483142e-06, 'variance': 0.01022376008933206}[0m [36mstep[0m=[35m69530[0m
[2m2023-10-09 14:59:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_69530.pt[0m


Epoch 86/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 14:59:57[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=86 step=70348[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022058440304035664, 'time_algorithm_update': 0.014575850118343288, 'loss': -81.0931488400858, 'time_step': 0.014860098694239385, 'observation_error': 0.025994722418682604, 'reward_error': 1.8421364718978043e-06, 'variance': 0.009893201525852137}[0m [36mstep[0m=[35m70348[0m
[2m2023-10-09 14:59:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_70348.pt[0m


Epoch 87/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:00:11[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=87 step=71166[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002236730603542188, 'time_algorithm_update': 0.01492523185198348, 'loss': -80.82536233841353, 'time_step': 0.015214562707541917, 'observation_error': 0.029212119954541267, 'reward_error': 2.0720097846937485e-06, 'variance': 0.010642858162557228}[0m [36mstep[0m=[35m71166[0m
[2m2023-10-09 15:00:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_71166.pt[0m


Epoch 88/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:00:24[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=88 step=71984[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002149838981535149, 'time_algorithm_update': 0.014450627609103699, 'loss': -81.31107494300619, 'time_step': 0.014728817496731113, 'observation_error': 0.025597608879975003, 'reward_error': 2.946247147828435e-06, 'variance': 0.010107112147669696}[0m [36mstep[0m=[35m71984[0m
[2m2023-10-09 15:00:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_71984.pt[0m


Epoch 89/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:00:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=89 step=72802[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021227239688043198, 'time_algorithm_update': 0.014267719753797014, 'loss': -80.12440202288639, 'time_step': 0.01454316170699439, 'observation_error': 0.02761743825318329, 'reward_error': 1.788884899000886e-06, 'variance': 0.00982686677328401}[0m [36mstep[0m=[35m72802[0m
[2m2023-10-09 15:00:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_72802.pt[0m


Epoch 90/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:00:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=90 step=73620[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022471854622614705, 'time_algorithm_update': 0.015016454939154366, 'loss': -81.38340465713539, 'time_step': 0.015306410113290352, 'observation_error': 0.027749222568368852, 'reward_error': 1.8181286351166024e-06, 'variance': 0.009046273641129018}[0m [36mstep[0m=[35m73620[0m
[2m2023-10-09 15:00:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_73620.pt[0m


Epoch 91/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:01:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=91 step=74438[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002180670176275202, 'time_algorithm_update': 0.014572554229232676, 'loss': -80.62292291366091, 'time_step': 0.014854380145924016, 'observation_error': 0.026021543251981867, 'reward_error': 2.324157332010993e-06, 'variance': 0.009918665622436247}[0m [36mstep[0m=[35m74438[0m
[2m2023-10-09 15:01:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_74438.pt[0m


Epoch 92/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:01:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=92 step=75256[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021984262396479003, 'time_algorithm_update': 0.014922013492630863, 'loss': -82.00721407869334, 'time_step': 0.015206547121546963, 'observation_error': 0.02406391058429812, 'reward_error': 2.1816154353674863e-06, 'variance': 0.009653386159489701}[0m [36mstep[0m=[35m75256[0m
[2m2023-10-09 15:01:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_75256.pt[0m


Epoch 93/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:01:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=93 step=76074[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00020939417748113133, 'time_algorithm_update': 0.014536475202565088, 'loss': -81.68361053839872, 'time_step': 0.014808792356756905, 'observation_error': 0.026210092152098758, 'reward_error': 3.958164897029148e-06, 'variance': 0.00890186292702467}[0m [36mstep[0m=[35m76074[0m
[2m2023-10-09 15:01:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_76074.pt[0m


Epoch 94/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:01:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=94 step=76892[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022903135761363582, 'time_algorithm_update': 0.01508334767264667, 'loss': -81.29712571724703, 'time_step': 0.015377315448956852, 'observation_error': 0.023467777686859405, 'reward_error': 2.2612355169039257e-06, 'variance': 0.00981585454201109}[0m [36mstep[0m=[35m76892[0m
[2m2023-10-09 15:01:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_76892.pt[0m


Epoch 95/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:01:57[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=95 step=77710[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002156991538908196, 'time_algorithm_update': 0.014775861446315326, 'loss': -81.88363682903113, 'time_step': 0.015056871843221426, 'observation_error': 0.02557452391426511, 'reward_error': 2.6785498308626433e-06, 'variance': 0.009148001993829804}[0m [36mstep[0m=[35m77710[0m
[2m2023-10-09 15:01:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_77710.pt[0m


Epoch 96/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:02:11[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=96 step=78528[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022733240663859547, 'time_algorithm_update': 0.015165898794066934, 'loss': -79.47840345751102, 'time_step': 0.01546242155480793, 'observation_error': 0.024300476625362288, 'reward_error': 2.4437888422172027e-06, 'variance': 0.00958491100789589}[0m [36mstep[0m=[35m78528[0m
[2m2023-10-09 15:02:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_78528.pt[0m


Epoch 97/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:02:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=97 step=79346[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021622699746875717, 'time_algorithm_update': 0.01463748540155462, 'loss': -81.7726337041132, 'time_step': 0.014917755768176687, 'observation_error': 0.027115723458227443, 'reward_error': 2.01277623468604e-06, 'variance': 0.010053477894046705}[0m [36mstep[0m=[35m79346[0m
[2m2023-10-09 15:02:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_79346.pt[0m


Epoch 98/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:02:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=98 step=80164[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002202489265311318, 'time_algorithm_update': 0.01486362484090194, 'loss': -81.62864247161194, 'time_step': 0.015150050370792305, 'observation_error': 0.023247890816309208, 'reward_error': 2.180774630069858e-06, 'variance': 0.00855744780362989}[0m [36mstep[0m=[35m80164[0m
[2m2023-10-09 15:02:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_80164.pt[0m


Epoch 99/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:02:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=99 step=80982[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022657051646039072, 'time_algorithm_update': 0.014837103834362077, 'loss': -82.2758832945509, 'time_step': 0.015131549322226228, 'observation_error': 0.027521386414329354, 'reward_error': 1.7614490763950919e-06, 'variance': 0.009770988738640827}[0m [36mstep[0m=[35m80982[0m
[2m2023-10-09 15:02:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_80982.pt[0m


Epoch 100/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:03:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009143959: epoch=100 step=81800[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000244804291387059, 'time_algorithm_update': 0.01615963267813685, 'loss': -80.7045135253216, 'time_step': 0.016476578409339512, 'observation_error': 0.024730827599642912, 'reward_error': 3.1367734901301533e-06, 'variance': 0.009081853797293987}[0m [36mstep[0m=[35m81800[0m
[2m2023-10-09 15:03:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009143959/model_81800.pt[0m
Using InvertedPendulumEncoderFactory
[2m2023-10-09 15:03:06[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-09 15:03:06[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_0_20231009150306[0m
[2m2023-10-09 15:03:06[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-09 15:03:06[0m [[32m[1mdebug    [0m] [1mModels have been built.[

Epoch 1/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:03:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=1 step=818[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00029996292515314005, 'time_algorithm_update': 0.01673709209507427, 'loss': -20.64359748073396, 'time_step': 0.017133547799220005, 'observation_error': 0.04903536886932672, 'reward_error': 0.001081358745940995, 'variance': 0.04153488793596729}[0m [36mstep[0m=[35m818[0m
[2m2023-10-09 15:03:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_818.pt[0m


Epoch 2/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:03:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=2 step=1636[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022020695553432175, 'time_algorithm_update': 0.01296290762558543, 'loss': -32.56187727632033, 'time_step': 0.013248871474510883, 'observation_error': 0.030170852040403942, 'reward_error': 0.00041774513339967376, 'variance': 0.023808723706400522}[0m [36mstep[0m=[35m1636[0m
[2m2023-10-09 15:03:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_1636.pt[0m


Epoch 3/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:03:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=3 step=2454[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022297879011531915, 'time_algorithm_update': 0.014415393248746914, 'loss': -39.85283100983916, 'time_step': 0.014706033074768365, 'observation_error': 0.021991773123937304, 'reward_error': 0.00019006249145778206, 'variance': 0.011755724556980872}[0m [36mstep[0m=[35m2454[0m
[2m2023-10-09 15:03:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_2454.pt[0m


Epoch 4/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:04:01[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=4 step=3272[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021662426461217457, 'time_algorithm_update': 0.014348405206116022, 'loss': -46.737613608026855, 'time_step': 0.014630211594635234, 'observation_error': 0.019129159699642267, 'reward_error': 2.389834911997951e-05, 'variance': 0.006068308965429403}[0m [36mstep[0m=[35m3272[0m
[2m2023-10-09 15:04:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_3272.pt[0m


Epoch 5/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:04:15[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=5 step=4090[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023171138063911122, 'time_algorithm_update': 0.015683416632393463, 'loss': -53.01922807687652, 'time_step': 0.015981997429304427, 'observation_error': 0.02114038900206878, 'reward_error': 1.521477091026778e-05, 'variance': 0.008397152883508854}[0m [36mstep[0m=[35m4090[0m
[2m2023-10-09 15:04:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_4090.pt[0m


Epoch 6/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:04:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=6 step=4908[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022184528173619202, 'time_algorithm_update': 0.015782980872249837, 'loss': -57.43473376679829, 'time_step': 0.01606780334323426, 'observation_error': 0.026373528720254762, 'reward_error': 1.832500619998387e-05, 'variance': 0.023152168073017815}[0m [36mstep[0m=[35m4908[0m
[2m2023-10-09 15:04:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_4908.pt[0m


Epoch 7/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:04:45[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=7 step=5726[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021882861634046932, 'time_algorithm_update': 0.016624475458140477, 'loss': -60.823001595173025, 'time_step': 0.0169107252344817, 'observation_error': 0.04388837857309728, 'reward_error': 1.4686514605673554e-05, 'variance': 0.050372973442220095}[0m [36mstep[0m=[35m5726[0m
[2m2023-10-09 15:04:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_5726.pt[0m


Epoch 8/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:05:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=8 step=6544[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021844912857764507, 'time_algorithm_update': 0.01625790660130657, 'loss': -62.790902681047584, 'time_step': 0.01654126766549929, 'observation_error': 0.05273385354671384, 'reward_error': 2.4434017387641935e-05, 'variance': 0.08323857972568137}[0m [36mstep[0m=[35m6544[0m
[2m2023-10-09 15:05:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_6544.pt[0m


Epoch 9/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:05:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=9 step=7362[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022692464674597555, 'time_algorithm_update': 0.016747775754019218, 'loss': -64.78153988609687, 'time_step': 0.017041826597927252, 'observation_error': 0.05461120626831479, 'reward_error': 7.627750872408495e-05, 'variance': 0.08896099919065605}[0m [36mstep[0m=[35m7362[0m
[2m2023-10-09 15:05:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_7362.pt[0m


Epoch 10/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:05:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=10 step=8180[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002222297244083619, 'time_algorithm_update': 0.01637493660514104, 'loss': -65.75868828080976, 'time_step': 0.016661951769243535, 'observation_error': 0.09165243941731296, 'reward_error': 5.709539688035889e-05, 'variance': 0.11746246941228151}[0m [36mstep[0m=[35m8180[0m
[2m2023-10-09 15:05:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_8180.pt[0m


Epoch 11/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:05:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=11 step=8998[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022321895748595444, 'time_algorithm_update': 0.01677353399598511, 'loss': -67.11808737274487, 'time_step': 0.017062221004793873, 'observation_error': 0.09927827461269555, 'reward_error': 1.3182791368734088e-05, 'variance': 0.14251806100649472}[0m [36mstep[0m=[35m8998[0m
[2m2023-10-09 15:05:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_8998.pt[0m


Epoch 12/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:06:02[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=12 step=9816[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021486264860717474, 'time_algorithm_update': 0.016411156409527096, 'loss': -68.19312315873238, 'time_step': 0.016687420586210592, 'observation_error': 0.12561481152345402, 'reward_error': 1.7482125000575574e-05, 'variance': 0.13694544475783332}[0m [36mstep[0m=[35m9816[0m
[2m2023-10-09 15:06:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_9816.pt[0m


Epoch 13/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:06:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=13 step=10634[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022311257266765118, 'time_algorithm_update': 0.017156615233945963, 'loss': -69.12203578202823, 'time_step': 0.017446161482328307, 'observation_error': 0.10345481311606441, 'reward_error': 9.190832808182666e-06, 'variance': 0.13448154163489068}[0m [36mstep[0m=[35m10634[0m
[2m2023-10-09 15:06:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_10634.pt[0m


Epoch 14/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:06:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=14 step=11452[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021961294934335723, 'time_algorithm_update': 0.016721443616965, 'loss': -69.81107940417338, 'time_step': 0.017004335713561414, 'observation_error': 0.12168241579678644, 'reward_error': 1.0450730510263996e-05, 'variance': 0.1415492852417134}[0m [36mstep[0m=[35m11452[0m
[2m2023-10-09 15:06:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_11452.pt[0m


Epoch 15/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:06:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=15 step=12270[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002222273926863169, 'time_algorithm_update': 0.01720933692670976, 'loss': -70.64176035685178, 'time_step': 0.017493718119297168, 'observation_error': 0.13435927636760453, 'reward_error': 9.450039956688932e-06, 'variance': 0.14796232393939676}[0m [36mstep[0m=[35m12270[0m
[2m2023-10-09 15:06:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_12270.pt[0m


Epoch 16/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:07:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=16 step=13088[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022296130219998163, 'time_algorithm_update': 0.016537226208264786, 'loss': -70.38880829006652, 'time_step': 0.016825488260730846, 'observation_error': 0.11651863137463465, 'reward_error': 9.369860366392326e-06, 'variance': 0.15291031771469513}[0m [36mstep[0m=[35m13088[0m
[2m2023-10-09 15:07:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_13088.pt[0m


Epoch 17/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:07:21[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=17 step=13906[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022716131653354336, 'time_algorithm_update': 0.01603322681995942, 'loss': -71.65182188029394, 'time_step': 0.01632649129060778, 'observation_error': 0.1109023822902742, 'reward_error': 8.813993229837606e-06, 'variance': 0.13791533462523456}[0m [36mstep[0m=[35m13906[0m
[2m2023-10-09 15:07:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_13906.pt[0m


Epoch 18/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:07:35[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=18 step=14724[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021777963288547358, 'time_algorithm_update': 0.015503367759779205, 'loss': -71.3485872325804, 'time_step': 0.01578544113047257, 'observation_error': 0.0995422398087748, 'reward_error': 6.402421351259815e-06, 'variance': 0.13230891406509954}[0m [36mstep[0m=[35m14724[0m
[2m2023-10-09 15:07:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_14724.pt[0m


Epoch 19/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:07:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=19 step=15542[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002300541091956253, 'time_algorithm_update': 0.01611675056093771, 'loss': -71.81948188930969, 'time_step': 0.016414896783152538, 'observation_error': 0.12131685474790162, 'reward_error': 1.1732133250469006e-05, 'variance': 0.12432736340031944}[0m [36mstep[0m=[35m15542[0m
[2m2023-10-09 15:07:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_15542.pt[0m


Epoch 20/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:08:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=20 step=16360[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002185275327314083, 'time_algorithm_update': 0.01544332300246782, 'loss': -71.90655502538517, 'time_step': 0.015726468673836633, 'observation_error': 0.0940154224886352, 'reward_error': 9.303724891032813e-06, 'variance': 0.10779697166618618}[0m [36mstep[0m=[35m16360[0m
[2m2023-10-09 15:08:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_16360.pt[0m


Epoch 21/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:08:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=21 step=17178[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022528340589154903, 'time_algorithm_update': 0.0158441023022155, 'loss': -72.91196275282023, 'time_step': 0.016135093926800494, 'observation_error': 0.10813980448878521, 'reward_error': 1.2739666721239447e-05, 'variance': 0.10895300623694659}[0m [36mstep[0m=[35m17178[0m
[2m2023-10-09 15:08:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_17178.pt[0m


Epoch 22/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:08:35[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=22 step=17996[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022111078929201605, 'time_algorithm_update': 0.01570054691986233, 'loss': -72.7032405076808, 'time_step': 0.015986221635254205, 'observation_error': 0.09169904110515505, 'reward_error': 6.969378860605651e-06, 'variance': 0.10221771145886517}[0m [36mstep[0m=[35m17996[0m
[2m2023-10-09 15:08:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_17996.pt[0m


Epoch 23/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:08:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=23 step=18814[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002725625679370535, 'time_algorithm_update': 0.01902019598664748, 'loss': -73.25670239161745, 'time_step': 0.01937381708243074, 'observation_error': 0.08728216595694718, 'reward_error': 9.609170439727231e-06, 'variance': 0.09546791137672038}[0m [36mstep[0m=[35m18814[0m
[2m2023-10-09 15:08:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_18814.pt[0m


Epoch 24/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:09:10[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=24 step=19632[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026476732967535267, 'time_algorithm_update': 0.018294429720759682, 'loss': -72.6795995078052, 'time_step': 0.01863451138102338, 'observation_error': 0.09239573496193187, 'reward_error': 1.0544700321429538e-05, 'variance': 0.09973048206067621}[0m [36mstep[0m=[35m19632[0m
[2m2023-10-09 15:09:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_19632.pt[0m


Epoch 25/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:09:28[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=25 step=20450[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002733964500333977, 'time_algorithm_update': 0.019004952062313014, 'loss': -72.794565818712, 'time_step': 0.019357397387255084, 'observation_error': 0.08396268216179777, 'reward_error': 1.0059697854946105e-05, 'variance': 0.09287036571377259}[0m [36mstep[0m=[35m20450[0m
[2m2023-10-09 15:09:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_20450.pt[0m


Epoch 26/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:09:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=26 step=21268[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028401977567043163, 'time_algorithm_update': 0.019386865107529322, 'loss': -73.36282197537224, 'time_step': 0.0197512613823478, 'observation_error': 0.08102061752848182, 'reward_error': 6.421615501725026e-06, 'variance': 0.09227827984901536}[0m [36mstep[0m=[35m21268[0m
[2m2023-10-09 15:09:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_21268.pt[0m


Epoch 27/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:10:02[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=27 step=22086[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025347829739447034, 'time_algorithm_update': 0.0179462322104531, 'loss': -74.05557076913512, 'time_step': 0.018271509475987813, 'observation_error': 0.08418941845623229, 'reward_error': 5.59990471298416e-06, 'variance': 0.08589992477629753}[0m [36mstep[0m=[35m22086[0m
[2m2023-10-09 15:10:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_22086.pt[0m


Epoch 28/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:10:19[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=28 step=22904[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002524820691507428, 'time_algorithm_update': 0.017815132304333823, 'loss': -73.94748823100605, 'time_step': 0.01814537526342863, 'observation_error': 0.07285344304169608, 'reward_error': 5.698793733006851e-06, 'variance': 0.07583991152810628}[0m [36mstep[0m=[35m22904[0m
[2m2023-10-09 15:10:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_22904.pt[0m


Epoch 29/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:10:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=29 step=23722[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021904284330335398, 'time_algorithm_update': 0.015909388770684055, 'loss': -73.57584926493303, 'time_step': 0.01619245545496859, 'observation_error': 0.06900432337500684, 'reward_error': 6.906487850133672e-06, 'variance': 0.08022233751399681}[0m [36mstep[0m=[35m23722[0m
[2m2023-10-09 15:10:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_23722.pt[0m


Epoch 30/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:10:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=30 step=24540[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022608726706656383, 'time_algorithm_update': 0.016469088043735196, 'loss': -73.69282096289189, 'time_step': 0.01676066784520604, 'observation_error': 0.059210094211496866, 'reward_error': 4.295494632520117e-06, 'variance': 0.06848937291348116}[0m [36mstep[0m=[35m24540[0m
[2m2023-10-09 15:10:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_24540.pt[0m


Epoch 31/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:11:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=31 step=25358[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022030663665174565, 'time_algorithm_update': 0.015821899936368237, 'loss': -74.57126315473636, 'time_step': 0.016106401504106218, 'observation_error': 0.06285224262241203, 'reward_error': 4.466133679999281e-06, 'variance': 0.06716295370754075}[0m [36mstep[0m=[35m25358[0m
[2m2023-10-09 15:11:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_25358.pt[0m


Epoch 32/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:11:19[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=32 step=26176[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022630440868200475, 'time_algorithm_update': 0.016479486941125398, 'loss': -73.95701389528429, 'time_step': 0.016773153925291776, 'observation_error': 0.06467556962216774, 'reward_error': 6.995515195205521e-06, 'variance': 0.0679504631205602}[0m [36mstep[0m=[35m26176[0m
[2m2023-10-09 15:11:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_26176.pt[0m


Epoch 33/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:11:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=33 step=26994[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021931856943517558, 'time_algorithm_update': 0.01579143278173246, 'loss': -74.60162511489794, 'time_step': 0.016075252029307024, 'observation_error': 0.058307765226189474, 'reward_error': 5.8585678324577174e-06, 'variance': 0.057740548359396786}[0m [36mstep[0m=[35m26994[0m
[2m2023-10-09 15:11:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_26994.pt[0m


Epoch 34/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:11:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=34 step=27812[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002296650030793654, 'time_algorithm_update': 0.016745467932125176, 'loss': -75.24224116808045, 'time_step': 0.01704473367238686, 'observation_error': 0.05376559774747651, 'reward_error': 4.195027346168572e-06, 'variance': 0.05324655054775119}[0m [36mstep[0m=[35m27812[0m
[2m2023-10-09 15:11:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_27812.pt[0m


Epoch 35/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:12:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=35 step=28630[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000214383188261671, 'time_algorithm_update': 0.015753144739892488, 'loss': -74.72553219480444, 'time_step': 0.01603160569020763, 'observation_error': 0.06006994041364091, 'reward_error': 4.266656597603844e-06, 'variance': 0.05396580530615414}[0m [36mstep[0m=[35m28630[0m
[2m2023-10-09 15:12:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_28630.pt[0m


Epoch 36/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:12:19[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=36 step=29448[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022326238580904262, 'time_algorithm_update': 0.016186483914811336, 'loss': -74.28994323571912, 'time_step': 0.016475829635097812, 'observation_error': 0.049053791592193724, 'reward_error': 3.7484428235663262e-06, 'variance': 0.045378253281965965}[0m [36mstep[0m=[35m29448[0m
[2m2023-10-09 15:12:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_29448.pt[0m


Epoch 37/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:12:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=37 step=30266[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022030925983904627, 'time_algorithm_update': 0.01582723987131655, 'loss': -75.75306746895563, 'time_step': 0.016110708486188594, 'observation_error': 0.05133180963198681, 'reward_error': 3.694431024437006e-06, 'variance': 0.04684501139638015}[0m [36mstep[0m=[35m30266[0m
[2m2023-10-09 15:12:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_30266.pt[0m


Epoch 38/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:12:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=38 step=31084[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022492402923136293, 'time_algorithm_update': 0.016095730961097773, 'loss': -76.08302793992469, 'time_step': 0.016385942916123965, 'observation_error': 0.05283922761250715, 'reward_error': 1.4054585989788766e-05, 'variance': 0.048263764743816454}[0m [36mstep[0m=[35m31084[0m
[2m2023-10-09 15:12:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_31084.pt[0m


Epoch 39/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:13:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=39 step=31902[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021891226686883382, 'time_algorithm_update': 0.015757548488439733, 'loss': -75.92612942273576, 'time_step': 0.016039870478996146, 'observation_error': 0.053127705611640375, 'reward_error': 5.9442899783818686e-06, 'variance': 0.04494277200842404}[0m [36mstep[0m=[35m31902[0m
[2m2023-10-09 15:13:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_31902.pt[0m


Epoch 40/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:13:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=40 step=32720[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022781565603242234, 'time_algorithm_update': 0.016307304424295215, 'loss': -75.25768259043798, 'time_step': 0.016602176325828347, 'observation_error': 0.044300169058225015, 'reward_error': 5.578885521527245e-06, 'variance': 0.04352763659572413}[0m [36mstep[0m=[35m32720[0m
[2m2023-10-09 15:13:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_32720.pt[0m


Epoch 41/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:13:33[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=41 step=33538[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002240644981925237, 'time_algorithm_update': 0.015940537954017994, 'loss': -76.40300252210248, 'time_step': 0.016230005798246575, 'observation_error': 0.048443900797156235, 'reward_error': 3.206454619411474e-06, 'variance': 0.03932724778386226}[0m [36mstep[0m=[35m33538[0m
[2m2023-10-09 15:13:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_33538.pt[0m


Epoch 42/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:13:48[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=42 step=34356[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022341627946401285, 'time_algorithm_update': 0.016121122831237344, 'loss': -76.20811158462375, 'time_step': 0.01640882876799567, 'observation_error': 0.048906017133673214, 'reward_error': 5.185277123178371e-06, 'variance': 0.03811088688093836}[0m [36mstep[0m=[35m34356[0m
[2m2023-10-09 15:13:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_34356.pt[0m


Epoch 43/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:14:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=43 step=35174[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022228976625102072, 'time_algorithm_update': 0.015811581774853842, 'loss': -75.38245046867426, 'time_step': 0.016099318606929265, 'observation_error': 0.0440884543707732, 'reward_error': 7.808666564953138e-06, 'variance': 0.03727357124989537}[0m [36mstep[0m=[35m35174[0m
[2m2023-10-09 15:14:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_35174.pt[0m


Epoch 44/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:14:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=44 step=35992[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022514700015191635, 'time_algorithm_update': 0.016165874115120812, 'loss': -74.9051051454614, 'time_step': 0.016458429159336978, 'observation_error': 0.050145762138977694, 'reward_error': 4.283342921006687e-06, 'variance': 0.036609894083832195}[0m [36mstep[0m=[35m35992[0m
[2m2023-10-09 15:14:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_35992.pt[0m


Epoch 45/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:14:32[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=45 step=36810[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021787814814187495, 'time_algorithm_update': 0.015504307152298086, 'loss': -77.59218037215888, 'time_step': 0.015785703157737377, 'observation_error': 0.04358441194609604, 'reward_error': 4.640160871899256e-06, 'variance': 0.02952607919145557}[0m [36mstep[0m=[35m36810[0m
[2m2023-10-09 15:14:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_36810.pt[0m


Epoch 46/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:14:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=46 step=37628[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022267479185370187, 'time_algorithm_update': 0.01566726566818349, 'loss': -76.70981581811509, 'time_step': 0.015956094912037, 'observation_error': 0.03963523757512673, 'reward_error': 3.7311790450885764e-06, 'variance': 0.030297722123337104}[0m [36mstep[0m=[35m37628[0m
[2m2023-10-09 15:14:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_37628.pt[0m


Epoch 47/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:15:01[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=47 step=38446[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021643131461295055, 'time_algorithm_update': 0.015474657849169593, 'loss': -76.6419468205832, 'time_step': 0.015754965231879126, 'observation_error': 0.038647513050914115, 'reward_error': 4.812623506952493e-06, 'variance': 0.029201077665928592}[0m [36mstep[0m=[35m38446[0m
[2m2023-10-09 15:15:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_38446.pt[0m


Epoch 48/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:15:15[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=48 step=39264[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002217680434434513, 'time_algorithm_update': 0.01567915861647403, 'loss': -77.28949417869735, 'time_step': 0.01596571180814636, 'observation_error': 0.03887315073569649, 'reward_error': 4.741947865999193e-06, 'variance': 0.027941804115869214}[0m [36mstep[0m=[35m39264[0m
[2m2023-10-09 15:15:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_39264.pt[0m


Epoch 49/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:15:29[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=49 step=40082[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022104841572731222, 'time_algorithm_update': 0.015147778982055216, 'loss': -76.1444328705664, 'time_step': 0.015433844260889626, 'observation_error': 0.03809984984516382, 'reward_error': 3.97242587691124e-06, 'variance': 0.02815087344810202}[0m [36mstep[0m=[35m40082[0m
[2m2023-10-09 15:15:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_40082.pt[0m


Epoch 50/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:15:43[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=50 step=40900[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000222880857789429, 'time_algorithm_update': 0.015559557306154433, 'loss': -75.7226313402134, 'time_step': 0.015849024275987247, 'observation_error': 0.040693747429734965, 'reward_error': 4.0909545867393536e-06, 'variance': 0.028227591936593122}[0m [36mstep[0m=[35m40900[0m
[2m2023-10-09 15:15:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_40900.pt[0m


Epoch 51/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:15:57[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=51 step=41718[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022168847342866557, 'time_algorithm_update': 0.01507761309374224, 'loss': -77.03838123173469, 'time_step': 0.015364891159505309, 'observation_error': 0.04527247017635249, 'reward_error': 4.682259789566537e-06, 'variance': 0.02746925682153496}[0m [36mstep[0m=[35m41718[0m
[2m2023-10-09 15:15:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_41718.pt[0m


Epoch 52/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:16:11[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=52 step=42536[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000222878526067384, 'time_algorithm_update': 0.015197666758138568, 'loss': -78.21171181592499, 'time_step': 0.015485861190665322, 'observation_error': 0.037419793511145404, 'reward_error': 3.0200449371060233e-06, 'variance': 0.027537578087795746}[0m [36mstep[0m=[35m42536[0m
[2m2023-10-09 15:16:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_42536.pt[0m


Epoch 53/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:16:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=53 step=43354[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022455736593978622, 'time_algorithm_update': 0.015277357731005381, 'loss': -77.56137572990362, 'time_step': 0.015566738427122532, 'observation_error': 0.03725009845687066, 'reward_error': 2.901961427967496e-06, 'variance': 0.02383738156421874}[0m [36mstep[0m=[35m43354[0m
[2m2023-10-09 15:16:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_43354.pt[0m


Epoch 54/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:16:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=54 step=44172[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002356394578891745, 'time_algorithm_update': 0.01658014271836409, 'loss': -77.15747442921683, 'time_step': 0.01688477287665556, 'observation_error': 0.038252190479854294, 'reward_error': 2.7649933786984753e-06, 'variance': 0.02353924960990608}[0m [36mstep[0m=[35m44172[0m
[2m2023-10-09 15:16:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_44172.pt[0m


Epoch 55/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:16:57[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=55 step=44990[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002655397126027599, 'time_algorithm_update': 0.01772734500959625, 'loss': -77.15015738634142, 'time_step': 0.018068378303919563, 'observation_error': 0.036053182059890845, 'reward_error': 4.12895670699497e-06, 'variance': 0.02679926472155002}[0m [36mstep[0m=[35m44990[0m
[2m2023-10-09 15:16:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_44990.pt[0m


Epoch 56/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:17:11[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=56 step=45808[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002138573849405228, 'time_algorithm_update': 0.015185132294820398, 'loss': -77.36773414308692, 'time_step': 0.015461736902922464, 'observation_error': 0.03498385357282526, 'reward_error': 8.17243916340599e-06, 'variance': 0.02307263387601928}[0m [36mstep[0m=[35m45808[0m
[2m2023-10-09 15:17:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_45808.pt[0m


Epoch 57/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:17:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=57 step=46626[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002181427985939828, 'time_algorithm_update': 0.015030032265157164, 'loss': -78.15750265238046, 'time_step': 0.015312174130185601, 'observation_error': 0.03346068538011194, 'reward_error': 3.273477597722099e-06, 'variance': 0.022875057452559532}[0m [36mstep[0m=[35m46626[0m
[2m2023-10-09 15:17:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_46626.pt[0m


Epoch 58/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:17:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=58 step=47444[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021839841362316625, 'time_algorithm_update': 0.01506464026667961, 'loss': -77.96998687769789, 'time_step': 0.015348685691292536, 'observation_error': 0.03359183579192088, 'reward_error': 2.630473143360947e-06, 'variance': 0.021286602158774915}[0m [36mstep[0m=[35m47444[0m
[2m2023-10-09 15:17:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_47444.pt[0m


Epoch 59/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:17:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=59 step=48262[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023313460548232996, 'time_algorithm_update': 0.0157396067616992, 'loss': -77.73223232698324, 'time_step': 0.016040909261167195, 'observation_error': 0.03311957491315062, 'reward_error': 4.3259166674260524e-06, 'variance': 0.02174168039098619}[0m [36mstep[0m=[35m48262[0m
[2m2023-10-09 15:17:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_48262.pt[0m


Epoch 60/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:18:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=60 step=49080[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021550941000940746, 'time_algorithm_update': 0.015087167033356384, 'loss': -78.86767993171524, 'time_step': 0.015365193408975391, 'observation_error': 0.034565007721061594, 'reward_error': 2.797089490619136e-06, 'variance': 0.021612825275638983}[0m [36mstep[0m=[35m49080[0m
[2m2023-10-09 15:18:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_49080.pt[0m


Epoch 61/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:18:21[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=61 step=49898[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002237179460035851, 'time_algorithm_update': 0.015430990816037054, 'loss': -78.52087147253359, 'time_step': 0.015719781003546308, 'observation_error': 0.03562895527911944, 'reward_error': 2.7887039631189803e-06, 'variance': 0.019943884511603355}[0m [36mstep[0m=[35m49898[0m
[2m2023-10-09 15:18:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_49898.pt[0m


Epoch 62/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:18:35[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=62 step=50716[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000221925434681489, 'time_algorithm_update': 0.015242336140285202, 'loss': -78.33577814312028, 'time_step': 0.015528986098422399, 'observation_error': 0.034200035400393056, 'reward_error': 3.214119678383126e-06, 'variance': 0.018401421115428043}[0m [36mstep[0m=[35m50716[0m
[2m2023-10-09 15:18:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_50716.pt[0m


Epoch 63/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:18:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=63 step=51534[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022486369592344848, 'time_algorithm_update': 0.01539324927155138, 'loss': -78.93060980565973, 'time_step': 0.015688082116740255, 'observation_error': 0.03463403571951898, 'reward_error': 2.670599795653854e-06, 'variance': 0.018217206278744225}[0m [36mstep[0m=[35m51534[0m
[2m2023-10-09 15:18:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_51534.pt[0m


Epoch 64/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:19:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=64 step=52352[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022565677288400514, 'time_algorithm_update': 0.015461330017425612, 'loss': -78.4054233135979, 'time_step': 0.015753627114890548, 'observation_error': 0.03406804948634095, 'reward_error': 2.338889330745983e-06, 'variance': 0.01854433285278157}[0m [36mstep[0m=[35m52352[0m
[2m2023-10-09 15:19:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_52352.pt[0m


Epoch 65/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:19:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=65 step=53170[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022934992913803436, 'time_algorithm_update': 0.01568769883992911, 'loss': -78.2133836081675, 'time_step': 0.01598511202702604, 'observation_error': 0.03683595718341564, 'reward_error': 3.4838662676099517e-06, 'variance': 0.0190330611443163}[0m [36mstep[0m=[35m53170[0m
[2m2023-10-09 15:19:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_53170.pt[0m


Epoch 66/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:19:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=66 step=53988[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022140167161713019, 'time_algorithm_update': 0.015197583690540716, 'loss': -78.8553464675008, 'time_step': 0.015482986177383834, 'observation_error': 0.033205328061928406, 'reward_error': 3.766362009872323e-06, 'variance': 0.018248888040167093}[0m [36mstep[0m=[35m53988[0m
[2m2023-10-09 15:19:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_53988.pt[0m


Epoch 67/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:19:45[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=67 step=54806[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022631577582697413, 'time_algorithm_update': 0.015551117346747289, 'loss': -79.18762292022518, 'time_step': 0.01584333166807963, 'observation_error': 0.030714349029622202, 'reward_error': 3.896950801273891e-06, 'variance': 0.01726716561452373}[0m [36mstep[0m=[35m54806[0m
[2m2023-10-09 15:19:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_54806.pt[0m


Epoch 68/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:20:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=68 step=55624[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023677966996918097, 'time_algorithm_update': 0.016631793276313464, 'loss': -78.1698149305684, 'time_step': 0.016938441540034883, 'observation_error': 0.029983009120466173, 'reward_error': 2.621944132483455e-06, 'variance': 0.017591105621311297}[0m [36mstep[0m=[35m55624[0m
[2m2023-10-09 15:20:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_55624.pt[0m


Epoch 69/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:20:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=69 step=56442[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026311734486325736, 'time_algorithm_update': 0.017525634439183913, 'loss': -78.99351132819588, 'time_step': 0.01786730312776449, 'observation_error': 0.03159015203444071, 'reward_error': 2.7651049002375724e-06, 'variance': 0.017394395777970192}[0m [36mstep[0m=[35m56442[0m
[2m2023-10-09 15:20:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_56442.pt[0m


Epoch 70/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:20:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=70 step=57260[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002188399834854387, 'time_algorithm_update': 0.014997058800788264, 'loss': -77.56218701994507, 'time_step': 0.015283461013458177, 'observation_error': 0.030699244318837905, 'reward_error': 2.745748536850782e-06, 'variance': 0.017273159642575735}[0m [36mstep[0m=[35m57260[0m
[2m2023-10-09 15:20:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_57260.pt[0m


Epoch 71/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:20:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=71 step=58078[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022344367719804163, 'time_algorithm_update': 0.015327850880424668, 'loss': -79.19636962349665, 'time_step': 0.015617163373672001, 'observation_error': 0.03263085163668076, 'reward_error': 5.876472097585756e-06, 'variance': 0.016716382380679642}[0m [36mstep[0m=[35m58078[0m
[2m2023-10-09 15:20:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_58078.pt[0m


Epoch 72/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:20:58[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=72 step=58896[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002175525814513414, 'time_algorithm_update': 0.015123912932528844, 'loss': -78.6327008326654, 'time_step': 0.015407046362356918, 'observation_error': 0.03062907364547528, 'reward_error': 2.9035753477683454e-06, 'variance': 0.01733232470503508}[0m [36mstep[0m=[35m58896[0m
[2m2023-10-09 15:20:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_58896.pt[0m


Epoch 73/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:21:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=73 step=59714[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023414598991935003, 'time_algorithm_update': 0.015532831399539863, 'loss': -79.33993287774345, 'time_step': 0.015836267424679036, 'observation_error': 0.031560205243173836, 'reward_error': 3.3467939018696467e-06, 'variance': 0.016556721483954984}[0m [36mstep[0m=[35m59714[0m
[2m2023-10-09 15:21:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_59714.pt[0m


Epoch 74/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:21:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=74 step=60532[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002163400859879398, 'time_algorithm_update': 0.015017665685826234, 'loss': -78.4512659902969, 'time_step': 0.01529810131324824, 'observation_error': 0.03136766613059061, 'reward_error': 3.0826805395868265e-06, 'variance': 0.01762191906968223}[0m [36mstep[0m=[35m60532[0m
[2m2023-10-09 15:21:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_60532.pt[0m


Epoch 75/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:21:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=75 step=61350[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021791020931999374, 'time_algorithm_update': 0.015135767698870895, 'loss': -78.88023703372275, 'time_step': 0.015418246497734834, 'observation_error': 0.03360793042002998, 'reward_error': 2.2062583217817736e-06, 'variance': 0.016684363444488537}[0m [36mstep[0m=[35m61350[0m
[2m2023-10-09 15:21:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_61350.pt[0m


Epoch 76/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:21:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=76 step=62168[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002313012890244463, 'time_algorithm_update': 0.01556766062319133, 'loss': -78.89571066167943, 'time_step': 0.0158674696546895, 'observation_error': 0.029500742653596836, 'reward_error': 2.5583191671682693e-06, 'variance': 0.016858960707450747}[0m [36mstep[0m=[35m62168[0m
[2m2023-10-09 15:21:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_62168.pt[0m


Epoch 77/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:22:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=77 step=62986[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022286162108255773, 'time_algorithm_update': 0.014929086771454379, 'loss': -80.26808811691396, 'time_step': 0.015216040727853193, 'observation_error': 0.029818830768668765, 'reward_error': 4.256647831390502e-06, 'variance': 0.016010047750736563}[0m [36mstep[0m=[35m62986[0m
[2m2023-10-09 15:22:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_62986.pt[0m


Epoch 78/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:22:21[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=78 step=63804[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002284903980991951, 'time_algorithm_update': 0.015195532649536879, 'loss': -79.86192321544172, 'time_step': 0.015491188884072898, 'observation_error': 0.03247185096838409, 'reward_error': 3.006350107580871e-06, 'variance': 0.015619647379173231}[0m [36mstep[0m=[35m63804[0m
[2m2023-10-09 15:22:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_63804.pt[0m


Epoch 79/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:22:35[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=79 step=64622[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002217633799993613, 'time_algorithm_update': 0.015060782724021409, 'loss': -78.53694005467197, 'time_step': 0.015349294562211537, 'observation_error': 0.030171110043900516, 'reward_error': 4.84360877381875e-06, 'variance': 0.015155846307797415}[0m [36mstep[0m=[35m64622[0m
[2m2023-10-09 15:22:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_64622.pt[0m


Epoch 80/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:22:48[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=80 step=65440[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022930504348866806, 'time_algorithm_update': 0.015296754452301995, 'loss': -79.25058192495612, 'time_step': 0.015592939404811719, 'observation_error': 0.028781095978438274, 'reward_error': 2.7226944037852905e-06, 'variance': 0.01606560539115773}[0m [36mstep[0m=[35m65440[0m
[2m2023-10-09 15:22:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_65440.pt[0m


Epoch 81/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:23:02[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=81 step=66258[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022355764011299114, 'time_algorithm_update': 0.014905224802441585, 'loss': -79.35814372601311, 'time_step': 0.015195166860641068, 'observation_error': 0.02931656782720203, 'reward_error': 3.252667071795495e-06, 'variance': 0.015580504524590623}[0m [36mstep[0m=[35m66258[0m
[2m2023-10-09 15:23:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_66258.pt[0m


Epoch 82/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:23:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=82 step=67076[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002298142332902456, 'time_algorithm_update': 0.015261919982275928, 'loss': -81.29450143753462, 'time_step': 0.01555811309581281, 'observation_error': 0.029704931119548188, 'reward_error': 3.175108538884244e-06, 'variance': 0.01450747029531629}[0m [36mstep[0m=[35m67076[0m
[2m2023-10-09 15:23:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_67076.pt[0m


Epoch 83/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:23:30[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=83 step=67894[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022338450975114966, 'time_algorithm_update': 0.015173835975908125, 'loss': -80.35040493058109, 'time_step': 0.015463152549669037, 'observation_error': 0.03124888269096222, 'reward_error': 3.097100893119114e-06, 'variance': 0.015377626089532567}[0m [36mstep[0m=[35m67894[0m
[2m2023-10-09 15:23:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_67894.pt[0m


Epoch 84/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:23:44[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=84 step=68712[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023263474256893243, 'time_algorithm_update': 0.015816235600590416, 'loss': -79.20114408203908, 'time_step': 0.016118898951336922, 'observation_error': 0.02728793249550995, 'reward_error': 3.218636255810467e-06, 'variance': 0.014333999709690456}[0m [36mstep[0m=[35m68712[0m
[2m2023-10-09 15:23:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_68712.pt[0m


Epoch 85/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:23:58[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=85 step=69530[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002235214984212936, 'time_algorithm_update': 0.015304766249248626, 'loss': -80.14540621237533, 'time_step': 0.01559439119324999, 'observation_error': 0.029593769223185604, 'reward_error': 2.6800469211335094e-06, 'variance': 0.015284143356910097}[0m [36mstep[0m=[35m69530[0m
[2m2023-10-09 15:23:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_69530.pt[0m


Epoch 86/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:24:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=86 step=70348[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022713071268170272, 'time_algorithm_update': 0.015677726356207886, 'loss': -80.19864690099777, 'time_step': 0.015973499176846158, 'observation_error': 0.025312000187781174, 'reward_error': 3.14984381861997e-06, 'variance': 0.013757390066101351}[0m [36mstep[0m=[35m70348[0m
[2m2023-10-09 15:24:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_70348.pt[0m


Epoch 87/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:24:26[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=87 step=71166[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002220082108140866, 'time_algorithm_update': 0.01511747971140668, 'loss': -80.80092672497253, 'time_step': 0.015403600660104915, 'observation_error': 0.028777173806728926, 'reward_error': 3.309932017397037e-06, 'variance': 0.014309596855950842}[0m [36mstep[0m=[35m71166[0m
[2m2023-10-09 15:24:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_71166.pt[0m


Epoch 88/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:24:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=88 step=71984[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002188673812194675, 'time_algorithm_update': 0.015218816934413024, 'loss': -81.60399834509292, 'time_step': 0.01550231935925472, 'observation_error': 0.026695256365208895, 'reward_error': 1.8574039347975652e-06, 'variance': 0.013717397450608945}[0m [36mstep[0m=[35m71984[0m
[2m2023-10-09 15:24:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_71984.pt[0m


Epoch 89/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:24:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=89 step=72802[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002290954799698734, 'time_algorithm_update': 0.015465456873980012, 'loss': -80.38904716566314, 'time_step': 0.015762049003451843, 'observation_error': 0.029604658160267052, 'reward_error': 2.8323905721335876e-06, 'variance': 0.013916183511610121}[0m [36mstep[0m=[35m72802[0m
[2m2023-10-09 15:24:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_72802.pt[0m


Epoch 90/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:25:08[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=90 step=73620[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022476430627128022, 'time_algorithm_update': 0.015087002646952211, 'loss': -80.51968413518519, 'time_step': 0.015379651251515433, 'observation_error': 0.03142745878791561, 'reward_error': 4.613593645117303e-06, 'variance': 0.014285789892566899}[0m [36mstep[0m=[35m73620[0m
[2m2023-10-09 15:25:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_73620.pt[0m


Epoch 91/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:25:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=91 step=74438[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002334348146956241, 'time_algorithm_update': 0.015513755290024437, 'loss': -80.39100881543894, 'time_step': 0.015815472544551186, 'observation_error': 0.030492833668573457, 'reward_error': 2.420640111593023e-06, 'variance': 0.01365369451639162}[0m [36mstep[0m=[35m74438[0m
[2m2023-10-09 15:25:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_74438.pt[0m


Epoch 92/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:25:36[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=92 step=75256[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002244471920731598, 'time_algorithm_update': 0.015182167801705433, 'loss': -81.44072558535923, 'time_step': 0.015471741447821806, 'observation_error': 0.029006168493584086, 'reward_error': 2.852570069709898e-06, 'variance': 0.014230694509185701}[0m [36mstep[0m=[35m75256[0m
[2m2023-10-09 15:25:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_75256.pt[0m


Epoch 93/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:25:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=93 step=76074[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002322590438544313, 'time_algorithm_update': 0.015441662233441266, 'loss': -81.33841397534955, 'time_step': 0.015742409491597293, 'observation_error': 0.028189122043508662, 'reward_error': 2.496322176164793e-06, 'variance': 0.013720349842348734}[0m [36mstep[0m=[35m76074[0m
[2m2023-10-09 15:25:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_76074.pt[0m


Epoch 94/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:26:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=94 step=76892[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022133580046935887, 'time_algorithm_update': 0.01500906046561915, 'loss': -77.4935796686373, 'time_step': 0.015298268322839714, 'observation_error': 0.03307919316540737, 'reward_error': 2.528662600652645e-06, 'variance': 0.014667001603682422}[0m [36mstep[0m=[35m76892[0m
[2m2023-10-09 15:26:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_76892.pt[0m


Epoch 95/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:26:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=95 step=77710[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022728548073243978, 'time_algorithm_update': 0.015408931268165048, 'loss': -81.61916836782889, 'time_step': 0.015706160832150932, 'observation_error': 0.03075173328069022, 'reward_error': 2.3742948112883836e-06, 'variance': 0.013952074184502304}[0m [36mstep[0m=[35m77710[0m
[2m2023-10-09 15:26:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_77710.pt[0m


Epoch 96/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:26:31[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=96 step=78528[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022378935499121333, 'time_algorithm_update': 0.01515089212245055, 'loss': -81.18096916832958, 'time_step': 0.015442091270297546, 'observation_error': 0.028875441978216135, 'reward_error': 2.217478415727027e-06, 'variance': 0.013284839978146314}[0m [36mstep[0m=[35m78528[0m
[2m2023-10-09 15:26:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_78528.pt[0m


Epoch 97/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:26:45[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=97 step=79346[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022659150195879574, 'time_algorithm_update': 0.01526013825516829, 'loss': -81.1978529811197, 'time_step': 0.01555396554522526, 'observation_error': 0.027511689329463584, 'reward_error': 2.2924239343162614e-06, 'variance': 0.013238926345659525}[0m [36mstep[0m=[35m79346[0m
[2m2023-10-09 15:26:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_79346.pt[0m


Epoch 98/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:26:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=98 step=80164[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002270595951593301, 'time_algorithm_update': 0.015273751139932273, 'loss': -79.72670632294746, 'time_step': 0.015568548717825221, 'observation_error': 0.027566154351661293, 'reward_error': 2.518398012586861e-06, 'variance': 0.013148311604139705}[0m [36mstep[0m=[35m80164[0m
[2m2023-10-09 15:26:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_80164.pt[0m


Epoch 99/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:27:13[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=99 step=80982[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023516028900892635, 'time_algorithm_update': 0.015564698753263665, 'loss': -81.92433531884751, 'time_step': 0.015867405240868007, 'observation_error': 0.029063578037367505, 'reward_error': 2.236008889484335e-06, 'variance': 0.014187622661500187}[0m [36mstep[0m=[35m80982[0m
[2m2023-10-09 15:27:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_80982.pt[0m


Epoch 100/100:   0%|          | 0/818 [00:00<?, ?it/s]

[2m2023-10-09 15:27:27[0m [[32m[1minfo     [0m] [1mexp_0_20231009150306: epoch=100 step=81800[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022261037803220866, 'time_algorithm_update': 0.015055440457351051, 'loss': -80.93698149846644, 'time_step': 0.015344099194030016, 'observation_error': 0.02943925721547232, 'reward_error': 2.6864894979649483e-06, 'variance': 0.012914434391427707}[0m [36mstep[0m=[35m81800[0m
[2m2023-10-09 15:27:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009150306/model_81800.pt[0m
[2m2023-10-09 15:27:27[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-09 15:27:27[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_0_20231009152727[0m
[2m2023-10-09 15:27:27[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-09 15:27:27[0m [[32m[1mdebug    [0m] [1mModels have been built.[0m
[2m2023-10-09 15:27:27[0m [[

Epoch 1/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:27:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=1 step=753[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002217153470671351, 'time_algorithm_update': 0.012521320130245619, 'loss': -19.853344014243458, 'time_step': 0.012814625959155727, 'observation_error': 0.06252040833884205, 'reward_error': 0.0027137589529479065, 'variance': 0.05180356261083868}[0m [36mstep[0m=[35m753[0m
[2m2023-10-09 15:27:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_753.pt[0m


Epoch 2/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:27:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=2 step=1506[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023507945249438445, 'time_algorithm_update': 0.013210705077030744, 'loss': -25.504814712491484, 'time_step': 0.013523006185909034, 'observation_error': 0.034435984646922255, 'reward_error': 0.0013754829824024623, 'variance': 0.033100808305974984}[0m [36mstep[0m=[35m1506[0m
[2m2023-10-09 15:27:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_1506.pt[0m


Epoch 3/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:28:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=3 step=2259[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022402195974808447, 'time_algorithm_update': 0.013369314857371459, 'loss': -35.81385330612441, 'time_step': 0.01365915142682444, 'observation_error': 0.018880901627573957, 'reward_error': 0.00022897749469212293, 'variance': 0.014195697491399517}[0m [36mstep[0m=[35m2259[0m
[2m2023-10-09 15:28:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_2259.pt[0m


Epoch 4/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:28:13[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=4 step=3012[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022839898290545502, 'time_algorithm_update': 0.014428525015335793, 'loss': -44.1010386472046, 'time_step': 0.014725008492134165, 'observation_error': 0.012711417473393815, 'reward_error': 4.156403376126466e-05, 'variance': 0.003489984858361877}[0m [36mstep[0m=[35m3012[0m
[2m2023-10-09 15:28:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_3012.pt[0m


Epoch 5/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:28:26[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=5 step=3765[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002195258856136327, 'time_algorithm_update': 0.014594745509336352, 'loss': -51.66540190454815, 'time_step': 0.014879991967047987, 'observation_error': 0.012598614314933362, 'reward_error': 1.686198230945337e-05, 'variance': 0.002768270528974095}[0m [36mstep[0m=[35m3765[0m
[2m2023-10-09 15:28:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_3765.pt[0m


Epoch 6/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:28:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=6 step=4518[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023060744185530015, 'time_algorithm_update': 0.015379204693068546, 'loss': -57.17712620371683, 'time_step': 0.01567544531853867, 'observation_error': 0.015300529512287083, 'reward_error': 1.3107851213968796e-05, 'variance': 0.006219900082509727}[0m [36mstep[0m=[35m4518[0m
[2m2023-10-09 15:28:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_4518.pt[0m


Epoch 7/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:28:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=7 step=5271[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002317548906344025, 'time_algorithm_update': 0.016675213260321346, 'loss': -60.177249918579264, 'time_step': 0.01697524293643703, 'observation_error': 0.0251930076057631, 'reward_error': 1.3962031057391216e-05, 'variance': 0.014753745748412973}[0m [36mstep[0m=[35m5271[0m
[2m2023-10-09 15:28:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_5271.pt[0m


Epoch 8/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:29:08[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=8 step=6024[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022056093253937375, 'time_algorithm_update': 0.01606442222240595, 'loss': -62.19507494770673, 'time_step': 0.016349474272405008, 'observation_error': 0.022341485038660455, 'reward_error': 8.052737297433449e-06, 'variance': 0.02190918570323203}[0m [36mstep[0m=[35m6024[0m
[2m2023-10-09 15:29:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_6024.pt[0m


Epoch 9/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:29:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=9 step=6777[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022709892090573254, 'time_algorithm_update': 0.016222221442902706, 'loss': -63.30456771255331, 'time_step': 0.01651585656174942, 'observation_error': 0.023139286953048187, 'reward_error': 8.527654044209479e-06, 'variance': 0.026281994278573383}[0m [36mstep[0m=[35m6777[0m
[2m2023-10-09 15:29:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_6777.pt[0m


Epoch 10/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:29:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=10 step=7530[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002260907870998104, 'time_algorithm_update': 0.01606496618405123, 'loss': -64.68056398622227, 'time_step': 0.016357981034642356, 'observation_error': 0.02383844836014831, 'reward_error': 1.403446653771894e-05, 'variance': 0.03234431069911572}[0m [36mstep[0m=[35m7530[0m
[2m2023-10-09 15:29:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_7530.pt[0m


Epoch 11/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:29:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=11 step=8283[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023321136535401363, 'time_algorithm_update': 0.01644314079487308, 'loss': -64.18360253824181, 'time_step': 0.016748868136766897, 'observation_error': 0.04384582345833991, 'reward_error': 1.0629560810053452e-05, 'variance': 0.04705228670233988}[0m [36mstep[0m=[35m8283[0m
[2m2023-10-09 15:29:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_8283.pt[0m


Epoch 12/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:30:05[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=12 step=9036[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022004895001293654, 'time_algorithm_update': 0.016116312933791364, 'loss': -66.59028523218426, 'time_step': 0.016400780810777886, 'observation_error': 0.04155088993414627, 'reward_error': 9.609906206474693e-06, 'variance': 0.03921817417385938}[0m [36mstep[0m=[35m9036[0m
[2m2023-10-09 15:30:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_9036.pt[0m


Epoch 13/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:30:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=13 step=9789[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022399979600235128, 'time_algorithm_update': 0.0163793649331507, 'loss': -67.65841437335983, 'time_step': 0.016668254477410993, 'observation_error': 0.04298947900542501, 'reward_error': 8.066599751386318e-06, 'variance': 0.04877557789593143}[0m [36mstep[0m=[35m9789[0m
[2m2023-10-09 15:30:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_9789.pt[0m


Epoch 14/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:30:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=14 step=10542[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021793769491937847, 'time_algorithm_update': 0.01597986183318484, 'loss': -67.74077157860258, 'time_step': 0.016262292228688597, 'observation_error': 0.04681894208792894, 'reward_error': 8.597301724857565e-06, 'variance': 0.04752203665555111}[0m [36mstep[0m=[35m10542[0m
[2m2023-10-09 15:30:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_10542.pt[0m


Epoch 15/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:30:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=15 step=11295[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022950843669187183, 'time_algorithm_update': 0.0165444091655027, 'loss': -68.11045183318545, 'time_step': 0.016841974866342733, 'observation_error': 0.04726066150175103, 'reward_error': 1.0762532529124628e-05, 'variance': 0.050934401708291506}[0m [36mstep[0m=[35m11295[0m
[2m2023-10-09 15:30:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_11295.pt[0m


Epoch 16/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:31:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=16 step=12048[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002259831346205348, 'time_algorithm_update': 0.016267027671276968, 'loss': -69.47860814850644, 'time_step': 0.016559378242745976, 'observation_error': 0.05138126490034734, 'reward_error': 6.361785130995316e-06, 'variance': 0.05682344646213915}[0m [36mstep[0m=[35m12048[0m
[2m2023-10-09 15:31:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_12048.pt[0m


Epoch 17/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:31:18[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=17 step=12801[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022925323699100083, 'time_algorithm_update': 0.016088940391185906, 'loss': -69.1190402317807, 'time_step': 0.01638730905287136, 'observation_error': 0.057854198319158605, 'reward_error': 5.714333216990147e-06, 'variance': 0.055503361636398005}[0m [36mstep[0m=[35m12801[0m
[2m2023-10-09 15:31:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_12801.pt[0m


Epoch 18/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:31:32[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=18 step=13554[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002258526851456479, 'time_algorithm_update': 0.015379587492619853, 'loss': -69.07872334514481, 'time_step': 0.015674623043571968, 'observation_error': 0.054536470972295005, 'reward_error': 5.189368224737635e-06, 'variance': 0.052877625381847405}[0m [36mstep[0m=[35m13554[0m
[2m2023-10-09 15:31:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_13554.pt[0m


Epoch 19/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:31:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=19 step=14307[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023185272774056768, 'time_algorithm_update': 0.015821408465564013, 'loss': -70.51102950563468, 'time_step': 0.016119727733758974, 'observation_error': 0.049737767601305646, 'reward_error': 5.396991324889522e-06, 'variance': 0.058692828604009824}[0m [36mstep[0m=[35m14307[0m
[2m2023-10-09 15:31:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_14307.pt[0m


Epoch 20/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:32:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=20 step=15060[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022440190967493956, 'time_algorithm_update': 0.01539061965537103, 'loss': -70.74169809916421, 'time_step': 0.01568079849638312, 'observation_error': 0.049065752876945416, 'reward_error': 5.675010277886051e-06, 'variance': 0.0546500640660523}[0m [36mstep[0m=[35m15060[0m
[2m2023-10-09 15:32:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_15060.pt[0m


Epoch 21/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:32:14[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=21 step=15813[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022426322795163746, 'time_algorithm_update': 0.01565605615714632, 'loss': -70.61717313884265, 'time_step': 0.015948591637579727, 'observation_error': 0.053214195996303934, 'reward_error': 5.0752865450254255e-06, 'variance': 0.052658489264326126}[0m [36mstep[0m=[35m15813[0m
[2m2023-10-09 15:32:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_15813.pt[0m


Epoch 22/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:32:28[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=22 step=16566[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022344823535853331, 'time_algorithm_update': 0.015665815804267468, 'loss': -70.75030198926787, 'time_step': 0.01595625617747921, 'observation_error': 0.06208626835685338, 'reward_error': 4.901460945699703e-06, 'variance': 0.046929329387084495}[0m [36mstep[0m=[35m16566[0m
[2m2023-10-09 15:32:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_16566.pt[0m


Epoch 23/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:32:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=23 step=17319[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022242268718096364, 'time_algorithm_update': 0.01565521931743242, 'loss': -71.58734821355043, 'time_step': 0.015944679419832877, 'observation_error': 0.04758177422471666, 'reward_error': 5.065112933189913e-06, 'variance': 0.04684748123571649}[0m [36mstep[0m=[35m17319[0m
[2m2023-10-09 15:32:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_17319.pt[0m


Epoch 24/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:32:56[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=24 step=18072[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022503895905229992, 'time_algorithm_update': 0.015865421865090906, 'loss': -71.61306907962835, 'time_step': 0.016154883867240996, 'observation_error': 0.048405080442034086, 'reward_error': 4.666262297021651e-06, 'variance': 0.04318065387098363}[0m [36mstep[0m=[35m18072[0m
[2m2023-10-09 15:32:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_18072.pt[0m


Epoch 25/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:33:10[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=25 step=18825[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00021971681045187738, 'time_algorithm_update': 0.015449227248213364, 'loss': -72.64397664975677, 'time_step': 0.015731673791589015, 'observation_error': 0.04572156797797297, 'reward_error': 4.652094909859744e-06, 'variance': 0.04330280806121047}[0m [36mstep[0m=[35m18825[0m
[2m2023-10-09 15:33:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_18825.pt[0m


Epoch 26/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:33:24[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=26 step=19578[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002277980287711459, 'time_algorithm_update': 0.01591878678219252, 'loss': -72.11539772813855, 'time_step': 0.01621614002769845, 'observation_error': 0.05196998011014951, 'reward_error': 4.755402579508554e-06, 'variance': 0.04048153188404034}[0m [36mstep[0m=[35m19578[0m
[2m2023-10-09 15:33:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_19578.pt[0m


Epoch 27/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:33:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=27 step=20331[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022232864957406702, 'time_algorithm_update': 0.015943427801448827, 'loss': -73.65821777080318, 'time_step': 0.01623173760544573, 'observation_error': 0.04168545801674244, 'reward_error': 4.513710491317647e-06, 'variance': 0.03848932606141642}[0m [36mstep[0m=[35m20331[0m
[2m2023-10-09 15:33:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_20331.pt[0m


Epoch 28/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:33:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=28 step=21084[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023054728311688142, 'time_algorithm_update': 0.016444242016411082, 'loss': -72.60636798762388, 'time_step': 0.016743386092572258, 'observation_error': 0.038455264219313105, 'reward_error': 5.307495272268197e-06, 'variance': 0.03621490571858407}[0m [36mstep[0m=[35m21084[0m
[2m2023-10-09 15:33:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_21084.pt[0m


Epoch 29/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:34:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=29 step=21837[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022212252673874813, 'time_algorithm_update': 0.016070015085329258, 'loss': -72.88105125731207, 'time_step': 0.016359289012265552, 'observation_error': 0.04468046865220231, 'reward_error': 4.132764104944429e-06, 'variance': 0.03694644164134152}[0m [36mstep[0m=[35m21837[0m
[2m2023-10-09 15:34:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_21837.pt[0m


Epoch 30/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:34:21[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=30 step=22590[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022623611794683247, 'time_algorithm_update': 0.01615668071377008, 'loss': -73.73545357778886, 'time_step': 0.016447695444621255, 'observation_error': 0.041361437650373555, 'reward_error': 6.946028614980669e-06, 'variance': 0.03573182023304679}[0m [36mstep[0m=[35m22590[0m
[2m2023-10-09 15:34:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_22590.pt[0m


Epoch 31/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:34:35[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=31 step=23343[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002191215555664711, 'time_algorithm_update': 0.01586043660542563, 'loss': -73.02393374373396, 'time_step': 0.016144109753815142, 'observation_error': 0.037519339457664835, 'reward_error': 7.063648885184671e-06, 'variance': 0.03889597675169637}[0m [36mstep[0m=[35m23343[0m
[2m2023-10-09 15:34:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_23343.pt[0m


Epoch 32/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:34:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=32 step=24096[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022795032536682697, 'time_algorithm_update': 0.015911225778648102, 'loss': -73.69712046259744, 'time_step': 0.016206259113225645, 'observation_error': 0.03425093343873942, 'reward_error': 3.157343280060662e-06, 'variance': 0.028804316765392055}[0m [36mstep[0m=[35m24096[0m
[2m2023-10-09 15:34:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_24096.pt[0m


Epoch 33/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:35:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=33 step=24849[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022476349535532997, 'time_algorithm_update': 0.01578665761200397, 'loss': -73.47523230543808, 'time_step': 0.016076792125524594, 'observation_error': 0.03773266484534493, 'reward_error': 3.4431758823497965e-06, 'variance': 0.028878473645239284}[0m [36mstep[0m=[35m24849[0m
[2m2023-10-09 15:35:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_24849.pt[0m


Epoch 34/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:35:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=34 step=25602[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022425847857755177, 'time_algorithm_update': 0.015950532231831138, 'loss': -73.83861041480642, 'time_step': 0.016240394447904184, 'observation_error': 0.03307928207299982, 'reward_error': 5.832603523296627e-06, 'variance': 0.02828327684697329}[0m [36mstep[0m=[35m25602[0m
[2m2023-10-09 15:35:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_25602.pt[0m


Epoch 35/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:35:32[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=35 step=26355[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026215626740677266, 'time_algorithm_update': 0.017977547043981464, 'loss': -74.62488337563646, 'time_step': 0.018322469070454834, 'observation_error': 0.033475162865454766, 'reward_error': 4.431119638322296e-06, 'variance': 0.026256114922122344}[0m [36mstep[0m=[35m26355[0m
[2m2023-10-09 15:35:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_26355.pt[0m


Epoch 36/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:35:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=36 step=27108[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002375190476497331, 'time_algorithm_update': 0.01636403300372728, 'loss': -74.86137463340404, 'time_step': 0.016669634645520296, 'observation_error': 0.028758071509083452, 'reward_error': 3.058839784135988e-06, 'variance': 0.024364944202806802}[0m [36mstep[0m=[35m27108[0m
[2m2023-10-09 15:35:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_27108.pt[0m


Epoch 37/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:36:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=37 step=27861[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002195686299804039, 'time_algorithm_update': 0.01538112787294831, 'loss': -75.39778652355807, 'time_step': 0.015663899590136362, 'observation_error': 0.02717764179023671, 'reward_error': 4.907653918930111e-06, 'variance': 0.02318967858359053}[0m [36mstep[0m=[35m27861[0m
[2m2023-10-09 15:36:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_27861.pt[0m


Epoch 38/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:36:14[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=38 step=28614[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002321544713074784, 'time_algorithm_update': 0.01594135644109759, 'loss': -75.8330176240736, 'time_step': 0.016239992650856534, 'observation_error': 0.027719024419046324, 'reward_error': 4.274853953413878e-06, 'variance': 0.022662088482671178}[0m [36mstep[0m=[35m28614[0m
[2m2023-10-09 15:36:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_28614.pt[0m


Epoch 39/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:36:27[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=39 step=29367[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022185846153958386, 'time_algorithm_update': 0.015339252641634795, 'loss': -75.34900593852616, 'time_step': 0.01562676835028457, 'observation_error': 0.02982310464370028, 'reward_error': 3.1822244429565905e-06, 'variance': 0.022170434940639677}[0m [36mstep[0m=[35m29367[0m
[2m2023-10-09 15:36:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_29367.pt[0m


Epoch 40/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:36:41[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=40 step=30120[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002248663984605199, 'time_algorithm_update': 0.015209524438359032, 'loss': -75.57307652933189, 'time_step': 0.015499751722986796, 'observation_error': 0.02688497682483608, 'reward_error': 3.0250134890165556e-06, 'variance': 0.020019333489957696}[0m [36mstep[0m=[35m30120[0m
[2m2023-10-09 15:36:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_30120.pt[0m


Epoch 41/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:36:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=41 step=30873[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022540054473269033, 'time_algorithm_update': 0.015139902097137168, 'loss': -75.4682918842412, 'time_step': 0.01543184263931011, 'observation_error': 0.025584864211484742, 'reward_error': 2.560405918905992e-06, 'variance': 0.019798544002251825}[0m [36mstep[0m=[35m30873[0m
[2m2023-10-09 15:36:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_30873.pt[0m


Epoch 42/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:37:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=42 step=31626[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002228548802227613, 'time_algorithm_update': 0.01501312863779258, 'loss': -75.81762559860351, 'time_step': 0.01530167876011822, 'observation_error': 0.027468066075229444, 'reward_error': 4.243625628546184e-06, 'variance': 0.019061000093749406}[0m [36mstep[0m=[35m31626[0m
[2m2023-10-09 15:37:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_31626.pt[0m


Epoch 43/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:37:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=43 step=32379[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022860763874028625, 'time_algorithm_update': 0.015205248101932278, 'loss': -76.10127555674924, 'time_step': 0.015501219912829153, 'observation_error': 0.02602176166586805, 'reward_error': 7.622050520208937e-06, 'variance': 0.018898741902051354}[0m [36mstep[0m=[35m32379[0m
[2m2023-10-09 15:37:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_32379.pt[0m


Epoch 44/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:37:33[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=44 step=33132[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022903476578305917, 'time_algorithm_update': 0.015341914824122293, 'loss': -74.4792953126459, 'time_step': 0.015640964862676573, 'observation_error': 0.029315798806283294, 'reward_error': 4.402830077658893e-06, 'variance': 0.01884924281706839}[0m [36mstep[0m=[35m33132[0m
[2m2023-10-09 15:37:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_33132.pt[0m


Epoch 45/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:37:47[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=45 step=33885[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022270131712732403, 'time_algorithm_update': 0.015139731119670083, 'loss': -76.17401660549372, 'time_step': 0.015428147626271444, 'observation_error': 0.02702043408305088, 'reward_error': 3.8299873072551775e-06, 'variance': 0.01772274272949244}[0m [36mstep[0m=[35m33885[0m
[2m2023-10-09 15:37:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_33885.pt[0m


Epoch 46/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:38:00[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=46 step=34638[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022665627924094636, 'time_algorithm_update': 0.015030805174889634, 'loss': -75.18995531495032, 'time_step': 0.015325021933749378, 'observation_error': 0.030484791405600637, 'reward_error': 3.458806797454798e-06, 'variance': 0.017893950320850917}[0m [36mstep[0m=[35m34638[0m
[2m2023-10-09 15:38:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_34638.pt[0m


Epoch 47/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:38:13[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=47 step=35391[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002226294432661606, 'time_algorithm_update': 0.014957652465597408, 'loss': -76.07082381634757, 'time_step': 0.015246260213661954, 'observation_error': 0.026633162559340114, 'reward_error': 3.779487417427958e-06, 'variance': 0.01701078524975258}[0m [36mstep[0m=[35m35391[0m
[2m2023-10-09 15:38:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_35391.pt[0m


Epoch 48/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:38:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=48 step=36144[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002237572612990421, 'time_algorithm_update': 0.014643727387406753, 'loss': -76.78200039097354, 'time_step': 0.014930929320741934, 'observation_error': 0.021803240894558977, 'reward_error': 2.309537676404668e-06, 'variance': 0.014901202986327326}[0m [36mstep[0m=[35m36144[0m
[2m2023-10-09 15:38:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_36144.pt[0m


Epoch 49/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:38:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=49 step=36897[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022455642264519396, 'time_algorithm_update': 0.014673274826718517, 'loss': -76.90509633524009, 'time_step': 0.01496428259182103, 'observation_error': 0.025010108045548488, 'reward_error': 2.014891768419336e-06, 'variance': 0.01406064338548257}[0m [36mstep[0m=[35m36897[0m
[2m2023-10-09 15:38:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_36897.pt[0m


Epoch 50/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:38:51[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=50 step=37650[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002292364558692314, 'time_algorithm_update': 0.014920527558244398, 'loss': -77.40320848563753, 'time_step': 0.0152143133905621, 'observation_error': 0.022339586395485576, 'reward_error': 2.074184188472183e-06, 'variance': 0.013410413502603734}[0m [36mstep[0m=[35m37650[0m
[2m2023-10-09 15:38:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_37650.pt[0m


Epoch 51/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:39:03[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=51 step=38403[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022411758047967634, 'time_algorithm_update': 0.014578540644955984, 'loss': -76.8177365813439, 'time_step': 0.014869203922124815, 'observation_error': 0.023205390930217114, 'reward_error': 3.5006914310666295e-06, 'variance': 0.012725848374038493}[0m [36mstep[0m=[35m38403[0m
[2m2023-10-09 15:39:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_38403.pt[0m


Epoch 52/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:39:16[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=52 step=39156[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022816056432635345, 'time_algorithm_update': 0.014860083540755598, 'loss': -76.99370253766834, 'time_step': 0.015156197516250103, 'observation_error': 0.024875643083414625, 'reward_error': 1.969892325597591e-06, 'variance': 0.012833547203087093}[0m [36mstep[0m=[35m39156[0m
[2m2023-10-09 15:39:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_39156.pt[0m


Epoch 53/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:39:29[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=53 step=39909[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002252995413771347, 'time_algorithm_update': 0.014933882798173355, 'loss': -76.32967674684714, 'time_step': 0.015226421444856154, 'observation_error': 0.02058417442252433, 'reward_error': 3.4069669008628855e-06, 'variance': 0.01339977417390409}[0m [36mstep[0m=[35m39909[0m
[2m2023-10-09 15:39:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_39909.pt[0m


Epoch 54/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:39:42[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=54 step=40662[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022306163630795827, 'time_algorithm_update': 0.01473587885954149, 'loss': -76.60616520581492, 'time_step': 0.01502590793695741, 'observation_error': 0.024586159418508446, 'reward_error': 4.067734498262322e-06, 'variance': 0.012851987079372889}[0m [36mstep[0m=[35m40662[0m
[2m2023-10-09 15:39:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_40662.pt[0m


Epoch 55/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:39:55[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=55 step=41415[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023062390635213053, 'time_algorithm_update': 0.015109858506545918, 'loss': -77.52558474268413, 'time_step': 0.01540897756933691, 'observation_error': 0.022187391606860413, 'reward_error': 2.4453665699017433e-06, 'variance': 0.011054166575894033}[0m [36mstep[0m=[35m41415[0m
[2m2023-10-09 15:39:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_41415.pt[0m


Epoch 56/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:40:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=56 step=42168[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002308471269341579, 'time_algorithm_update': 0.014863311215384232, 'loss': -77.78999590018832, 'time_step': 0.015161300243767767, 'observation_error': 0.023356058970598545, 'reward_error': 2.6197398742304786e-06, 'variance': 0.010784566264144105}[0m [36mstep[0m=[35m42168[0m
[2m2023-10-09 15:40:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_42168.pt[0m


Epoch 57/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:40:20[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=57 step=42921[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022963920279169778, 'time_algorithm_update': 0.014984029222769566, 'loss': -77.95664615301814, 'time_step': 0.015282877254612734, 'observation_error': 0.017292029543475227, 'reward_error': 2.228305116031309e-06, 'variance': 0.010671189875540327}[0m [36mstep[0m=[35m42921[0m
[2m2023-10-09 15:40:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_42921.pt[0m


Epoch 58/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:40:33[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=58 step=43674[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002256658764316108, 'time_algorithm_update': 0.015101940666695199, 'loss': -77.47014710374404, 'time_step': 0.015393980629099002, 'observation_error': 0.018898463559326905, 'reward_error': 3.7547017193452263e-06, 'variance': 0.011041973292752302}[0m [36mstep[0m=[35m43674[0m
[2m2023-10-09 15:40:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_43674.pt[0m


Epoch 59/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:40:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=59 step=44427[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022860953848992052, 'time_algorithm_update': 0.01507186414711029, 'loss': -77.6033135828269, 'time_step': 0.015369542249803683, 'observation_error': 0.023203405671205993, 'reward_error': 1.8025186140009424e-06, 'variance': 0.010154850691214249}[0m [36mstep[0m=[35m44427[0m
[2m2023-10-09 15:40:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_44427.pt[0m


Epoch 60/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:40:59[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=60 step=45180[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022578809365808252, 'time_algorithm_update': 0.014944782928324948, 'loss': -77.10992321075196, 'time_step': 0.015237549544963862, 'observation_error': 0.02353306181734934, 'reward_error': 3.624270858574115e-06, 'variance': 0.011150446542570342}[0m [36mstep[0m=[35m45180[0m
[2m2023-10-09 15:40:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_45180.pt[0m


Epoch 61/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:41:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=61 step=45933[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022221909734515714, 'time_algorithm_update': 0.01477435448888447, 'loss': -78.27127676909346, 'time_step': 0.01506368350856016, 'observation_error': 0.018897979609202507, 'reward_error': 1.9061074169694553e-06, 'variance': 0.00966178230604118}[0m [36mstep[0m=[35m45933[0m
[2m2023-10-09 15:41:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_45933.pt[0m


Epoch 62/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:41:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=62 step=46686[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00022904774740556004, 'time_algorithm_update': 0.01519818546604192, 'loss': -77.46618200456636, 'time_step': 0.015495225569483136, 'observation_error': 0.020136983544381617, 'reward_error': 2.029110011159931e-06, 'variance': 0.009545479552788453}[0m [36mstep[0m=[35m46686[0m
[2m2023-10-09 15:41:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_46686.pt[0m


Epoch 63/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:41:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=63 step=47439[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002659842629198376, 'time_algorithm_update': 0.01762623805923766, 'loss': -77.66027475131618, 'time_step': 0.017968902233270693, 'observation_error': 0.019435200596247933, 'reward_error': 3.002690188104392e-06, 'variance': 0.011649810818051737}[0m [36mstep[0m=[35m47439[0m
[2m2023-10-09 15:41:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_47439.pt[0m


Epoch 64/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:41:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=64 step=48192[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026836211621207226, 'time_algorithm_update': 0.017695218285083136, 'loss': -78.56446311008408, 'time_step': 0.018043070675367378, 'observation_error': 0.022749238277618915, 'reward_error': 1.924219478945127e-06, 'variance': 0.009339569015068775}[0m [36mstep[0m=[35m48192[0m
[2m2023-10-09 15:41:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_48192.pt[0m


Epoch 65/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:42:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=65 step=48945[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000275616943440431, 'time_algorithm_update': 0.01778891425367054, 'loss': -78.63024273399971, 'time_step': 0.01813900708202347, 'observation_error': 0.018744238222501312, 'reward_error': 2.2132001361404553e-06, 'variance': 0.009138339982563276}[0m [36mstep[0m=[35m48945[0m
[2m2023-10-09 15:42:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_48945.pt[0m


Epoch 66/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:42:24[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=66 step=49698[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002810381956467749, 'time_algorithm_update': 0.017975817955189335, 'loss': -78.16637650144052, 'time_step': 0.018335790114890373, 'observation_error': 0.01903930023891737, 'reward_error': 1.1836559249799578e-05, 'variance': 0.009058792627783871}[0m [36mstep[0m=[35m49698[0m
[2m2023-10-09 15:42:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_49698.pt[0m


Epoch 67/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:42:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=67 step=50451[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002717686839312671, 'time_algorithm_update': 0.017817593824024376, 'loss': -77.71912224454233, 'time_step': 0.018167750610614996, 'observation_error': 0.020156301380698856, 'reward_error': 2.1881262700254166e-06, 'variance': 0.00857312295537599}[0m [36mstep[0m=[35m50451[0m
[2m2023-10-09 15:42:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_50451.pt[0m


Epoch 68/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:42:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=68 step=51204[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026882502187295737, 'time_algorithm_update': 0.017541869228104672, 'loss': -79.31683199909104, 'time_step': 0.017887186402501972, 'observation_error': 0.019458187185820933, 'reward_error': 2.3969512303074683e-06, 'variance': 0.008521093964996044}[0m [36mstep[0m=[35m51204[0m
[2m2023-10-09 15:42:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_51204.pt[0m


Epoch 69/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:43:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=69 step=51957[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002706000212812487, 'time_algorithm_update': 0.0173881503531974, 'loss': -78.52580674615989, 'time_step': 0.017737960118854822, 'observation_error': 0.019090344339731887, 'reward_error': 1.5155592939544596e-06, 'variance': 0.008157600199890475}[0m [36mstep[0m=[35m51957[0m
[2m2023-10-09 15:43:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_51957.pt[0m


Epoch 70/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:43:24[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=70 step=52710[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027760408155788304, 'time_algorithm_update': 0.01815901746154623, 'loss': -78.34058013095324, 'time_step': 0.01851858054182602, 'observation_error': 0.020529230333975936, 'reward_error': 3.159525367517561e-06, 'variance': 0.00802431099855893}[0m [36mstep[0m=[35m52710[0m
[2m2023-10-09 15:43:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_52710.pt[0m


Epoch 71/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:43:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=71 step=53463[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028114173200184287, 'time_algorithm_update': 0.018252301184462996, 'loss': -79.08796460378376, 'time_step': 0.01861319472273666, 'observation_error': 0.022507518897423428, 'reward_error': 1.3757929532116115e-06, 'variance': 0.007782652979559539}[0m [36mstep[0m=[35m53463[0m
[2m2023-10-09 15:43:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_53463.pt[0m


Epoch 72/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:43:55[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=72 step=54216[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028460972495921263, 'time_algorithm_update': 0.018545242261443322, 'loss': -78.34609706721616, 'time_step': 0.01891211779469038, 'observation_error': 0.02100077535649861, 'reward_error': 1.777066431864004e-06, 'variance': 0.008008033584007904}[0m [36mstep[0m=[35m54216[0m
[2m2023-10-09 15:43:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_54216.pt[0m


Epoch 73/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:44:10[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=73 step=54969[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002751258581599708, 'time_algorithm_update': 0.017859263249127514, 'loss': -79.8785446643196, 'time_step': 0.018210247693308795, 'observation_error': 0.0179855948597959, 'reward_error': 2.023917209146207e-06, 'variance': 0.007876603435974102}[0m [36mstep[0m=[35m54969[0m
[2m2023-10-09 15:44:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_54969.pt[0m


Epoch 74/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:44:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=74 step=55722[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002807912281943191, 'time_algorithm_update': 0.017873581345496107, 'loss': -79.2974529896441, 'time_step': 0.018235903812119685, 'observation_error': 0.017602076590796052, 'reward_error': 1.6497667805396051e-06, 'variance': 0.0075588888354088035}[0m [36mstep[0m=[35m55722[0m
[2m2023-10-09 15:44:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_55722.pt[0m


Epoch 75/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:44:40[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=75 step=56475[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002811376158776353, 'time_algorithm_update': 0.017645898884827715, 'loss': -77.89096747293257, 'time_step': 0.018010419995028025, 'observation_error': 0.017969916805554962, 'reward_error': 2.1299734537883243e-06, 'variance': 0.00744393604421243}[0m [36mstep[0m=[35m56475[0m
[2m2023-10-09 15:44:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_56475.pt[0m


Epoch 76/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:44:55[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=76 step=57228[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002692730461617073, 'time_algorithm_update': 0.017567672894137156, 'loss': -79.40750562052328, 'time_step': 0.0179135466951773, 'observation_error': 0.01572423583687546, 'reward_error': 1.9715627201176704e-06, 'variance': 0.007377595436361214}[0m [36mstep[0m=[35m57228[0m
[2m2023-10-09 15:44:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_57228.pt[0m


Epoch 77/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:45:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=77 step=57981[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002738264294101264, 'time_algorithm_update': 0.01742473984777848, 'loss': -79.91817866980159, 'time_step': 0.01777531172966419, 'observation_error': 0.020358141933085, 'reward_error': 5.559921781977403e-06, 'variance': 0.007254887130253594}[0m [36mstep[0m=[35m57981[0m
[2m2023-10-09 15:45:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_57981.pt[0m


Epoch 78/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:45:24[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=78 step=58734[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002848006497974573, 'time_algorithm_update': 0.017753300280526657, 'loss': -78.35947988581056, 'time_step': 0.018118631317321048, 'observation_error': 0.022322578883347176, 'reward_error': 1.974321097341022e-06, 'variance': 0.008341939809768804}[0m [36mstep[0m=[35m58734[0m
[2m2023-10-09 15:45:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_58734.pt[0m


Epoch 79/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:45:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=79 step=59487[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027889464480943416, 'time_algorithm_update': 0.01731242633277518, 'loss': -80.08426590269781, 'time_step': 0.017668645537073712, 'observation_error': 0.016276186055493806, 'reward_error': 1.6398266143790417e-06, 'variance': 0.007507906789451498}[0m [36mstep[0m=[35m59487[0m
[2m2023-10-09 15:45:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_59487.pt[0m


Epoch 80/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:45:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=80 step=60240[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028109582138568124, 'time_algorithm_update': 0.01732896903596551, 'loss': -79.67669833260545, 'time_step': 0.017688234488802603, 'observation_error': 0.019184900016679426, 'reward_error': 1.5256099212711638e-06, 'variance': 0.00732122874014069}[0m [36mstep[0m=[35m60240[0m
[2m2023-10-09 15:45:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_60240.pt[0m


Epoch 81/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:46:08[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=81 step=60993[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000282680529205606, 'time_algorithm_update': 0.017597145293338366, 'loss': -78.47949920137565, 'time_step': 0.017962888259178456, 'observation_error': 0.019572734863554115, 'reward_error': 1.8853934114134106e-06, 'variance': 0.007418761461240217}[0m [36mstep[0m=[35m60993[0m
[2m2023-10-09 15:46:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_60993.pt[0m


Epoch 82/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:46:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=82 step=61746[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028010193570201613, 'time_algorithm_update': 0.017601523582995493, 'loss': -80.90481209723282, 'time_step': 0.017958660049742437, 'observation_error': 0.017850370079888243, 'reward_error': 1.4140556524946014e-06, 'variance': 0.0069334597879825025}[0m [36mstep[0m=[35m61746[0m
[2m2023-10-09 15:46:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_61746.pt[0m


Epoch 83/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:46:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=83 step=62499[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002829195810345856, 'time_algorithm_update': 0.01770306646744727, 'loss': -79.35545484636567, 'time_step': 0.018066379020255242, 'observation_error': 0.01845890820944172, 'reward_error': 1.7648990121835402e-06, 'variance': 0.007324243992894534}[0m [36mstep[0m=[35m62499[0m
[2m2023-10-09 15:46:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_62499.pt[0m


Epoch 84/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:46:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=84 step=63252[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027315613441416625, 'time_algorithm_update': 0.017328496314931517, 'loss': -79.7906540671826, 'time_step': 0.017680010789260764, 'observation_error': 0.018210074857367736, 'reward_error': 1.923234152984039e-06, 'variance': 0.007130677224370809}[0m [36mstep[0m=[35m63252[0m
[2m2023-10-09 15:46:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_63252.pt[0m


Epoch 85/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:47:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=85 step=64005[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028014056394457974, 'time_algorithm_update': 0.017439918214106464, 'loss': -79.2364714839069, 'time_step': 0.017800727213521404, 'observation_error': 0.018051921420876197, 'reward_error': 1.447693146017632e-06, 'variance': 0.006960264949559539}[0m [36mstep[0m=[35m64005[0m
[2m2023-10-09 15:47:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_64005.pt[0m


Epoch 86/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:47:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=86 step=64758[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002791327467635967, 'time_algorithm_update': 0.017551849562808336, 'loss': -80.65641744393277, 'time_step': 0.017911423091711117, 'observation_error': 0.019552686572608124, 'reward_error': 1.5851095426235136e-06, 'variance': 0.0069362914890755065}[0m [36mstep[0m=[35m64758[0m
[2m2023-10-09 15:47:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_64758.pt[0m


Epoch 87/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:47:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=87 step=65511[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028822811476262916, 'time_algorithm_update': 0.01785229211784454, 'loss': -80.09239050876572, 'time_step': 0.01822483207125113, 'observation_error': 0.017811036042150716, 'reward_error': 3.463171859792479e-06, 'variance': 0.006603670830520031}[0m [36mstep[0m=[35m65511[0m
[2m2023-10-09 15:47:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_65511.pt[0m


Epoch 88/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:47:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=88 step=66264[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002816059041624842, 'time_algorithm_update': 0.01781657429172065, 'loss': -80.40664194621255, 'time_step': 0.01817674624175823, 'observation_error': 0.01691409130930828, 'reward_error': 1.4015619389657663e-06, 'variance': 0.00631893454651465}[0m [36mstep[0m=[35m66264[0m
[2m2023-10-09 15:47:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_66264.pt[0m


Epoch 89/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:48:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=89 step=67017[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027759173318526027, 'time_algorithm_update': 0.017940276172531553, 'loss': -80.06238330217947, 'time_step': 0.01829901159522068, 'observation_error': 0.01712787524987186, 'reward_error': 1.5098198561138414e-06, 'variance': 0.005985797617504652}[0m [36mstep[0m=[35m67017[0m
[2m2023-10-09 15:48:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_67017.pt[0m


Epoch 90/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:48:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=90 step=67770[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027814361045401726, 'time_algorithm_update': 0.017801158140063444, 'loss': -79.74418034243236, 'time_step': 0.01815920300376051, 'observation_error': 0.018816848168251986, 'reward_error': 2.1249774637968506e-06, 'variance': 0.007129022143761815}[0m [36mstep[0m=[35m67770[0m
[2m2023-10-09 15:48:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_67770.pt[0m


Epoch 91/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:48:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=91 step=68523[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027547857834206793, 'time_algorithm_update': 0.017770878347267666, 'loss': -81.25001010666806, 'time_step': 0.018124114944640383, 'observation_error': 0.017317955457783873, 'reward_error': 1.7132412744242108e-06, 'variance': 0.006402927940156337}[0m [36mstep[0m=[35m68523[0m
[2m2023-10-09 15:48:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_68523.pt[0m


Epoch 92/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:48:52[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=92 step=69276[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002747671221040318, 'time_algorithm_update': 0.017719706374493886, 'loss': -80.81942127805307, 'time_step': 0.018075280930416517, 'observation_error': 0.016784680811769207, 'reward_error': 1.315304368452339e-06, 'variance': 0.006177404787564686}[0m [36mstep[0m=[35m69276[0m
[2m2023-10-09 15:48:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_69276.pt[0m


Epoch 93/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:49:07[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=93 step=70029[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000275754991913855, 'time_algorithm_update': 0.017864675635835565, 'loss': -78.42146061141177, 'time_step': 0.018220896106633848, 'observation_error': 0.017644313522778517, 'reward_error': 2.794846841458469e-06, 'variance': 0.006970182229306104}[0m [36mstep[0m=[35m70029[0m
[2m2023-10-09 15:49:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_70029.pt[0m


Epoch 94/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:49:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=94 step=70782[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028732288406189694, 'time_algorithm_update': 0.01819215416116702, 'loss': -80.58461690170672, 'time_step': 0.01856302391802014, 'observation_error': 0.018750477341812344, 'reward_error': 1.5809873258382133e-06, 'variance': 0.005938184963645746}[0m [36mstep[0m=[35m70782[0m
[2m2023-10-09 15:49:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_70782.pt[0m


Epoch 95/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:49:37[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=95 step=71535[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000279866366747366, 'time_algorithm_update': 0.01801933900293601, 'loss': -80.58846734846098, 'time_step': 0.01837963570319957, 'observation_error': 0.01864428860034589, 'reward_error': 2.0427225259674224e-06, 'variance': 0.006447409068339883}[0m [36mstep[0m=[35m71535[0m
[2m2023-10-09 15:49:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_71535.pt[0m


Epoch 96/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:49:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=96 step=72288[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002914133616494309, 'time_algorithm_update': 0.018998490861687527, 'loss': -79.94595849086564, 'time_step': 0.019372948612349918, 'observation_error': 0.01760555121564388, 'reward_error': 1.9446780599873206e-06, 'variance': 0.006706867687018506}[0m [36mstep[0m=[35m72288[0m
[2m2023-10-09 15:49:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_72288.pt[0m


Epoch 97/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:50:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=97 step=73041[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00028397520858136464, 'time_algorithm_update': 0.01815756351982613, 'loss': -81.2709482681862, 'time_step': 0.01852267323578813, 'observation_error': 0.017724793093706878, 'reward_error': 1.4980350102189475e-06, 'variance': 0.00617350367623048}[0m [36mstep[0m=[35m73041[0m
[2m2023-10-09 15:50:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_73041.pt[0m


Epoch 98/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:50:24[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=98 step=73794[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002708739018535234, 'time_algorithm_update': 0.017591896601723802, 'loss': -79.28485317914135, 'time_step': 0.017946391466604285, 'observation_error': 0.01687610863809687, 'reward_error': 5.672906855095364e-06, 'variance': 0.006757071173171985}[0m [36mstep[0m=[35m73794[0m
[2m2023-10-09 15:50:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_73794.pt[0m


Epoch 99/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:50:39[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=99 step=74547[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027290663396219806, 'time_algorithm_update': 0.017880583822648045, 'loss': -81.4701572294096, 'time_step': 0.01823552734506716, 'observation_error': 0.016922366055418418, 'reward_error': 1.481914801765023e-06, 'variance': 0.00570735756371923}[0m [36mstep[0m=[35m74547[0m
[2m2023-10-09 15:50:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_74547.pt[0m


Epoch 100/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:50:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009152727: epoch=100 step=75300[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026127098407720034, 'time_algorithm_update': 0.01759641420635411, 'loss': -80.37566599396278, 'time_step': 0.017940328099021557, 'observation_error': 0.01644048728620092, 'reward_error': 1.535478392178729e-06, 'variance': 0.005878924894489776}[0m [36mstep[0m=[35m75300[0m
[2m2023-10-09 15:50:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009152727/model_75300.pt[0m
Using InvertedPendulumEncoderFactory
[2m2023-10-09 15:50:54[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-09 15:50:54[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_0_20231009155054[0m
[2m2023-10-09 15:50:54[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-09 15:50:54[0m [[32m[1mdebug    [0m] [1mModels have been built.

Epoch 1/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:51:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=1 step=753[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002622034445226905, 'time_algorithm_update': 0.014336040137140241, 'loss': -20.728914955791684, 'time_step': 0.014681202798567286, 'observation_error': 0.04790484093367549, 'reward_error': 0.001137922741786604, 'variance': 0.04998685512913959}[0m [36mstep[0m=[35m753[0m
[2m2023-10-09 15:51:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_753.pt[0m


Epoch 2/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:51:19[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=2 step=1506[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000259144530986568, 'time_algorithm_update': 0.01495211976141252, 'loss': -31.797543379415078, 'time_step': 0.015287691536810926, 'observation_error': 0.023725963813827106, 'reward_error': 0.0003891510752884542, 'variance': 0.016093319677492324}[0m [36mstep[0m=[35m1506[0m
[2m2023-10-09 15:51:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_1506.pt[0m


Epoch 3/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:51:32[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=3 step=2259[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002485147985329191, 'time_algorithm_update': 0.015088860257213335, 'loss': -38.73301384426841, 'time_step': 0.0154115026532258, 'observation_error': 0.017222171768532704, 'reward_error': 0.00010699930680568486, 'variance': 0.009088561310232167}[0m [36mstep[0m=[35m2259[0m
[2m2023-10-09 15:51:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_2259.pt[0m


Epoch 4/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:51:46[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=4 step=3012[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025093761256649835, 'time_algorithm_update': 0.016007814432678628, 'loss': -46.18251926680485, 'time_step': 0.01632900415347075, 'observation_error': 0.012729382189208695, 'reward_error': 2.174508516136165e-05, 'variance': 0.003150883632174059}[0m [36mstep[0m=[35m3012[0m
[2m2023-10-09 15:51:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_3012.pt[0m


Epoch 5/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:52:02[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=5 step=3765[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002628990695137743, 'time_algorithm_update': 0.017029191393301307, 'loss': -52.447934054441816, 'time_step': 0.017364747654077698, 'observation_error': 0.013409418170760004, 'reward_error': 1.030145497648098e-05, 'variance': 0.003815113337648562}[0m [36mstep[0m=[35m3765[0m
[2m2023-10-09 15:52:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_3765.pt[0m


Epoch 6/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:52:17[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=6 step=4518[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002514131832249453, 'time_algorithm_update': 0.01764295585601928, 'loss': -56.45580052252943, 'time_step': 0.017967312142826805, 'observation_error': 0.01923977918001856, 'reward_error': 1.1766856015212209e-05, 'variance': 0.007522488597348803}[0m [36mstep[0m=[35m4518[0m
[2m2023-10-09 15:52:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_4518.pt[0m


Epoch 7/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:52:33[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=7 step=5271[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002523260129242146, 'time_algorithm_update': 0.0176819507502623, 'loss': -59.74810801970848, 'time_step': 0.018005836532410398, 'observation_error': 0.02069871485901915, 'reward_error': 1.7362076491376456e-05, 'variance': 0.01735293172235416}[0m [36mstep[0m=[35m5271[0m
[2m2023-10-09 15:52:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_5271.pt[0m


Epoch 8/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:52:49[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=8 step=6024[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025251282163825167, 'time_algorithm_update': 0.017813025559403825, 'loss': -62.24722935194038, 'time_step': 0.01813534278160389, 'observation_error': 0.03415007351099441, 'reward_error': 1.673447610818475e-05, 'variance': 0.02689704144574693}[0m [36mstep[0m=[35m6024[0m
[2m2023-10-09 15:52:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_6024.pt[0m


Epoch 9/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:53:04[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=9 step=6777[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002489846699424632, 'time_algorithm_update': 0.017493178961603132, 'loss': -64.43771097099639, 'time_step': 0.017814113166069446, 'observation_error': 0.04668140894699467, 'reward_error': 1.4749318405284283e-05, 'variance': 0.038820318512300694}[0m [36mstep[0m=[35m6777[0m
[2m2023-10-09 15:53:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_6777.pt[0m


Epoch 10/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:53:19[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=10 step=7530[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002425581335546486, 'time_algorithm_update': 0.01706669340095672, 'loss': -65.6988250028248, 'time_step': 0.017379642641085234, 'observation_error': 0.05125802174682381, 'reward_error': 1.6594432506793557e-05, 'variance': 0.050744502494667595}[0m [36mstep[0m=[35m7530[0m
[2m2023-10-09 15:53:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_7530.pt[0m


Epoch 11/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:53:34[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=11 step=8283[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024729199301832383, 'time_algorithm_update': 0.017649182918695498, 'loss': -67.12712244050277, 'time_step': 0.017969413265922312, 'observation_error': 0.04700563045530173, 'reward_error': 9.618999807163773e-06, 'variance': 0.06487273494369072}[0m [36mstep[0m=[35m8283[0m
[2m2023-10-09 15:53:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_8283.pt[0m


Epoch 12/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:53:50[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=12 step=9036[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000253246441622021, 'time_algorithm_update': 0.01770715789490962, 'loss': -67.24092751638506, 'time_step': 0.018033594724191613, 'observation_error': 0.06263475360133128, 'reward_error': 9.703741234090024e-06, 'variance': 0.07317912920452092}[0m [36mstep[0m=[35m9036[0m
[2m2023-10-09 15:53:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_9036.pt[0m


Epoch 13/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:54:06[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=13 step=9789[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002561625573106338, 'time_algorithm_update': 0.018039728699135747, 'loss': -68.75969399683979, 'time_step': 0.01836831382862917, 'observation_error': 0.08006100721647748, 'reward_error': 7.879339144682586e-06, 'variance': 0.08750868316895452}[0m [36mstep[0m=[35m9789[0m
[2m2023-10-09 15:54:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_9789.pt[0m


Epoch 14/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:54:22[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=14 step=10542[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025615559156197474, 'time_algorithm_update': 0.01808138197636699, 'loss': -69.69766079698742, 'time_step': 0.018409814492639794, 'observation_error': 0.07556773906422055, 'reward_error': 6.293513429098979e-06, 'variance': 0.09070694631413108}[0m [36mstep[0m=[35m10542[0m
[2m2023-10-09 15:54:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_10542.pt[0m


Epoch 15/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:54:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=15 step=11295[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024623319922215434, 'time_algorithm_update': 0.017183452963354103, 'loss': -70.0688846051139, 'time_step': 0.017501742083079627, 'observation_error': 0.07768260684829477, 'reward_error': 8.041638994258824e-06, 'variance': 0.0890501427512254}[0m [36mstep[0m=[35m11295[0m
[2m2023-10-09 15:54:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_11295.pt[0m


Epoch 16/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:54:53[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=16 step=12048[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024875954961080157, 'time_algorithm_update': 0.017571278302318073, 'loss': -70.39298172554331, 'time_step': 0.01789399953794036, 'observation_error': 0.07173954660180762, 'reward_error': 6.122068538966671e-06, 'variance': 0.0870163792029406}[0m [36mstep[0m=[35m12048[0m
[2m2023-10-09 15:54:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_12048.pt[0m


Epoch 17/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:55:08[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=17 step=12801[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002442086993618949, 'time_algorithm_update': 0.017169407797682966, 'loss': -69.72770449396465, 'time_step': 0.01748268031187425, 'observation_error': 0.07798281717381071, 'reward_error': 5.8014773669149105e-06, 'variance': 0.08053760945714811}[0m [36mstep[0m=[35m12801[0m
[2m2023-10-09 15:55:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_12801.pt[0m


Epoch 18/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:55:23[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=18 step=13554[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023775999922834702, 'time_algorithm_update': 0.01633729497749967, 'loss': -71.51005992914735, 'time_step': 0.01664420649666552, 'observation_error': 0.07677438975455511, 'reward_error': 5.460364426057098e-06, 'variance': 0.07935326144932481}[0m [36mstep[0m=[35m13554[0m
[2m2023-10-09 15:55:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_13554.pt[0m


Epoch 19/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:55:38[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=19 step=14307[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002477226029354263, 'time_algorithm_update': 0.016839090413148027, 'loss': -71.39249892570425, 'time_step': 0.017160418815663454, 'observation_error': 0.07425223270938043, 'reward_error': 4.276999021440786e-06, 'variance': 0.07761244753595443}[0m [36mstep[0m=[35m14307[0m
[2m2023-10-09 15:55:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_14307.pt[0m


Epoch 20/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:55:54[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=20 step=15060[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026536462791412475, 'time_algorithm_update': 0.018026253141729957, 'loss': -71.7187762146452, 'time_step': 0.018367807228726694, 'observation_error': 0.08183407601836405, 'reward_error': 4.21553227379182e-06, 'variance': 0.07309641512788538}[0m [36mstep[0m=[35m15060[0m
[2m2023-10-09 15:55:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_15060.pt[0m


Epoch 21/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:56:09[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=21 step=15813[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023690637839267928, 'time_algorithm_update': 0.016245638390144663, 'loss': -72.80707370459163, 'time_step': 0.01655057195331629, 'observation_error': 0.07516380515401123, 'reward_error': 5.189538953325397e-06, 'variance': 0.07137473395263497}[0m [36mstep[0m=[35m15813[0m
[2m2023-10-09 15:56:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_15813.pt[0m


Epoch 22/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:56:25[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=22 step=16566[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025778905962251257, 'time_algorithm_update': 0.017697790862712885, 'loss': -72.53468050456459, 'time_step': 0.018028419489562908, 'observation_error': 0.06992920307797217, 'reward_error': 4.760023003829311e-06, 'variance': 0.07179794942494216}[0m [36mstep[0m=[35m16566[0m
[2m2023-10-09 15:56:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_16566.pt[0m


Epoch 23/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:56:41[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=23 step=17319[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025190236875577117, 'time_algorithm_update': 0.017557767599544045, 'loss': -72.73932207723063, 'time_step': 0.017881278814389254, 'observation_error': 0.06350170949476423, 'reward_error': 7.33838236179271e-06, 'variance': 0.06567107101491208}[0m [36mstep[0m=[35m17319[0m
[2m2023-10-09 15:56:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_17319.pt[0m


Epoch 24/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:56:56[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=24 step=18072[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024022492437881935, 'time_algorithm_update': 0.01698486801795909, 'loss': -72.67346894060314, 'time_step': 0.017296919309760468, 'observation_error': 0.059516418893637194, 'reward_error': 3.3665612789736995e-06, 'variance': 0.06007510645187478}[0m [36mstep[0m=[35m18072[0m
[2m2023-10-09 15:56:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_18072.pt[0m


Epoch 25/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:57:12[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=25 step=18825[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002535063906969777, 'time_algorithm_update': 0.017755669268320597, 'loss': -72.49212782506449, 'time_step': 0.0180834571362175, 'observation_error': 0.056784967810388075, 'reward_error': 5.353043066695539e-06, 'variance': 0.061562570423441626}[0m [36mstep[0m=[35m18825[0m
[2m2023-10-09 15:57:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_18825.pt[0m


Epoch 26/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:57:28[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=26 step=19578[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025849260023707253, 'time_algorithm_update': 0.018129202474160975, 'loss': -74.00402784664159, 'time_step': 0.018460767044330816, 'observation_error': 0.055844557445397826, 'reward_error': 3.2457134479941904e-06, 'variance': 0.05412519495319281}[0m [36mstep[0m=[35m19578[0m
[2m2023-10-09 15:57:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_19578.pt[0m


Epoch 27/100:   0%|          | 0/753 [00:00<?, ?it/s]

[2m2023-10-09 15:57:45[0m [[32m[1minfo     [0m] [1mexp_0_20231009155054: epoch=27 step=20331[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002626989625522973, 'time_algorithm_update': 0.01819068660457454, 'loss': -73.00813594377374, 'time_step': 0.018528317708576502, 'observation_error': 0.050444931779861195, 'reward_error': 3.13065876078094e-06, 'variance': 0.05150571730360545}[0m [36mstep[0m=[35m20331[0m
[2m2023-10-09 15:57:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_0_20231009155054/model_20331.pt[0m


Epoch 28/100:   0%|          | 0/753 [00:00<?, ?it/s]

## Load Dynamics

In [19]:
# load trained dynamics model
dynamics_model_path = "d3rlpy_logs/ProbabilisticEnsembleDynamics_20231002230632"
dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics.from_json(dynamics_model_path + '/params.json')
dynamics.load_model(dynamics_model_path + '/model_31542.pt')

Using InvertedPendulumEncoderFactory


## Train Offline RL Algorithm

In [20]:
encoders = d3rlpy.models.encoders.DefaultEncoderFactory(dropout_rate=0.2)
# give COMBO as the generator argument.
combo = COMBO(dynamics=dynamics, critic_encoder_factory=encoders, actor_encoder_factory=encoders,
              use_gpu=use_gpu)

In [21]:
combo.fit(dataset = train_episodes, eval_episodes=test_episodes, n_steps=100000, n_steps_per_epoch=1000, tensorboard_dir="tensorboard_logs",
         scorers={
            'environment': d3rlpy.metrics.scorer.evaluate_on_environment(eval_env)
        })

[2m2023-10-02 23:16:08[0m [[32m[1mdebug    [0m] [1mRandomIterator is selected.[0m
[2m2023-10-02 23:16:08[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/COMBO_20231002231608[0m
[2m2023-10-02 23:16:08[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-02 23:16:08[0m [[32m[1mdebug    [0m] [1mModels have been built.[0m
[2m2023-10-02 23:16:08[0m [[32m[1minfo     [0m] [1mParameters are saved to d3rlpy_logs/COMBO_20231002231608/params.json[0m [36mparams[0m=[35m{'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': 0.2}}, 'actor_learning_rate': 0.0001, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'conservative_weight': 1.0, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': 0.2}}, 'c

Epoch 1/100:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-02 23:16:21[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m250000[0m [36mreal_transitions[0m=[35m75144[0m


KeyboardInterrupt: 

## Load the Policy

In [9]:
trained_policy = COMBO()
# initialize with dataset
trained_policy.build_with_dataset(dataset)
# Load entire model parameters.
trained_policy.load_model('d3rlpy_logs/COMBO_20230929153035/model_53000.pt')

## See the policy running

In [10]:
scorer = d3rlpy.metrics.scorer.evaluate_on_environment(eval_env, render=True)
mean_episode_return = scorer(trained_policy)