In [1]:
import d3rlpy
from d3rlpy.algos import COMBO
from sklearn.model_selection import train_test_split
#import gymnasium as gym
import gym
from gym.wrappers import TransformObservation
import numpy as np
import encoders
import os
import json

In [2]:
print(gym.version.VERSION)

0.23.1


## Params

In [3]:
seed = 1
d3rlpy.seed(seed)
use_gpu = True
# prepare environment
#env = gym.make("InvertedPendulum-v2")
#eval_env = gym.make("InvertedPendulum-v2")
env = gym.make("Reacher-v2")
eval_env = gym.make("Reacher-v2")
env.reset(seed=seed)
eval_env.reset(seed=seed)

array([ 9.99997205e-01,  9.95944393e-01,  2.36432274e-03,  8.99709127e-02,
       -7.52674192e-02, -3.06694204e-02,  3.27702594e-03, -9.08008636e-04,
        2.84797318e-01,  4.10616462e-02,  0.00000000e+00])

In [4]:
def observation_edit1(obs):
    new_obs = np.zeros(8)
    new_obs[0] = np.arctan2(obs[2], obs[0])
    new_obs[1] = np.arctan2(obs[3], obs[1])
    new_obs[2:] = obs[4:-1]
    return new_obs

In [5]:
env1 = TransformObservation(env, observation_edit1)
env1.observation_space = gym.spaces.Box(-np.inf, np.inf, shape=(8,), dtype= np.float64 )
print(env1.reset(seed=seed))

eval_env1 = TransformObservation(eval_env, observation_edit1)
eval_env1.observation_space = gym.spaces.Box(-np.inf, np.inf, shape=(8,), dtype= np.float64 )
print(env1.reset(seed=seed))

[ 0.00236432  0.09009274 -0.07526742 -0.03066942  0.00327703 -0.00090801
  0.28479732  0.04106165]
[ 0.00236432  0.09009274 -0.07526742 -0.03066942  0.00327703 -0.00090801
  0.28479732  0.04106165]


## Create dataset

In [6]:
actor_encoder = d3rlpy.models.encoders.DefaultEncoderFactory(dropout_rate=0.2)
# setup algorithm
sac = d3rlpy.algos.SAC(
    batch_size=256,
    actor_encoder_factory=actor_encoder,
    actor_learning_rate=3e-4,
    critic_learning_rate=3e-4,
    temp_learning_rate=3e-4,
    use_gpu=use_gpu
)

# prepare utilities
buffer = d3rlpy.online.buffers.ReplayBuffer(maxlen=1000000, env=env1)

# start training
sac.fit_online(
    env1,
    buffer,
    eval_env=eval_env1,
    n_steps=200000,
    n_steps_per_epoch=1000,
    update_interval=1,
    update_start_step=1000,
    tensorboard_dir='tensorboard_logs',
    experiment_name='exp_6_SAC_reacher',
    save_interval=10
)

[2m2023-10-24 13:17:31[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_6_SAC_reacher_20231024131731[0m
[2m2023-10-24 13:17:31[0m [[32m[1mdebug    [0m] [1mBuilding model...[0m
[2m2023-10-24 13:17:31[0m [[32m[1mdebug    [0m] [1mModel has been built.[0m
[2m2023-10-24 13:17:31[0m [[32m[1minfo     [0m] [1mParameters are saved to d3rlpy_logs/exp_6_SAC_reacher_20231024131731/params.json[0m [36mparams[0m=[35m{'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': 0.2}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.99

  0%|          | 0/200000 [00:00<?, ?it/s]

[2m2023-10-24 13:17:34[0m [[32m[1minfo     [0m] [1mexp_6_SAC_reacher_20231024131731: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_inference': 0.0015537376403808594, 'time_environment_step': 0.00015189647674560547, 'time_step': 0.001757314443588257, 'rollout_return': -66.07874194082103, 'evaluation': -30.761665336165645}[0m [36mstep[0m=[35m1000[0m
[2m2023-10-24 13:17:48[0m [[32m[1minfo     [0m] [1mexp_6_SAC_reacher_20231024131731: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_inference': 0.001282780647277832, 'time_environment_step': 0.00016817498207092284, 'time_sample_batch': 0.00029226827621459963, 'time_algorithm_update': 0.011538589477539062, 'temp_loss': 2.8351960982084274, 'temp': 0.8643339533209801, 'critic_loss': 0.8371473073810339, 'actor_loss': -0.9836339109316469, 'time_step': 0.013360583305358886, 'rollout_return': -41.37339480470685, 'evaluation': -10.284890674989496}[0m [36mstep[0m=[35m200

In [7]:
# export replay buffer as MDPDataset
dataset = buffer.to_mdp_dataset()

# save MDPDataset
dataset.dump('d3rlpy_data/reacherv2_atan2.h5')

## Load the dataset

In [6]:
dataset = d3rlpy.dataset.MDPDataset.load('d3rlpy_data/reacherv2.h5')

In [7]:
train_episodes, test_episodes = train_test_split(dataset, random_state=seed)

## Dynamics learning

In [8]:
def inverted_pendulum_project(x):
    return x[:, 1:]
projection_size = 3

def reacher_project(x):
    return x[:, [1,4,5]]
projection_size = 3

In [7]:
encoder_factory = encoders.SymmetryEncoderFactory(project=inverted_pendulum_project, projection_size=projection_size)
dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(learning_rate=1e-4, use_gpu=True, state_encoder_factory=encoder_factory)
#dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(learning_rate=1e-4, use_gpu=True) # Baseline

Using SymmetryEncoderFactory


In [22]:
# same as algorithms
dynamics.fit(train_episodes,
             eval_episodes=test_episodes,
             n_epochs=100,
             scorers={
                'observation_error': d3rlpy.metrics.scorer.dynamics_observation_prediction_error_scorer,
                'reward_error': d3rlpy.metrics.scorer.dynamics_reward_prediction_error_scorer,
                'variance': d3rlpy.metrics.scorer.dynamics_prediction_variance_scorer,
             },
            tensorboard_dir='tensorboard_logs/dynamics',
            experiment_name='SeparateStateAndRewardEncoders')

NameError: name 'dynamics' is not defined

In [12]:
def experiment_dynamics_training(dataset, symmetry_project, projection_size, n_runs, experiment_name, seed=1, use_gpu=True):
    for i in range(n_runs):
        for exp_type in ['default', 'symmetry']:
            # use the same seeds for default and symmetric runs
            train_episodes, test_episodes = train_test_split(dataset, random_state=seed+i)
            if exp_type == 'symmetry':
                state_encoder_factory = encoders.SymmetryEncoderFactory(project=symmetry_project, projection_size=projection_size)
                train_episodes, test_episodes = train_test_split(dataset, random_state=seed+i+1) # remove this later
                dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(learning_rate=1e-4, use_gpu=use_gpu, state_encoder_factory=state_encoder_factory)
            else:
                dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(learning_rate=1e-4, use_gpu=use_gpu)
            dynamics.fit(train_episodes,
                 eval_episodes=test_episodes,
                 n_epochs=100,
                 scorers={
                    'observation_error': d3rlpy.metrics.scorer.dynamics_observation_prediction_error_scorer,
                    'reward_error': d3rlpy.metrics.scorer.dynamics_reward_prediction_error_scorer,
                    'variance': d3rlpy.metrics.scorer.dynamics_prediction_variance_scorer,
                 },
                tensorboard_dir='tensorboard_logs/dynamics',
                experiment_name=experiment_name + '_' + exp_type)

In [13]:
experiment_dynamics_training(dataset=dataset, symmetry_project=reacher_project, projection_size=3, n_runs=5, experiment_name="exp_5_dynamics_reacher", use_gpu=True)

[2m2023-10-22 00:05:48[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-22 00:05:48[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548[0m
[2m2023-10-22 00:05:48[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-22 00:05:49[0m [[32m[1mdebug    [0m] [1mModels have been built.[0m
[2m2023-10-22 00:05:49[0m [[32m[1minfo     [0m] [1mParameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/params.json[0m [36mparams[0m=[35m{'action_scaler': None, 'batch_size': 100, 'discrete_action': False, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.0001, 'n_ensembles': 5, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0.0001, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dr

Epoch 1/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:08:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=1 step=4811[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000245681202503332, 'time_algorithm_update': 0.018561039167192865, 'loss': 158.64583251195856, 'time_step': 0.0188931956456719, 'observation_error': 1.9341882627994524, 'reward_error': 0.04554997230132077, 'variance': 1.8632635014254653}[0m [36mstep[0m=[35m4811[0m
[2m2023-10-22 00:08:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_4811.pt[0m


Epoch 2/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:10:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=2 step=9622[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002459282439950845, 'time_algorithm_update': 0.018409750858529694, 'loss': -25.502702346984922, 'time_step': 0.01873591287645028, 'observation_error': 1.4926990214166531, 'reward_error': 0.01139987107781643, 'variance': 1.1014125654333635}[0m [36mstep[0m=[35m9622[0m
[2m2023-10-22 00:10:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_9622.pt[0m


Epoch 3/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:12:56[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=3 step=14433[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002709355827082569, 'time_algorithm_update': 0.018925876137720177, 'loss': -49.40428891178972, 'time_step': 0.019278724385358468, 'observation_error': 1.2164142722557887, 'reward_error': 0.005565371273750783, 'variance': 0.8147383720817261}[0m [36mstep[0m=[35m14433[0m
[2m2023-10-22 00:12:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_14433.pt[0m


Epoch 4/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:15:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=4 step=19244[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026170723256388863, 'time_algorithm_update': 0.01889953893773696, 'loss': -70.79778117544808, 'time_step': 0.01924041035615885, 'observation_error': 0.9160243358645376, 'reward_error': 0.003647304515320888, 'variance': 0.5120994633104732}[0m [36mstep[0m=[35m19244[0m
[2m2023-10-22 00:15:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_19244.pt[0m


Epoch 5/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:17:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=5 step=24055[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000266700890128168, 'time_algorithm_update': 0.018961704984025433, 'loss': -88.55281478019148, 'time_step': 0.019308509214136016, 'observation_error': 0.8513213811732471, 'reward_error': 0.001367815785238064, 'variance': 0.462642113868026}[0m [36mstep[0m=[35m24055[0m
[2m2023-10-22 00:17:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_24055.pt[0m


Epoch 6/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:20:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=6 step=28866[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024771928242104784, 'time_algorithm_update': 0.017803706440898837, 'loss': -95.88213217082526, 'time_step': 0.018123873354020374, 'observation_error': 0.7961667937769205, 'reward_error': 0.0010894167344271057, 'variance': 0.43205060251242927}[0m [36mstep[0m=[35m28866[0m
[2m2023-10-22 00:20:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_28866.pt[0m


Epoch 7/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:22:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=7 step=33677[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002470878770757281, 'time_algorithm_update': 0.01770529928387785, 'loss': -99.7008161019496, 'time_step': 0.018027688381057894, 'observation_error': 0.7804561566097565, 'reward_error': 0.0008647681738803186, 'variance': 0.4444213260394151}[0m [36mstep[0m=[35m33677[0m
[2m2023-10-22 00:22:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_33677.pt[0m


Epoch 8/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:24:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=8 step=38488[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002450289336739546, 'time_algorithm_update': 0.01767838836286153, 'loss': -102.77468332256157, 'time_step': 0.017995587953334787, 'observation_error': 0.7373419811617201, 'reward_error': 0.0006475892375562558, 'variance': 0.42292179155891546}[0m [36mstep[0m=[35m38488[0m
[2m2023-10-22 00:24:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_38488.pt[0m


Epoch 9/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:26:55[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=9 step=43299[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002470275166871716, 'time_algorithm_update': 0.01783849979433144, 'loss': -105.1470117023843, 'time_step': 0.018158803980257824, 'observation_error': 0.7137153507255417, 'reward_error': 0.0005791089186257839, 'variance': 0.4200437504742625}[0m [36mstep[0m=[35m43299[0m
[2m2023-10-22 00:26:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_43299.pt[0m


Epoch 10/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:29:14[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=10 step=48110[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025228600669516716, 'time_algorithm_update': 0.017684604590857408, 'loss': -107.49467083541055, 'time_step': 0.018008110256111813, 'observation_error': 0.6743806468274459, 'reward_error': 0.0004715057268499132, 'variance': 0.39511682178807306}[0m [36mstep[0m=[35m48110[0m
[2m2023-10-22 00:29:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_48110.pt[0m


Epoch 11/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:31:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=11 step=52921[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000245318742830177, 'time_algorithm_update': 0.017697789866769172, 'loss': -109.51474149676623, 'time_step': 0.018015388589072836, 'observation_error': 0.6568043773444605, 'reward_error': 0.0003960329202583707, 'variance': 0.39026188636991}[0m [36mstep[0m=[35m52921[0m
[2m2023-10-22 00:31:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_52921.pt[0m


Epoch 12/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:33:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=12 step=57732[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024866289667269467, 'time_algorithm_update': 0.017673613598430463, 'loss': -111.23705514873345, 'time_step': 0.017994302941121917, 'observation_error': 0.6305858407444942, 'reward_error': 0.0003745510550301389, 'variance': 0.37714617929433253}[0m [36mstep[0m=[35m57732[0m
[2m2023-10-22 00:33:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_57732.pt[0m


Epoch 13/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:36:07[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=13 step=62543[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024589018424269415, 'time_algorithm_update': 0.017690933957414105, 'loss': -112.70188544619681, 'time_step': 0.018009835135983816, 'observation_error': 0.5849571927322517, 'reward_error': 0.00032662184529190546, 'variance': 0.3362105491722726}[0m [36mstep[0m=[35m62543[0m
[2m2023-10-22 00:36:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_62543.pt[0m


Epoch 14/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:38:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=14 step=67354[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024486148067483086, 'time_algorithm_update': 0.017670358696689578, 'loss': -114.38148931464839, 'time_step': 0.01798845863818032, 'observation_error': 0.5782179653097957, 'reward_error': 0.00026612267802301673, 'variance': 0.3371935502943038}[0m [36mstep[0m=[35m67354[0m
[2m2023-10-22 00:38:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_67354.pt[0m


Epoch 15/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:40:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=15 step=72165[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025006952168812153, 'time_algorithm_update': 0.01775548835227901, 'loss': -115.88341384884524, 'time_step': 0.018079988625906433, 'observation_error': 0.5580396547052996, 'reward_error': 0.00026712873904843133, 'variance': 0.3449653436649671}[0m [36mstep[0m=[35m72165[0m
[2m2023-10-22 00:40:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_72165.pt[0m


Epoch 16/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:43:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=16 step=76976[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002461631440293687, 'time_algorithm_update': 0.01769076318409804, 'loss': -117.12665229913128, 'time_step': 0.018009679527100347, 'observation_error': 0.5304326516937375, 'reward_error': 0.0002136231003895371, 'variance': 0.3163739566553463}[0m [36mstep[0m=[35m76976[0m
[2m2023-10-22 00:43:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_76976.pt[0m


Epoch 17/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:45:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=17 step=81787[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025000395781779285, 'time_algorithm_update': 0.01772412389510636, 'loss': -118.5566385237845, 'time_step': 0.0180480618458036, 'observation_error': 0.49699861381200694, 'reward_error': 0.00020940889495317164, 'variance': 0.29806017938786}[0m [36mstep[0m=[35m81787[0m
[2m2023-10-22 00:45:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_81787.pt[0m


Epoch 18/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:47:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=18 step=86598[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002505422951748455, 'time_algorithm_update': 0.017696242797302867, 'loss': -120.44403450584689, 'time_step': 0.01801979415408293, 'observation_error': 0.5074880529042529, 'reward_error': 0.00018320799468462813, 'variance': 0.34398894862779217}[0m [36mstep[0m=[35m86598[0m
[2m2023-10-22 00:47:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_86598.pt[0m


Epoch 19/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:49:56[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=19 step=91409[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002468941093259283, 'time_algorithm_update': 0.017722261990214355, 'loss': -122.34372990127909, 'time_step': 0.018043238664213573, 'observation_error': 0.47835627490971894, 'reward_error': 0.00017225009338343424, 'variance': 0.31668500967631064}[0m [36mstep[0m=[35m91409[0m
[2m2023-10-22 00:49:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_91409.pt[0m


Epoch 20/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:52:15[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=20 step=96220[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025055790562016186, 'time_algorithm_update': 0.01776609785515661, 'loss': -124.4201246890772, 'time_step': 0.01809030539576662, 'observation_error': 0.4783866694595278, 'reward_error': 0.0001569812449478675, 'variance': 0.3218486684234432}[0m [36mstep[0m=[35m96220[0m
[2m2023-10-22 00:52:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_96220.pt[0m


Epoch 21/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:54:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=21 step=101031[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000257968208790321, 'time_algorithm_update': 0.017565200245075626, 'loss': -126.32550214863795, 'time_step': 0.017896873443990456, 'observation_error': 0.4852703571561365, 'reward_error': 0.00015528977457436667, 'variance': 0.33406655595253476}[0m [36mstep[0m=[35m101031[0m
[2m2023-10-22 00:54:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_101031.pt[0m


Epoch 22/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:56:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=22 step=105842[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002477509493244039, 'time_algorithm_update': 0.017775549657675364, 'loss': -127.73407548084946, 'time_step': 0.018097218791206026, 'observation_error': 0.5017089253693343, 'reward_error': 0.00014240534719899025, 'variance': 0.35873118154191813}[0m [36mstep[0m=[35m105842[0m
[2m2023-10-22 00:56:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_105842.pt[0m


Epoch 23/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 00:59:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=23 step=110653[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002464174703956022, 'time_algorithm_update': 0.017681201365107535, 'loss': -129.22929708686596, 'time_step': 0.01800195359559298, 'observation_error': 0.5009970137701966, 'reward_error': 0.00013859392139251465, 'variance': 0.34485112843652693}[0m [36mstep[0m=[35m110653[0m
[2m2023-10-22 00:59:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_110653.pt[0m


Epoch 24/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:01:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=24 step=115464[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002480370417079319, 'time_algorithm_update': 0.01771573548603395, 'loss': -130.74440280911926, 'time_step': 0.018037578663640644, 'observation_error': 0.5227029956840573, 'reward_error': 0.0001244976792777442, 'variance': 0.366110267356999}[0m [36mstep[0m=[35m115464[0m
[2m2023-10-22 01:01:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_115464.pt[0m


Epoch 25/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:03:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=25 step=120275[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024633679164964975, 'time_algorithm_update': 0.017599855037222185, 'loss': -132.15851211230688, 'time_step': 0.017920260616707668, 'observation_error': 0.523558350849462, 'reward_error': 0.00011445687313798809, 'variance': 0.3701581733376338}[0m [36mstep[0m=[35m120275[0m
[2m2023-10-22 01:03:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_120275.pt[0m


Epoch 26/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:05:58[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=26 step=125086[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024066925841739634, 'time_algorithm_update': 0.017082557933936617, 'loss': -133.08752792983213, 'time_step': 0.017394242775313845, 'observation_error': 0.5085206033243104, 'reward_error': 0.00010535665867343169, 'variance': 0.3612686768497896}[0m [36mstep[0m=[35m125086[0m
[2m2023-10-22 01:05:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_125086.pt[0m


Epoch 27/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:08:13[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=27 step=129897[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024482148820063947, 'time_algorithm_update': 0.017040099455397927, 'loss': -134.2590624298432, 'time_step': 0.017357526809701897, 'observation_error': 0.5206013539565228, 'reward_error': 0.00010575871468816863, 'variance': 0.35790901283974874}[0m [36mstep[0m=[35m129897[0m
[2m2023-10-22 01:08:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_129897.pt[0m


Epoch 28/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:10:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=28 step=134708[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002442024720976343, 'time_algorithm_update': 0.01703037256214282, 'loss': -135.55993599853008, 'time_step': 0.017346324258573187, 'observation_error': 0.5350602935582005, 'reward_error': 9.955892552625768e-05, 'variance': 0.3715421197658753}[0m [36mstep[0m=[35m134708[0m
[2m2023-10-22 01:10:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_134708.pt[0m


Epoch 29/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:12:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=29 step=139519[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00023987674336501798, 'time_algorithm_update': 0.017068792098129993, 'loss': -136.8686244871829, 'time_step': 0.017381114242131884, 'observation_error': 0.5320338702761895, 'reward_error': 8.833727803296491e-05, 'variance': 0.37685556055536895}[0m [36mstep[0m=[35m139519[0m
[2m2023-10-22 01:12:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_139519.pt[0m


Epoch 30/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:14:58[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=30 step=144330[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002423198523924817, 'time_algorithm_update': 0.017083097857116685, 'loss': -137.77093219459738, 'time_step': 0.017397546688897324, 'observation_error': 0.5461106197928602, 'reward_error': 8.680136333402402e-05, 'variance': 0.38455931026867163}[0m [36mstep[0m=[35m144330[0m
[2m2023-10-22 01:14:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_144330.pt[0m


Epoch 31/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:17:13[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=31 step=149141[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002426182349043858, 'time_algorithm_update': 0.01710210726628356, 'loss': -138.8485918221467, 'time_step': 0.017418477173977497, 'observation_error': 0.5190098596985738, 'reward_error': 8.146323291348088e-05, 'variance': 0.3738513885432089}[0m [36mstep[0m=[35m149141[0m
[2m2023-10-22 01:17:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_149141.pt[0m


Epoch 32/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:19:28[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=32 step=153952[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002419553608835871, 'time_algorithm_update': 0.017018457976479807, 'loss': -139.90298185029363, 'time_step': 0.0173316578735212, 'observation_error': 0.5379967637095034, 'reward_error': 8.56339146701257e-05, 'variance': 0.3884062068317655}[0m [36mstep[0m=[35m153952[0m
[2m2023-10-22 01:19:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_153952.pt[0m


Epoch 33/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:21:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=33 step=158763[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002412680057199748, 'time_algorithm_update': 0.017117517451887145, 'loss': -140.56378307513006, 'time_step': 0.017431626025516858, 'observation_error': 0.5302687577973901, 'reward_error': 6.863718181609235e-05, 'variance': 0.3813403636385621}[0m [36mstep[0m=[35m158763[0m
[2m2023-10-22 01:21:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_158763.pt[0m


Epoch 34/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:23:58[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=34 step=163574[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024002273819645286, 'time_algorithm_update': 0.017108852564483242, 'loss': -141.42335811084533, 'time_step': 0.01742054885352037, 'observation_error': 0.5232811930456763, 'reward_error': 6.730257405589361e-05, 'variance': 0.3811893578093561}[0m [36mstep[0m=[35m163574[0m
[2m2023-10-22 01:23:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_163574.pt[0m


Epoch 35/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:26:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=35 step=168385[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024427824470363025, 'time_algorithm_update': 0.01705243240099542, 'loss': -141.768947954164, 'time_step': 0.017369223196029265, 'observation_error': 0.5183266038998932, 'reward_error': 7.070903299962292e-05, 'variance': 0.37481122996925426}[0m [36mstep[0m=[35m168385[0m
[2m2023-10-22 01:26:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_168385.pt[0m


Epoch 36/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:28:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=36 step=173196[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024300463059369256, 'time_algorithm_update': 0.01704966712211081, 'loss': -142.49755388464854, 'time_step': 0.017364883740016164, 'observation_error': 0.5295219092664988, 'reward_error': 6.171565756356866e-05, 'variance': 0.3989368064522984}[0m [36mstep[0m=[35m173196[0m
[2m2023-10-22 01:28:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_173196.pt[0m


Epoch 37/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:30:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=37 step=178007[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002442796818557387, 'time_algorithm_update': 0.017063975854515658, 'loss': -143.27941126834327, 'time_step': 0.01738136554552299, 'observation_error': 0.5181670340093827, 'reward_error': 5.184445365465741e-05, 'variance': 0.38103681647928633}[0m [36mstep[0m=[35m178007[0m
[2m2023-10-22 01:30:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_178007.pt[0m


Epoch 38/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:32:56[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=38 step=182818[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000243158604097178, 'time_algorithm_update': 0.017052861713020104, 'loss': -143.3165306284779, 'time_step': 0.01736886152926762, 'observation_error': 0.5149403249000267, 'reward_error': 6.259348323527897e-05, 'variance': 0.3820519818358291}[0m [36mstep[0m=[35m182818[0m
[2m2023-10-22 01:32:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_182818.pt[0m


Epoch 39/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:35:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=39 step=187629[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024296047533408342, 'time_algorithm_update': 0.017030555625587262, 'loss': -144.4123500513946, 'time_step': 0.017346238971029094, 'observation_error': 0.4989106882529386, 'reward_error': 4.874169795321045e-05, 'variance': 0.37604445584439583}[0m [36mstep[0m=[35m187629[0m
[2m2023-10-22 01:35:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_187629.pt[0m


Epoch 40/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:37:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=40 step=192440[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024397490649480033, 'time_algorithm_update': 0.017081301268310166, 'loss': -144.98573535228564, 'time_step': 0.017397330917853174, 'observation_error': 0.49867020710108856, 'reward_error': 5.274614195372204e-05, 'variance': 0.3731591394914245}[0m [36mstep[0m=[35m192440[0m
[2m2023-10-22 01:37:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_192440.pt[0m


Epoch 41/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:39:40[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=41 step=197251[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002427385592257449, 'time_algorithm_update': 0.01700084201017454, 'loss': -145.17846828477346, 'time_step': 0.017315789385065915, 'observation_error': 0.5057090800084332, 'reward_error': 4.664372263007678e-05, 'variance': 0.3729844969599668}[0m [36mstep[0m=[35m197251[0m
[2m2023-10-22 01:39:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_197251.pt[0m


Epoch 42/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:41:55[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=42 step=202062[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024297098141156618, 'time_algorithm_update': 0.017072305736807375, 'loss': -145.54635529804565, 'time_step': 0.01738850041105801, 'observation_error': 0.5031104553472976, 'reward_error': 4.842643899405946e-05, 'variance': 0.3721262201769105}[0m [36mstep[0m=[35m202062[0m
[2m2023-10-22 01:41:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_202062.pt[0m


Epoch 43/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:44:09[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=43 step=206873[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024379258640489914, 'time_algorithm_update': 0.01703245673004195, 'loss': -146.54052048506546, 'time_step': 0.017349990384044976, 'observation_error': 0.5059800234044786, 'reward_error': 4.67695596546109e-05, 'variance': 0.38195209156341164}[0m [36mstep[0m=[35m206873[0m
[2m2023-10-22 01:44:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_206873.pt[0m


Epoch 44/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:46:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=44 step=211684[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024355738902879937, 'time_algorithm_update': 0.017038517299597388, 'loss': -146.7151680825233, 'time_step': 0.017354379843040058, 'observation_error': 0.5020625479185581, 'reward_error': 4.2113104517276766e-05, 'variance': 0.3740119892329965}[0m [36mstep[0m=[35m211684[0m
[2m2023-10-22 01:46:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_211684.pt[0m


Epoch 45/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:48:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=45 step=216495[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024323720145042165, 'time_algorithm_update': 0.0170350194695932, 'loss': -147.48906186582937, 'time_step': 0.01735218823563158, 'observation_error': 0.49156238206836694, 'reward_error': 4.173624017628478e-05, 'variance': 0.37590064943482726}[0m [36mstep[0m=[35m216495[0m
[2m2023-10-22 01:48:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_216495.pt[0m


Epoch 46/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:50:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=46 step=221306[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002441820546262999, 'time_algorithm_update': 0.017086123855216566, 'loss': -147.8509297708889, 'time_step': 0.017403376967216624, 'observation_error': 0.48654434386721235, 'reward_error': 4.0174872642704454e-05, 'variance': 0.38595579358428245}[0m [36mstep[0m=[35m221306[0m
[2m2023-10-22 01:50:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_221306.pt[0m


Epoch 47/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:53:09[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=47 step=226117[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024449391663384227, 'time_algorithm_update': 0.017081404941489856, 'loss': -148.35198040599263, 'time_step': 0.01739916066027213, 'observation_error': 0.4899134865005394, 'reward_error': 3.410842458934471e-05, 'variance': 0.37887329365510986}[0m [36mstep[0m=[35m226117[0m
[2m2023-10-22 01:53:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_226117.pt[0m


Epoch 48/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:55:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=48 step=230928[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024243844221991676, 'time_algorithm_update': 0.01709343484622033, 'loss': -149.03587745059133, 'time_step': 0.017408041665618965, 'observation_error': 0.4920249773884958, 'reward_error': 3.310054886594641e-05, 'variance': 0.39033651098313765}[0m [36mstep[0m=[35m230928[0m
[2m2023-10-22 01:55:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_230928.pt[0m


Epoch 49/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:57:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=49 step=235739[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024351318421222097, 'time_algorithm_update': 0.017156475622573185, 'loss': -149.2377369543094, 'time_step': 0.017472834330392067, 'observation_error': 0.48665733516908566, 'reward_error': 3.035030209356729e-05, 'variance': 0.3988035594223897}[0m [36mstep[0m=[35m235739[0m
[2m2023-10-22 01:57:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_235739.pt[0m


Epoch 50/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 01:59:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=50 step=240550[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024293113760828254, 'time_algorithm_update': 0.017023662052935557, 'loss': -149.68219121640794, 'time_step': 0.017339260755073898, 'observation_error': 0.4837068994543159, 'reward_error': 3.472757021022284e-05, 'variance': 0.39426622909877196}[0m [36mstep[0m=[35m240550[0m
[2m2023-10-22 01:59:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_240550.pt[0m


Epoch 51/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:02:09[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=51 step=245361[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002438864473046743, 'time_algorithm_update': 0.01712410694297548, 'loss': -150.04426170837556, 'time_step': 0.017441085112910093, 'observation_error': 0.4869844263868977, 'reward_error': 2.799116073409747e-05, 'variance': 0.411828731953083}[0m [36mstep[0m=[35m245361[0m
[2m2023-10-22 02:02:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_245361.pt[0m


Epoch 52/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:04:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=52 step=250172[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024328299209001628, 'time_algorithm_update': 0.017045000292758787, 'loss': -150.58787362973473, 'time_step': 0.017361717396579348, 'observation_error': 0.4846072079718778, 'reward_error': 2.9629807816557713e-05, 'variance': 0.40949060252540476}[0m [36mstep[0m=[35m250172[0m
[2m2023-10-22 02:04:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_250172.pt[0m


Epoch 53/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:06:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=53 step=254983[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024823482357224166, 'time_algorithm_update': 0.017098760931934395, 'loss': -150.83866716764297, 'time_step': 0.017421712798057338, 'observation_error': 0.48075169911126076, 'reward_error': 2.469415706075149e-05, 'variance': 0.4294018338971028}[0m [36mstep[0m=[35m254983[0m
[2m2023-10-22 02:06:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_254983.pt[0m


Epoch 54/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:08:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=54 step=259794[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024159245519770874, 'time_algorithm_update': 0.017074930719912014, 'loss': -151.19070048779218, 'time_step': 0.017388770000970777, 'observation_error': 0.4850258770417026, 'reward_error': 2.6632359147593764e-05, 'variance': 0.4306371512631173}[0m [36mstep[0m=[35m259794[0m
[2m2023-10-22 02:08:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_259794.pt[0m


Epoch 55/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:11:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=55 step=264605[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002438295559039658, 'time_algorithm_update': 0.017053547680588577, 'loss': -151.57216938140115, 'time_step': 0.01737001407570166, 'observation_error': 0.47857222720828907, 'reward_error': 2.6897698787249687e-05, 'variance': 0.4365831034178772}[0m [36mstep[0m=[35m264605[0m
[2m2023-10-22 02:11:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_264605.pt[0m


Epoch 56/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:13:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=56 step=269416[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000244576379430688, 'time_algorithm_update': 0.017055085284673754, 'loss': -151.7012525497814, 'time_step': 0.017372293844958447, 'observation_error': 0.4837785307453688, 'reward_error': 2.066532068206256e-05, 'variance': 0.4555715535806948}[0m [36mstep[0m=[35m269416[0m
[2m2023-10-22 02:13:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_269416.pt[0m


Epoch 57/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:15:38[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=57 step=274227[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002416667410946269, 'time_algorithm_update': 0.017075070966135017, 'loss': -152.50457359366297, 'time_step': 0.017390500183438493, 'observation_error': 0.47780266233740015, 'reward_error': 2.168960522266895e-05, 'variance': 0.44870741406857667}[0m [36mstep[0m=[35m274227[0m
[2m2023-10-22 02:15:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_274227.pt[0m


Epoch 58/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:17:53[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=58 step=279038[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024809740209648845, 'time_algorithm_update': 0.01703926610540288, 'loss': -152.58280214869885, 'time_step': 0.01736072308554815, 'observation_error': 0.4777437909438294, 'reward_error': 2.174117825815817e-05, 'variance': 0.4689622828501423}[0m [36mstep[0m=[35m279038[0m
[2m2023-10-22 02:17:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_279038.pt[0m


Epoch 59/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:20:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=59 step=283849[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002473529573042907, 'time_algorithm_update': 0.017079481982411725, 'loss': -152.85290318094403, 'time_step': 0.01740162919202482, 'observation_error': 0.4640339000681519, 'reward_error': 1.9401542699238494e-05, 'variance': 0.4675238106009912}[0m [36mstep[0m=[35m283849[0m
[2m2023-10-22 02:20:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_283849.pt[0m


Epoch 60/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:22:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=60 step=288660[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024103414638204502, 'time_algorithm_update': 0.017070780918420264, 'loss': -153.14901866131825, 'time_step': 0.01738452708193379, 'observation_error': 0.45999011264361483, 'reward_error': 2.1298613419009386e-05, 'variance': 0.47136030438860127}[0m [36mstep[0m=[35m288660[0m
[2m2023-10-22 02:22:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_288660.pt[0m


Epoch 61/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:24:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=61 step=293471[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002442152082387337, 'time_algorithm_update': 0.01704674856352028, 'loss': -153.6849754010781, 'time_step': 0.017364831804312385, 'observation_error': 0.4760716493172437, 'reward_error': 2.047879197548206e-05, 'variance': 0.48934054854261855}[0m [36mstep[0m=[35m293471[0m
[2m2023-10-22 02:24:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_293471.pt[0m


Epoch 62/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:26:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=62 step=298282[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024332937741324204, 'time_algorithm_update': 0.017083076547619907, 'loss': -154.11252268900424, 'time_step': 0.017399144108244398, 'observation_error': 0.44425851369930847, 'reward_error': 1.6923711703066235e-05, 'variance': 0.48143919332128676}[0m [36mstep[0m=[35m298282[0m
[2m2023-10-22 02:26:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_298282.pt[0m


Epoch 63/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:29:06[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=63 step=303093[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024282815822616383, 'time_algorithm_update': 0.01706174346172148, 'loss': -154.18815349863513, 'time_step': 0.017378011331615713, 'observation_error': 0.44505354889484366, 'reward_error': 1.6120381677498466e-05, 'variance': 0.4919742371748283}[0m [36mstep[0m=[35m303093[0m
[2m2023-10-22 02:29:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_303093.pt[0m


Epoch 64/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:31:21[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=64 step=307904[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000245865405758065, 'time_algorithm_update': 0.01708012572744239, 'loss': -153.96628656666832, 'time_step': 0.017398447931940255, 'observation_error': 0.4316943134500156, 'reward_error': 1.555796291589905e-05, 'variance': 0.4908817552453947}[0m [36mstep[0m=[35m307904[0m
[2m2023-10-22 02:31:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_307904.pt[0m


Epoch 65/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:33:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=65 step=312715[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024290388127519048, 'time_algorithm_update': 0.01706701632325056, 'loss': -155.6661950672176, 'time_step': 0.0173832670959904, 'observation_error': 0.42810189017652606, 'reward_error': 1.5711537410139023e-05, 'variance': 0.4880599751891563}[0m [36mstep[0m=[35m312715[0m
[2m2023-10-22 02:33:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_312715.pt[0m


Epoch 66/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:35:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=66 step=317526[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024209679647385022, 'time_algorithm_update': 0.017069221459711646, 'loss': -155.8988092422882, 'time_step': 0.0173848146610264, 'observation_error': 0.4413227946187963, 'reward_error': 1.4451514028095638e-05, 'variance': 0.5076045598399096}[0m [36mstep[0m=[35m317526[0m
[2m2023-10-22 02:35:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_317526.pt[0m


Epoch 67/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:38:07[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=67 step=322337[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002459853831806393, 'time_algorithm_update': 0.017091644154693154, 'loss': -156.13982278446824, 'time_step': 0.017410756891964627, 'observation_error': 0.4250480607602687, 'reward_error': 1.5219436669013615e-05, 'variance': 0.4774736609339079}[0m [36mstep[0m=[35m322337[0m
[2m2023-10-22 02:38:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_322337.pt[0m


Epoch 68/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:40:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=68 step=327148[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002447625154072221, 'time_algorithm_update': 0.01719820472163833, 'loss': -156.6234533912153, 'time_step': 0.01751667559704397, 'observation_error': 0.4211159797162647, 'reward_error': 1.321422034867825e-05, 'variance': 0.4861853002902097}[0m [36mstep[0m=[35m327148[0m
[2m2023-10-22 02:40:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_327148.pt[0m


Epoch 69/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:42:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=69 step=331959[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002509500994748718, 'time_algorithm_update': 0.017114633830959944, 'loss': -157.1065652522593, 'time_step': 0.01744054791536333, 'observation_error': 0.42362619501088017, 'reward_error': 1.2286070642148653e-05, 'variance': 0.4655883448922428}[0m [36mstep[0m=[35m331959[0m
[2m2023-10-22 02:42:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_331959.pt[0m


Epoch 70/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:44:51[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=70 step=336770[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002434035146392524, 'time_algorithm_update': 0.016958041241649183, 'loss': -156.94702529272897, 'time_step': 0.017273193435494504, 'observation_error': 0.41824234410277433, 'reward_error': 1.3491313587392918e-05, 'variance': 0.46605838031959756}[0m [36mstep[0m=[35m336770[0m
[2m2023-10-22 02:44:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_336770.pt[0m


Epoch 71/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:47:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=71 step=341581[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024477341794045893, 'time_algorithm_update': 0.01702622043147352, 'loss': -158.02099572583907, 'time_step': 0.017343929715375597, 'observation_error': 0.41770657374078035, 'reward_error': 1.3462311007443763e-05, 'variance': 0.456538361824431}[0m [36mstep[0m=[35m341581[0m
[2m2023-10-22 02:47:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_341581.pt[0m


Epoch 72/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:49:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=72 step=346392[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024874521079863265, 'time_algorithm_update': 0.016980431427258857, 'loss': -158.30024800035963, 'time_step': 0.017302950462812296, 'observation_error': 0.41688041229837963, 'reward_error': 1.2095581078973445e-05, 'variance': 0.447704696070215}[0m [36mstep[0m=[35m346392[0m
[2m2023-10-22 02:49:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_346392.pt[0m


Epoch 73/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:51:34[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=73 step=351203[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024518315496228633, 'time_algorithm_update': 0.0170477955536098, 'loss': -158.44698994473393, 'time_step': 0.017366701786990374, 'observation_error': 0.4153180754179307, 'reward_error': 1.238400492219246e-05, 'variance': 0.43775859415755397}[0m [36mstep[0m=[35m351203[0m
[2m2023-10-22 02:51:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_351203.pt[0m


Epoch 74/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:53:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=74 step=356014[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002431200487750951, 'time_algorithm_update': 0.016969440236604034, 'loss': -158.96930610372877, 'time_step': 0.017285261152192526, 'observation_error': 0.413875073205725, 'reward_error': 1.0796195881517385e-05, 'variance': 0.43779753198089744}[0m [36mstep[0m=[35m356014[0m
[2m2023-10-22 02:53:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_356014.pt[0m


Epoch 75/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:56:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=75 step=360825[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024483348098719996, 'time_algorithm_update': 0.01698155423951134, 'loss': -158.84034449106144, 'time_step': 0.017299677522334606, 'observation_error': 0.41620281929249336, 'reward_error': 1.0688750581209427e-05, 'variance': 0.4351700967590625}[0m [36mstep[0m=[35m360825[0m
[2m2023-10-22 02:56:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_360825.pt[0m


Epoch 76/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 02:58:17[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=76 step=365636[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024498051651498926, 'time_algorithm_update': 0.016961918232025166, 'loss': -158.66602331941328, 'time_step': 0.01727975205259099, 'observation_error': 0.41060754469347305, 'reward_error': 1.0207132142015562e-05, 'variance': 0.44245043697205155}[0m [36mstep[0m=[35m365636[0m
[2m2023-10-22 02:58:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_365636.pt[0m


Epoch 77/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:00:31[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=77 step=370447[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024236668372843077, 'time_algorithm_update': 0.01699178002277597, 'loss': -159.71333313860484, 'time_step': 0.01730793192936214, 'observation_error': 0.4186782476031779, 'reward_error': 1.0657300904945207e-05, 'variance': 0.4449453395560112}[0m [36mstep[0m=[35m370447[0m
[2m2023-10-22 03:00:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_370447.pt[0m


Epoch 78/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:02:45[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=78 step=375258[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024442741118109766, 'time_algorithm_update': 0.01707462837284257, 'loss': -159.80157668534545, 'time_step': 0.01739243652289832, 'observation_error': 0.40926472919461465, 'reward_error': 1.0486645931074062e-05, 'variance': 0.43374914002749737}[0m [36mstep[0m=[35m375258[0m
[2m2023-10-22 03:02:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_375258.pt[0m


Epoch 79/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:05:00[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=79 step=380069[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024433984401641827, 'time_algorithm_update': 0.017037361829302163, 'loss': -159.52122276653446, 'time_step': 0.017355445070094264, 'observation_error': 0.4152655399506308, 'reward_error': 9.503138495633434e-06, 'variance': 0.4328619275084083}[0m [36mstep[0m=[35m380069[0m
[2m2023-10-22 03:05:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_380069.pt[0m


Epoch 80/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:07:15[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=80 step=384880[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024533479928821666, 'time_algorithm_update': 0.016991736263972115, 'loss': -160.64718937601276, 'time_step': 0.017309755328489027, 'observation_error': 0.4071721156354962, 'reward_error': 9.709536490985686e-06, 'variance': 0.4295773281393282}[0m [36mstep[0m=[35m384880[0m
[2m2023-10-22 03:07:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_384880.pt[0m


Epoch 81/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:09:29[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=81 step=389691[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024513810767723057, 'time_algorithm_update': 0.016975410364690576, 'loss': -160.59497552918089, 'time_step': 0.01729335524242451, 'observation_error': 0.4083795517679427, 'reward_error': 9.55141126616351e-06, 'variance': 0.4257222061655551}[0m [36mstep[0m=[35m389691[0m
[2m2023-10-22 03:09:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_389691.pt[0m


Epoch 82/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:11:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=82 step=394502[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002441008307536856, 'time_algorithm_update': 0.017060375689369953, 'loss': -160.63796061751898, 'time_step': 0.017378560819290848, 'observation_error': 0.4053698225026513, 'reward_error': 8.899494350187807e-06, 'variance': 0.4263516701089733}[0m [36mstep[0m=[35m394502[0m
[2m2023-10-22 03:11:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_394502.pt[0m


Epoch 83/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:13:58[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=83 step=399313[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024669612923374146, 'time_algorithm_update': 0.017078286668313215, 'loss': -160.8405978001222, 'time_step': 0.017399232567434524, 'observation_error': 0.4057778895776972, 'reward_error': 8.892290884403562e-06, 'variance': 0.41830163291362027}[0m [36mstep[0m=[35m399313[0m
[2m2023-10-22 03:13:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_399313.pt[0m


Epoch 84/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:16:13[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=84 step=404124[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024217940794160376, 'time_algorithm_update': 0.017027212859339884, 'loss': -160.8371695782335, 'time_step': 0.017342254888042544, 'observation_error': 0.39733186586713654, 'reward_error': 8.302014950476557e-06, 'variance': 0.4089523231685932}[0m [36mstep[0m=[35m404124[0m
[2m2023-10-22 03:16:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_404124.pt[0m


Epoch 85/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:18:28[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=85 step=408935[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024213852344196567, 'time_algorithm_update': 0.01701422645554575, 'loss': -161.1037644017786, 'time_step': 0.017329809596795743, 'observation_error': 0.3899044134034857, 'reward_error': 8.061343043700682e-06, 'variance': 0.4101971321220467}[0m [36mstep[0m=[35m408935[0m
[2m2023-10-22 03:18:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_408935.pt[0m


Epoch 86/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:20:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=86 step=413746[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002473203488185187, 'time_algorithm_update': 0.01694107278670424, 'loss': -161.03182661278137, 'time_step': 0.017261487930684792, 'observation_error': 0.3874547908983238, 'reward_error': 8.655300076944093e-06, 'variance': 0.40586539238343267}[0m [36mstep[0m=[35m413746[0m
[2m2023-10-22 03:20:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_413746.pt[0m


Epoch 87/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:22:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=87 step=418557[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024589657709172847, 'time_algorithm_update': 0.016985641599221824, 'loss': -161.06563648595753, 'time_step': 0.01730509697337875, 'observation_error': 0.39259825843872453, 'reward_error': 7.334939652071019e-06, 'variance': 0.4137329658427163}[0m [36mstep[0m=[35m418557[0m
[2m2023-10-22 03:22:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_418557.pt[0m


Epoch 88/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:25:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=88 step=423368[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024734051850500686, 'time_algorithm_update': 0.017036337536304562, 'loss': -162.23351066698777, 'time_step': 0.01735785076360994, 'observation_error': 0.38263242186659707, 'reward_error': 7.854048079334225e-06, 'variance': 0.4039668560396806}[0m [36mstep[0m=[35m423368[0m
[2m2023-10-22 03:25:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_423368.pt[0m


Epoch 89/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:27:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=89 step=428179[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002486390597704814, 'time_algorithm_update': 0.017006766397178456, 'loss': -161.7269225813351, 'time_step': 0.0173307145070544, 'observation_error': 0.3790529974339781, 'reward_error': 7.11505527458218e-06, 'variance': 0.40035598017143315}[0m [36mstep[0m=[35m428179[0m
[2m2023-10-22 03:27:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_428179.pt[0m


Epoch 90/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:29:40[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=90 step=432990[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000244613596714601, 'time_algorithm_update': 0.017036453995182322, 'loss': -161.73531256711203, 'time_step': 0.017354964962176093, 'observation_error': 0.3762347685002244, 'reward_error': 7.189714054897791e-06, 'variance': 0.39925201196020904}[0m [36mstep[0m=[35m432990[0m
[2m2023-10-22 03:29:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_432990.pt[0m


Epoch 91/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:31:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=91 step=437801[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002429593355237905, 'time_algorithm_update': 0.01709790404237895, 'loss': -161.5597259273303, 'time_step': 0.017413691556570163, 'observation_error': 0.37770424818423964, 'reward_error': 7.63692252480906e-06, 'variance': 0.4105830675935722}[0m [36mstep[0m=[35m437801[0m
[2m2023-10-22 03:31:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_437801.pt[0m


Epoch 92/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:34:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=92 step=442612[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002470488757409219, 'time_algorithm_update': 0.016945262035543522, 'loss': -162.74188004935564, 'time_step': 0.017265588076093345, 'observation_error': 0.36360991819053834, 'reward_error': 6.98719924318885e-06, 'variance': 0.3908862918612961}[0m [36mstep[0m=[35m442612[0m
[2m2023-10-22 03:34:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_442612.pt[0m


Epoch 93/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:36:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=93 step=447423[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024669915220886624, 'time_algorithm_update': 0.017015639473410213, 'loss': -163.23307211635466, 'time_step': 0.01733648055954154, 'observation_error': 0.3690257785923379, 'reward_error': 6.26903657669059e-06, 'variance': 0.3948991572457993}[0m [36mstep[0m=[35m447423[0m
[2m2023-10-22 03:36:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_447423.pt[0m


Epoch 94/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:38:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=94 step=452234[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002426225959176805, 'time_algorithm_update': 0.0169441455665831, 'loss': -163.19709411665497, 'time_step': 0.017260011380785742, 'observation_error': 0.36032939868554753, 'reward_error': 6.509043436489201e-06, 'variance': 0.3901282668048261}[0m [36mstep[0m=[35m452234[0m
[2m2023-10-22 03:38:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_452234.pt[0m


Epoch 95/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:40:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=95 step=457045[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002469454999030491, 'time_algorithm_update': 0.01709474969172869, 'loss': -163.35771677226936, 'time_step': 0.017416421600449632, 'observation_error': 0.35643320007776286, 'reward_error': 6.393834995567898e-06, 'variance': 0.38964653544770117}[0m [36mstep[0m=[35m457045[0m
[2m2023-10-22 03:40:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_457045.pt[0m


Epoch 96/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:43:07[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=96 step=461856[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002443649693998322, 'time_algorithm_update': 0.017102232595858816, 'loss': -163.70854146811106, 'time_step': 0.017420627302202705, 'observation_error': 0.3533546518481034, 'reward_error': 6.227139450159783e-06, 'variance': 0.38264177518607007}[0m [36mstep[0m=[35m461856[0m
[2m2023-10-22 03:43:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_461856.pt[0m


Epoch 97/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:45:22[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=97 step=466667[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002446203860193894, 'time_algorithm_update': 0.017039420277134242, 'loss': -163.67455094630245, 'time_step': 0.01735805518610813, 'observation_error': 0.3522604296078048, 'reward_error': 6.435410702015515e-06, 'variance': 0.38164313881685685}[0m [36mstep[0m=[35m466667[0m
[2m2023-10-22 03:45:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_466667.pt[0m


Epoch 98/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:47:36[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=98 step=471478[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024815359969962734, 'time_algorithm_update': 0.01710598569381165, 'loss': -164.22542073438728, 'time_step': 0.0174283193367431, 'observation_error': 0.33688656106180975, 'reward_error': 6.004890536020797e-06, 'variance': 0.37874813434688925}[0m [36mstep[0m=[35m471478[0m
[2m2023-10-22 03:47:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_471478.pt[0m


Epoch 99/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:49:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=99 step=476289[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002461765739680377, 'time_algorithm_update': 0.017029225318304492, 'loss': -164.13289446484842, 'time_step': 0.017349673665454448, 'observation_error': 0.3276467464665215, 'reward_error': 6.001483473113089e-06, 'variance': 0.37538832580656745}[0m [36mstep[0m=[35m476289[0m
[2m2023-10-22 03:49:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_476289.pt[0m


Epoch 100/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 03:52:04[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022000548: epoch=100 step=481100[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002444425260567214, 'time_algorithm_update': 0.01694683145520296, 'loss': -163.1308151658224, 'time_step': 0.017264595995125764, 'observation_error': 0.33325230646852677, 'reward_error': 5.887090065088848e-06, 'variance': 0.38128434382669163}[0m [36mstep[0m=[35m481100[0m
[2m2023-10-22 03:52:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548/model_481100.pt[0m
Using SymmetryEncoderFactory
[2m2023-10-22 03:52:04[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-22 03:52:04[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204[0m
[2m2023-10-22 03:52:04[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-

  elif isinstance(obj, collections.Callable):


Epoch 1/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 03:54:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=1 step=4812[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002434570791319025, 'time_algorithm_update': 0.017319512486160545, 'loss': 143.67144511975133, 'time_step': 0.017649599590206386, 'observation_error': 2.212758618995561, 'reward_error': 0.03588702326201808, 'variance': 1.8064413930988767}[0m [36mstep[0m=[35m4812[0m
[2m2023-10-22 03:54:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_4812.pt[0m


Epoch 2/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 03:56:45[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=2 step=9624[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002450858763823981, 'time_algorithm_update': 0.0175834159898639, 'loss': -33.39877769338619, 'time_step': 0.017907656735414678, 'observation_error': 1.6329147209289885, 'reward_error': 0.028284202584684077, 'variance': 1.4145964574348628}[0m [36mstep[0m=[35m9624[0m
[2m2023-10-22 03:56:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_9624.pt[0m


Epoch 3/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 03:59:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=3 step=14436[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024841194438220853, 'time_algorithm_update': 0.017608971815751377, 'loss': -61.8944583749236, 'time_step': 0.017936928066133166, 'observation_error': 1.0337712378768573, 'reward_error': 0.01406851573209735, 'variance': 0.7197120413860436}[0m [36mstep[0m=[35m14436[0m
[2m2023-10-22 03:59:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_14436.pt[0m


Epoch 4/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:01:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=4 step=19248[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025346917305404905, 'time_algorithm_update': 0.0175256843083319, 'loss': -80.27327877684424, 'time_step': 0.017855208165825948, 'observation_error': 0.8717294653886177, 'reward_error': 0.006816975930335551, 'variance': 0.522279524542036}[0m [36mstep[0m=[35m19248[0m
[2m2023-10-22 04:01:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_19248.pt[0m


Epoch 5/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:03:46[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=5 step=24060[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024698500026788497, 'time_algorithm_update': 0.01759776113831987, 'loss': -87.50909470600182, 'time_step': 0.017919699796912873, 'observation_error': 0.8016397362493975, 'reward_error': 0.003333574516059085, 'variance': 0.49664683011350885}[0m [36mstep[0m=[35m24060[0m
[2m2023-10-22 04:03:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_24060.pt[0m


Epoch 6/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:06:07[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=6 step=28872[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024498390933422873, 'time_algorithm_update': 0.017520116600312496, 'loss': -91.8488061457798, 'time_step': 0.017837944421189484, 'observation_error': 0.702248164639803, 'reward_error': 0.0018765833168874284, 'variance': 0.43564272144270927}[0m [36mstep[0m=[35m28872[0m
[2m2023-10-22 04:06:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_28872.pt[0m


Epoch 7/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:08:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=7 step=33684[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024361810581147026, 'time_algorithm_update': 0.01756472906666008, 'loss': -94.78122004368656, 'time_step': 0.01788361599716859, 'observation_error': 0.6111001891552109, 'reward_error': 0.001587889007254458, 'variance': 0.38363105208578085}[0m [36mstep[0m=[35m33684[0m
[2m2023-10-22 04:08:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_33684.pt[0m


Epoch 8/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:10:46[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=8 step=38496[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002452594383696368, 'time_algorithm_update': 0.01748708869057304, 'loss': -97.5810258822548, 'time_step': 0.017806611503052492, 'observation_error': 0.5724477253107606, 'reward_error': 0.0009033816885008312, 'variance': 0.38336386772549724}[0m [36mstep[0m=[35m38496[0m
[2m2023-10-22 04:10:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_38496.pt[0m


Epoch 9/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:13:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=9 step=43308[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024532771368177355, 'time_algorithm_update': 0.017510945759311083, 'loss': -100.26912606763324, 'time_step': 0.01783012159645607, 'observation_error': 0.5149479387599917, 'reward_error': 0.0007796242728923445, 'variance': 0.3309732076161729}[0m [36mstep[0m=[35m43308[0m
[2m2023-10-22 04:13:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_43308.pt[0m


Epoch 10/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:15:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=10 step=48120[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002466510061611261, 'time_algorithm_update': 0.01746305469067416, 'loss': -104.45227757594235, 'time_step': 0.01778184322149478, 'observation_error': 0.49913309313688314, 'reward_error': 0.0005743724258987891, 'variance': 0.3309473104145594}[0m [36mstep[0m=[35m48120[0m
[2m2023-10-22 04:15:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_48120.pt[0m


Epoch 11/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:17:44[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=11 step=52932[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024371229403234975, 'time_algorithm_update': 0.017472715548248165, 'loss': -106.87936232829233, 'time_step': 0.017788901144726914, 'observation_error': 0.4452421907742558, 'reward_error': 0.0005280031057245261, 'variance': 0.2886266917087819}[0m [36mstep[0m=[35m52932[0m
[2m2023-10-22 04:17:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_52932.pt[0m


Epoch 12/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:20:03[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=12 step=57744[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024956246762105255, 'time_algorithm_update': 0.017409125121553442, 'loss': -108.595598831042, 'time_step': 0.01772970431859753, 'observation_error': 0.41975800145869996, 'reward_error': 0.00039688703419652914, 'variance': 0.2848794917051917}[0m [36mstep[0m=[35m57744[0m
[2m2023-10-22 04:20:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_57744.pt[0m


Epoch 13/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:22:22[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=13 step=62556[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024434262677618395, 'time_algorithm_update': 0.017496981377018956, 'loss': -110.62757319246643, 'time_step': 0.01781339350087585, 'observation_error': 0.37644631506286913, 'reward_error': 0.0003854260875751255, 'variance': 0.2663277535612278}[0m [36mstep[0m=[35m62556[0m
[2m2023-10-22 04:22:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_62556.pt[0m


Epoch 14/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:24:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=14 step=67368[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024530333671982053, 'time_algorithm_update': 0.01744342107923449, 'loss': -112.12465776806559, 'time_step': 0.017759936557446336, 'observation_error': 0.32422297764922037, 'reward_error': 0.00035080836130350077, 'variance': 0.25065944903718235}[0m [36mstep[0m=[35m67368[0m
[2m2023-10-22 04:24:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_67368.pt[0m


Epoch 15/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:27:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=15 step=72180[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024568687749721563, 'time_algorithm_update': 0.017462931369010944, 'loss': -113.58215226358111, 'time_step': 0.017780497979840337, 'observation_error': 0.27213635643751277, 'reward_error': 0.00030991690960114395, 'variance': 0.23502617850890406}[0m [36mstep[0m=[35m72180[0m
[2m2023-10-22 04:27:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_72180.pt[0m


Epoch 16/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:29:20[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=16 step=76992[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024848814716164707, 'time_algorithm_update': 0.01745491328084855, 'loss': -115.85464232263224, 'time_step': 0.017775965698143888, 'observation_error': 0.21809513953354834, 'reward_error': 0.00025852949215814255, 'variance': 0.20188120003278978}[0m [36mstep[0m=[35m76992[0m
[2m2023-10-22 04:29:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_76992.pt[0m


Epoch 17/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:31:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=17 step=81804[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024225443279554917, 'time_algorithm_update': 0.0174483972000065, 'loss': -117.57112793117787, 'time_step': 0.01776323423520387, 'observation_error': 0.18645672487800738, 'reward_error': 0.0002505184927780541, 'variance': 0.20024286630901864}[0m [36mstep[0m=[35m81804[0m
[2m2023-10-22 04:31:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_81804.pt[0m


Epoch 18/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:33:59[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=18 step=86616[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002452438311683864, 'time_algorithm_update': 0.017494798251617382, 'loss': -119.88982184371251, 'time_step': 0.017811397067329236, 'observation_error': 0.15376524313859466, 'reward_error': 0.00023401445779102872, 'variance': 0.16164169181090668}[0m [36mstep[0m=[35m86616[0m
[2m2023-10-22 04:33:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_86616.pt[0m


Epoch 19/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:36:18[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=19 step=91428[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024731166146738966, 'time_algorithm_update': 0.017397041629972007, 'loss': -121.29637067831266, 'time_step': 0.017717066647505025, 'observation_error': 0.15905592118514922, 'reward_error': 0.0002243833402881405, 'variance': 0.1453981601734154}[0m [36mstep[0m=[35m91428[0m
[2m2023-10-22 04:36:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_91428.pt[0m


Epoch 20/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:38:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=20 step=96240[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002493992608879768, 'time_algorithm_update': 0.017376674034153532, 'loss': -122.52389275779946, 'time_step': 0.017698230390635908, 'observation_error': 0.14960070885218155, 'reward_error': 0.000202542741730942, 'variance': 0.1218345845143769}[0m [36mstep[0m=[35m96240[0m
[2m2023-10-22 04:38:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_96240.pt[0m


Epoch 21/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:40:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=21 step=101052[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002448742625521107, 'time_algorithm_update': 0.017499958487817473, 'loss': -123.80154408322507, 'time_step': 0.017818014074739374, 'observation_error': 0.12992461906513966, 'reward_error': 0.00018407584081819595, 'variance': 0.09148070080809995}[0m [36mstep[0m=[35m101052[0m
[2m2023-10-22 04:40:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_101052.pt[0m


Epoch 22/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:43:16[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=22 step=105864[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024747011172008437, 'time_algorithm_update': 0.017526628915607582, 'loss': -125.0707085546809, 'time_step': 0.017846556673026145, 'observation_error': 0.13330259530759697, 'reward_error': 0.00018021938670120406, 'variance': 0.09027834285696419}[0m [36mstep[0m=[35m105864[0m
[2m2023-10-22 04:43:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_105864.pt[0m


Epoch 23/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:45:35[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=23 step=110676[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002453596712843339, 'time_algorithm_update': 0.01742551778617345, 'loss': -126.06088999124339, 'time_step': 0.01774265874155541, 'observation_error': 0.1291626684129292, 'reward_error': 0.00016795413533866787, 'variance': 0.08224206914590215}[0m [36mstep[0m=[35m110676[0m
[2m2023-10-22 04:45:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_110676.pt[0m


Epoch 24/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:47:55[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=24 step=115488[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024972924171441413, 'time_algorithm_update': 0.01753839580396365, 'loss': -126.9238806099864, 'time_step': 0.017860765072671154, 'observation_error': 0.12533723832354912, 'reward_error': 0.00013944493626410665, 'variance': 0.08324487728965682}[0m [36mstep[0m=[35m115488[0m
[2m2023-10-22 04:47:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_115488.pt[0m


Epoch 25/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:50:14[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=25 step=120300[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024504044208542466, 'time_algorithm_update': 0.0174520015221086, 'loss': -127.51020294769744, 'time_step': 0.017769231017390984, 'observation_error': 0.13887779600295672, 'reward_error': 0.0001437210273807189, 'variance': 0.10607501713329302}[0m [36mstep[0m=[35m120300[0m
[2m2023-10-22 04:50:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_120300.pt[0m


Epoch 26/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:52:33[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=26 step=125112[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002469961978154488, 'time_algorithm_update': 0.017490350050621002, 'loss': -128.39777268039515, 'time_step': 0.017809773621119167, 'observation_error': 0.13321081828491904, 'reward_error': 0.0001283528472895463, 'variance': 0.08068538964951975}[0m [36mstep[0m=[35m125112[0m
[2m2023-10-22 04:52:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_125112.pt[0m


Epoch 27/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:54:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=27 step=129924[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024984344679023065, 'time_algorithm_update': 0.01741478890056721, 'loss': -128.81406731736334, 'time_step': 0.01773671794710611, 'observation_error': 0.13802719064967517, 'reward_error': 0.00012481300560427154, 'variance': 0.08588667623309708}[0m [36mstep[0m=[35m129924[0m
[2m2023-10-22 04:54:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_129924.pt[0m


Epoch 28/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:57:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=28 step=134736[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024431998394770316, 'time_algorithm_update': 0.017466498233830046, 'loss': -129.74316063743774, 'time_step': 0.01778285932461619, 'observation_error': 0.12435525820501317, 'reward_error': 0.00011803150614834178, 'variance': 0.08159994095513604}[0m [36mstep[0m=[35m134736[0m
[2m2023-10-22 04:57:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_134736.pt[0m


Epoch 29/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 04:59:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=29 step=139548[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000245103316810462, 'time_algorithm_update': 0.017419447625367124, 'loss': -130.18535091910675, 'time_step': 0.017736090488069175, 'observation_error': 0.12945728895567016, 'reward_error': 0.00010206127048454344, 'variance': 0.07569142857180342}[0m [36mstep[0m=[35m139548[0m
[2m2023-10-22 04:59:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_139548.pt[0m


Epoch 30/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:01:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=30 step=144360[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002493838023267383, 'time_algorithm_update': 0.0175312901672877, 'loss': -130.63577809773776, 'time_step': 0.017853306168233565, 'observation_error': 0.12658430337681675, 'reward_error': 0.00010086551414065565, 'variance': 0.07492753408353772}[0m [36mstep[0m=[35m144360[0m
[2m2023-10-22 05:01:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_144360.pt[0m


Epoch 31/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:04:09[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=31 step=149172[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002515167369509576, 'time_algorithm_update': 0.017462809236467827, 'loss': -130.97438822006643, 'time_step': 0.01778731391713308, 'observation_error': 0.1203289578899609, 'reward_error': 9.128135028992457e-05, 'variance': 0.06916946393632888}[0m [36mstep[0m=[35m149172[0m
[2m2023-10-22 05:04:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_149172.pt[0m


Epoch 32/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:06:29[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=32 step=153984[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024966403829585686, 'time_algorithm_update': 0.017496265675361614, 'loss': -131.23914503298099, 'time_step': 0.01781910682656026, 'observation_error': 0.14286855093630424, 'reward_error': 8.332415476282959e-05, 'variance': 0.11269108032629221}[0m [36mstep[0m=[35m153984[0m
[2m2023-10-22 05:06:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_153984.pt[0m


Epoch 33/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:08:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=33 step=158796[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024894288650474645, 'time_algorithm_update': 0.01748763296074998, 'loss': -132.03264053167152, 'time_step': 0.017809704602606974, 'observation_error': 0.12112341072281667, 'reward_error': 7.644263151738136e-05, 'variance': 0.06915230551165759}[0m [36mstep[0m=[35m158796[0m
[2m2023-10-22 05:08:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_158796.pt[0m


Epoch 34/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:11:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=34 step=163608[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002470111113633103, 'time_algorithm_update': 0.017415849546145522, 'loss': -132.27475131994868, 'time_step': 0.017734934811976584, 'observation_error': 0.12471320745953435, 'reward_error': 8.402471575457346e-05, 'variance': 0.06827110261005209}[0m [36mstep[0m=[35m163608[0m
[2m2023-10-22 05:11:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_163608.pt[0m


Epoch 35/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:13:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=35 step=168420[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002541763031373694, 'time_algorithm_update': 0.017426021279440457, 'loss': -133.00655383461233, 'time_step': 0.017753004036837584, 'observation_error': 0.12506225663096848, 'reward_error': 7.980867607231374e-05, 'variance': 0.07674615648431522}[0m [36mstep[0m=[35m168420[0m
[2m2023-10-22 05:13:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_168420.pt[0m


Epoch 36/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:15:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=36 step=173232[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002455938783964314, 'time_algorithm_update': 0.017462323282722226, 'loss': -133.25192944565518, 'time_step': 0.01778019688472288, 'observation_error': 0.12054714237732697, 'reward_error': 7.382196122278371e-05, 'variance': 0.0662062431972275}[0m [36mstep[0m=[35m173232[0m
[2m2023-10-22 05:15:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_173232.pt[0m


Epoch 37/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:18:06[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=37 step=178044[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024500788992281664, 'time_algorithm_update': 0.01750870818211848, 'loss': -133.68773075945657, 'time_step': 0.017826315421217796, 'observation_error': 0.11337816310229258, 'reward_error': 6.684818247168767e-05, 'variance': 0.06248419960450976}[0m [36mstep[0m=[35m178044[0m
[2m2023-10-22 05:18:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_178044.pt[0m


Epoch 38/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:20:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=38 step=182856[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002483708701922511, 'time_algorithm_update': 0.01745410002179673, 'loss': -133.78494668779825, 'time_step': 0.01777532733883941, 'observation_error': 0.11349012317482271, 'reward_error': 6.134152625211661e-05, 'variance': 0.07161301578592245}[0m [36mstep[0m=[35m182856[0m
[2m2023-10-22 05:20:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_182856.pt[0m


Epoch 39/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:22:45[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=39 step=187668[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024609524115659946, 'time_algorithm_update': 0.017463404985635558, 'loss': -134.38955252703684, 'time_step': 0.01778115249135944, 'observation_error': 0.13346239097507193, 'reward_error': 5.832493947141404e-05, 'variance': 0.09158163216231288}[0m [36mstep[0m=[35m187668[0m
[2m2023-10-22 05:22:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_187668.pt[0m


Epoch 40/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:25:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=40 step=192480[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002487977643064529, 'time_algorithm_update': 0.01749273136270512, 'loss': -134.73384886826463, 'time_step': 0.017813496756137456, 'observation_error': 0.11868678735309761, 'reward_error': 6.380431223733229e-05, 'variance': 0.0706588584727847}[0m [36mstep[0m=[35m192480[0m
[2m2023-10-22 05:25:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_192480.pt[0m


Epoch 41/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:27:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=41 step=197292[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024491340442191336, 'time_algorithm_update': 0.01757112474909249, 'loss': -134.48222679984838, 'time_step': 0.017889559368044757, 'observation_error': 0.11445532292178714, 'reward_error': 5.601815783324104e-05, 'variance': 0.0619106523603463}[0m [36mstep[0m=[35m197292[0m
[2m2023-10-22 05:27:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_197292.pt[0m


Epoch 42/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:29:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=42 step=202104[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024362984837241107, 'time_algorithm_update': 0.017453415980461927, 'loss': -135.25501961640686, 'time_step': 0.017769096993646916, 'observation_error': 0.12343355501321437, 'reward_error': 5.391372607375869e-05, 'variance': 0.07387847613157869}[0m [36mstep[0m=[35m202104[0m
[2m2023-10-22 05:29:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_202104.pt[0m


Epoch 43/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:32:03[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=43 step=206916[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002490548124337137, 'time_algorithm_update': 0.017427953748235282, 'loss': -135.47873546278487, 'time_step': 0.017750625400273678, 'observation_error': 0.10897298407191408, 'reward_error': 4.8453438288408135e-05, 'variance': 0.05716121884199121}[0m [36mstep[0m=[35m206916[0m
[2m2023-10-22 05:32:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_206916.pt[0m


Epoch 44/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:34:22[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=44 step=211728[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024396513069260646, 'time_algorithm_update': 0.01746689718362201, 'loss': -135.97632688695157, 'time_step': 0.017782215812457966, 'observation_error': 0.11777640290731212, 'reward_error': 4.6683181797309784e-05, 'variance': 0.0697759239627071}[0m [36mstep[0m=[35m211728[0m
[2m2023-10-22 05:34:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_211728.pt[0m


Epoch 45/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:36:41[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=45 step=216540[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002434298780097232, 'time_algorithm_update': 0.017456181378336818, 'loss': -135.85142245019165, 'time_step': 0.017771498966692688, 'observation_error': 0.1132912375054556, 'reward_error': 4.4655794707156316e-05, 'variance': 0.06287441394102086}[0m [36mstep[0m=[35m216540[0m
[2m2023-10-22 05:36:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_216540.pt[0m


Epoch 46/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:39:00[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=46 step=221352[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024510039355689453, 'time_algorithm_update': 0.01740504202997298, 'loss': -136.22366702784524, 'time_step': 0.017721171440527227, 'observation_error': 0.11762734746354463, 'reward_error': 3.730376312443088e-05, 'variance': 0.05859746265973619}[0m [36mstep[0m=[35m221352[0m
[2m2023-10-22 05:39:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_221352.pt[0m


Epoch 47/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:41:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=47 step=226164[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024853130231177123, 'time_algorithm_update': 0.01747162725562764, 'loss': -136.5453218525881, 'time_step': 0.017793094824476233, 'observation_error': 0.10729477068420884, 'reward_error': 3.582298110036879e-05, 'variance': 0.061401337140562624}[0m [36mstep[0m=[35m226164[0m
[2m2023-10-22 05:41:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_226164.pt[0m


Epoch 48/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:43:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=48 step=230976[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002524216573434578, 'time_algorithm_update': 0.017507622168644803, 'loss': -136.735289800395, 'time_step': 0.017832065015065107, 'observation_error': 0.12144061526902636, 'reward_error': 3.9003833462052276e-05, 'variance': 0.06478982891842955}[0m [36mstep[0m=[35m230976[0m
[2m2023-10-22 05:43:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_230976.pt[0m


Epoch 49/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:45:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=49 step=235788[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002468352702292223, 'time_algorithm_update': 0.01741751149011867, 'loss': -136.96642580531778, 'time_step': 0.01773679568583234, 'observation_error': 0.12662006327693817, 'reward_error': 3.603746209934272e-05, 'variance': 0.09976382369650813}[0m [36mstep[0m=[35m235788[0m
[2m2023-10-22 05:45:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_235788.pt[0m


Epoch 50/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:48:18[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=50 step=240600[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002480982148637399, 'time_algorithm_update': 0.01752812215316722, 'loss': -136.5171618861946, 'time_step': 0.017849141572379906, 'observation_error': 0.10242001408567916, 'reward_error': 3.6628557341136955e-05, 'variance': 0.050687835074659134}[0m [36mstep[0m=[35m240600[0m
[2m2023-10-22 05:48:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_240600.pt[0m


Epoch 51/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:50:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=51 step=245412[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025096280517324446, 'time_algorithm_update': 0.017447686799842918, 'loss': -137.27650714098962, 'time_step': 0.017771914712805998, 'observation_error': 0.11464141273564683, 'reward_error': 3.589567781567889e-05, 'variance': 0.06229530349897012}[0m [36mstep[0m=[35m245412[0m
[2m2023-10-22 05:50:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_245412.pt[0m


Epoch 52/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:52:56[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=52 step=250224[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002443886060865344, 'time_algorithm_update': 0.017416222880308765, 'loss': -137.96999051505492, 'time_step': 0.01773198321771344, 'observation_error': 0.11436840593660384, 'reward_error': 3.1203897386751286e-05, 'variance': 0.05655254531503818}[0m [36mstep[0m=[35m250224[0m
[2m2023-10-22 05:52:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_250224.pt[0m


Epoch 53/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:55:14[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=53 step=255036[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002467803229714867, 'time_algorithm_update': 0.017439998841146973, 'loss': -137.74477076867373, 'time_step': 0.01775972900644304, 'observation_error': 0.1153471825503235, 'reward_error': 2.878512053820361e-05, 'variance': 0.06396833903646698}[0m [36mstep[0m=[35m255036[0m
[2m2023-10-22 05:55:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_255036.pt[0m


Epoch 54/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:57:34[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=54 step=259848[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024745341449207994, 'time_algorithm_update': 0.01754130439171668, 'loss': -138.0586935199506, 'time_step': 0.017861652899462288, 'observation_error': 0.10989976321043245, 'reward_error': 3.207654131916355e-05, 'variance': 0.05708753532773744}[0m [36mstep[0m=[35m259848[0m
[2m2023-10-22 05:57:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_259848.pt[0m


Epoch 55/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 05:59:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=55 step=264660[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024765214618800187, 'time_algorithm_update': 0.017496731414065596, 'loss': -138.3584905895509, 'time_step': 0.017817339942738698, 'observation_error': 0.1169677679157647, 'reward_error': 2.750144922946344e-05, 'variance': 0.058016775568032086}[0m [36mstep[0m=[35m264660[0m
[2m2023-10-22 05:59:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_264660.pt[0m


Epoch 56/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:02:13[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=56 step=269472[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024693272853036375, 'time_algorithm_update': 0.01747455145058192, 'loss': -138.31941498822206, 'time_step': 0.017793387546206352, 'observation_error': 0.10058394308458701, 'reward_error': 2.88394695468522e-05, 'variance': 0.05251637143724549}[0m [36mstep[0m=[35m269472[0m
[2m2023-10-22 06:02:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_269472.pt[0m


Epoch 57/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:04:33[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=57 step=274284[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002486760776833703, 'time_algorithm_update': 0.017382994207459892, 'loss': -138.43927475500385, 'time_step': 0.017704073082677345, 'observation_error': 0.11269981225028643, 'reward_error': 2.4859562387994695e-05, 'variance': 0.05687659903074234}[0m [36mstep[0m=[35m274284[0m
[2m2023-10-22 06:04:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_274284.pt[0m


Epoch 58/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:06:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=58 step=279096[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024980232305360256, 'time_algorithm_update': 0.01747865000072362, 'loss': -139.0590669754041, 'time_step': 0.01780130262684049, 'observation_error': 0.11044768762827638, 'reward_error': 2.3697084564619867e-05, 'variance': 0.0597422405440867}[0m [36mstep[0m=[35m279096[0m
[2m2023-10-22 06:06:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_279096.pt[0m


Epoch 59/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:09:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=59 step=283908[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024819715956500045, 'time_algorithm_update': 0.017453098435849818, 'loss': -139.4525372238825, 'time_step': 0.01777381666085171, 'observation_error': 0.11137145689882272, 'reward_error': 2.434340881303697e-05, 'variance': 0.05978264345612477}[0m [36mstep[0m=[35m283908[0m
[2m2023-10-22 06:09:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_283908.pt[0m


Epoch 60/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:11:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=60 step=288720[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025011783625221413, 'time_algorithm_update': 0.017519942691498563, 'loss': -138.5995248245975, 'time_step': 0.0178434810634464, 'observation_error': 0.12329134480085788, 'reward_error': 2.2250258172583686e-05, 'variance': 0.07092645158575041}[0m [36mstep[0m=[35m288720[0m
[2m2023-10-22 06:11:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_288720.pt[0m


Epoch 61/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:13:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=61 step=293532[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025045633910600085, 'time_algorithm_update': 0.017485879305889483, 'loss': -139.84938340016632, 'time_step': 0.017809827131523456, 'observation_error': 0.11260358106332523, 'reward_error': 2.5727033970822753e-05, 'variance': 0.05898699757249464}[0m [36mstep[0m=[35m293532[0m
[2m2023-10-22 06:13:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_293532.pt[0m


Epoch 62/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:16:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=62 step=298344[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024844905483851506, 'time_algorithm_update': 0.01745854420951279, 'loss': -139.61372733968352, 'time_step': 0.017779611143982618, 'observation_error': 0.10664817380672305, 'reward_error': 2.55826021688332e-05, 'variance': 0.05295409275251968}[0m [36mstep[0m=[35m298344[0m
[2m2023-10-22 06:16:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_298344.pt[0m


Epoch 63/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:18:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=63 step=303156[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024957049418169563, 'time_algorithm_update': 0.017454883552846173, 'loss': -139.40632991441962, 'time_step': 0.017777605346115253, 'observation_error': 0.11067829213360106, 'reward_error': 2.3370507879639913e-05, 'variance': 0.05708565128084652}[0m [36mstep[0m=[35m303156[0m
[2m2023-10-22 06:18:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_303156.pt[0m


Epoch 64/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:20:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=64 step=307968[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024573800966131223, 'time_algorithm_update': 0.017443827114200353, 'loss': -140.1188121674364, 'time_step': 0.017761567187923644, 'observation_error': 0.11411401515595992, 'reward_error': 2.2821873951464475e-05, 'variance': 0.06039597357966541}[0m [36mstep[0m=[35m307968[0m
[2m2023-10-22 06:20:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_307968.pt[0m


Epoch 65/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:23:10[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=65 step=312780[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002495942270369304, 'time_algorithm_update': 0.017447171861294995, 'loss': -140.04619325525246, 'time_step': 0.017770232603337897, 'observation_error': 0.11167925441857852, 'reward_error': 2.0424231370224886e-05, 'variance': 0.05473327428725563}[0m [36mstep[0m=[35m312780[0m
[2m2023-10-22 06:23:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_312780.pt[0m


Epoch 66/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:25:29[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=66 step=317592[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002521125356653583, 'time_algorithm_update': 0.01747989064935635, 'loss': -140.5091671547291, 'time_step': 0.017805683493911477, 'observation_error': 0.10490691150620986, 'reward_error': 1.8786306870893862e-05, 'variance': 0.04935367819218436}[0m [36mstep[0m=[35m317592[0m
[2m2023-10-22 06:25:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_317592.pt[0m


Epoch 67/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:27:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=67 step=322404[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002484020350480813, 'time_algorithm_update': 0.017525440637805714, 'loss': -141.00658552704112, 'time_step': 0.017846611521190537, 'observation_error': 0.10369351818525334, 'reward_error': 2.1569139328768425e-05, 'variance': 0.045528542367818926}[0m [36mstep[0m=[35m322404[0m
[2m2023-10-22 06:27:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_322404.pt[0m


Epoch 68/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:30:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=68 step=327216[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025126949906448276, 'time_algorithm_update': 0.017460754090116506, 'loss': -140.27481140182698, 'time_step': 0.017785835890401332, 'observation_error': 0.12130095808705652, 'reward_error': 2.1030425933815203e-05, 'variance': 0.07191346968845336}[0m [36mstep[0m=[35m327216[0m
[2m2023-10-22 06:30:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_327216.pt[0m


Epoch 69/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:32:28[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=69 step=332028[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002510941533971011, 'time_algorithm_update': 0.017509676324062712, 'loss': -140.45633816263225, 'time_step': 0.017834423634774072, 'observation_error': 0.11253948794725131, 'reward_error': 1.991697892364511e-05, 'variance': 0.05894778556620232}[0m [36mstep[0m=[35m332028[0m
[2m2023-10-22 06:32:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_332028.pt[0m


Epoch 70/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:34:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=70 step=336840[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002481791741235595, 'time_algorithm_update': 0.017506922668748762, 'loss': -140.89535747165, 'time_step': 0.01782758703651967, 'observation_error': 0.10685367059405443, 'reward_error': 1.7277670855550355e-05, 'variance': 0.05354760750903245}[0m [36mstep[0m=[35m336840[0m
[2m2023-10-22 06:34:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_336840.pt[0m


Epoch 71/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:37:06[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=71 step=341652[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002506278201330729, 'time_algorithm_update': 0.0173979880704745, 'loss': -141.43705452509155, 'time_step': 0.017721884367571012, 'observation_error': 0.10559155371873852, 'reward_error': 1.7465181884280582e-05, 'variance': 0.05510331065296231}[0m [36mstep[0m=[35m341652[0m
[2m2023-10-22 06:37:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_341652.pt[0m


Epoch 72/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:39:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=72 step=346464[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002519141507961308, 'time_algorithm_update': 0.017477212949088487, 'loss': -140.77602226419046, 'time_step': 0.017802683839377817, 'observation_error': 0.10448967036160363, 'reward_error': 1.92731234936706e-05, 'variance': 0.04982628953642261}[0m [36mstep[0m=[35m346464[0m
[2m2023-10-22 06:39:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_346464.pt[0m


Epoch 73/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:41:45[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=73 step=351276[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002464406805442753, 'time_algorithm_update': 0.017553285618969926, 'loss': -141.64246815319171, 'time_step': 0.017873292651061506, 'observation_error': 0.11154273439220402, 'reward_error': 1.8701888974705524e-05, 'variance': 0.057851528542707475}[0m [36mstep[0m=[35m351276[0m
[2m2023-10-22 06:41:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_351276.pt[0m


Epoch 74/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:44:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=74 step=356088[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002488892274604474, 'time_algorithm_update': 0.017486398158624385, 'loss': -142.13070550285968, 'time_step': 0.017808643461561958, 'observation_error': 0.11225374582920425, 'reward_error': 1.9595391558501353e-05, 'variance': 0.051126109893989795}[0m [36mstep[0m=[35m356088[0m
[2m2023-10-22 06:44:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_356088.pt[0m


Epoch 75/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:46:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=75 step=360900[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002516074569048925, 'time_algorithm_update': 0.017448798131665288, 'loss': -142.4617048106586, 'time_step': 0.017772816759491602, 'observation_error': 0.11127234915666598, 'reward_error': 1.6177022398293125e-05, 'variance': 0.053061994109962964}[0m [36mstep[0m=[35m360900[0m
[2m2023-10-22 06:46:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_360900.pt[0m


Epoch 76/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:48:44[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=76 step=365712[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002453184484543646, 'time_algorithm_update': 0.017410970487301287, 'loss': -141.91604983162503, 'time_step': 0.017729307400219062, 'observation_error': 0.1112084535706155, 'reward_error': 1.7156645447685408e-05, 'variance': 0.050455094077909546}[0m [36mstep[0m=[35m365712[0m
[2m2023-10-22 06:48:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_365712.pt[0m


Epoch 77/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:51:04[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=77 step=370524[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000248001499762658, 'time_algorithm_update': 0.01751957094282878, 'loss': -142.08931697059054, 'time_step': 0.017840496619740628, 'observation_error': 0.10906315062350005, 'reward_error': 1.4585352769356893e-05, 'variance': 0.055355401875821254}[0m [36mstep[0m=[35m370524[0m
[2m2023-10-22 06:51:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_370524.pt[0m


Epoch 78/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:53:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=78 step=375336[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002480290477115317, 'time_algorithm_update': 0.01748984630962064, 'loss': -142.2959038354116, 'time_step': 0.01781139756279594, 'observation_error': 0.10602839487181741, 'reward_error': 1.3225786793784082e-05, 'variance': 0.050262936157878677}[0m [36mstep[0m=[35m375336[0m
[2m2023-10-22 06:53:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_375336.pt[0m


Epoch 79/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:55:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=79 step=380148[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024996543069333706, 'time_algorithm_update': 0.0174885294086618, 'loss': -141.97494674620782, 'time_step': 0.017812046227807913, 'observation_error': 0.12132276257052851, 'reward_error': 1.5422197813403203e-05, 'variance': 0.08707124170580854}[0m [36mstep[0m=[35m380148[0m
[2m2023-10-22 06:55:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_380148.pt[0m


Epoch 80/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 06:58:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=80 step=384960[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000246563853567477, 'time_algorithm_update': 0.017462196096418702, 'loss': -142.3100947382444, 'time_step': 0.01778133784545429, 'observation_error': 0.11715145827115664, 'reward_error': 1.3780407358394372e-05, 'variance': 0.06645711657080353}[0m [36mstep[0m=[35m384960[0m
[2m2023-10-22 06:58:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_384960.pt[0m


Epoch 81/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:00:22[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=81 step=389772[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002503480796306607, 'time_algorithm_update': 0.017475266062212506, 'loss': -142.80715787400825, 'time_step': 0.017799613977211866, 'observation_error': 0.10118897714612676, 'reward_error': 1.6704276236085822e-05, 'variance': 0.04604566398599408}[0m [36mstep[0m=[35m389772[0m
[2m2023-10-22 07:00:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_389772.pt[0m


Epoch 82/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:02:41[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=82 step=394584[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000247117388971825, 'time_algorithm_update': 0.01739511203488408, 'loss': -142.72952687730415, 'time_step': 0.01771600466416661, 'observation_error': 0.11114008228641487, 'reward_error': 1.6714378737820114e-05, 'variance': 0.05091655721273473}[0m [36mstep[0m=[35m394584[0m
[2m2023-10-22 07:02:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_394584.pt[0m


Epoch 83/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:05:00[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=83 step=399396[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000244512324123113, 'time_algorithm_update': 0.01741256058860202, 'loss': -143.07575892252618, 'time_step': 0.017729524265996437, 'observation_error': 0.11086351178131948, 'reward_error': 1.2071899389417878e-05, 'variance': 0.053287060890748354}[0m [36mstep[0m=[35m399396[0m
[2m2023-10-22 07:05:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_399396.pt[0m


Epoch 84/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:07:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=84 step=404208[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024727128093082117, 'time_algorithm_update': 0.017488919093226356, 'loss': -143.18389860987168, 'time_step': 0.017809399444662522, 'observation_error': 0.10704235718769316, 'reward_error': 1.386229494474334e-05, 'variance': 0.05116343496127059}[0m [36mstep[0m=[35m404208[0m
[2m2023-10-22 07:07:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_404208.pt[0m


Epoch 85/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:09:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=85 step=409020[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025328307575913934, 'time_algorithm_update': 0.017470715745527946, 'loss': -142.66337686524426, 'time_step': 0.017798460134346092, 'observation_error': 0.1031737437266018, 'reward_error': 1.3078081109472926e-05, 'variance': 0.0450235387887358}[0m [36mstep[0m=[35m409020[0m
[2m2023-10-22 07:09:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_409020.pt[0m


Epoch 86/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:12:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=86 step=413832[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025133118466942486, 'time_algorithm_update': 0.01744479570206462, 'loss': -143.22606627106765, 'time_step': 0.017770208969576005, 'observation_error': 0.10372719322468497, 'reward_error': 1.2583474826404403e-05, 'variance': 0.05474164868669267}[0m [36mstep[0m=[35m413832[0m
[2m2023-10-22 07:12:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_413832.pt[0m


Epoch 87/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:14:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=87 step=418644[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024839633718095813, 'time_algorithm_update': 0.017219980805492956, 'loss': -143.3165144625051, 'time_step': 0.017541455162235427, 'observation_error': 0.1122402673256669, 'reward_error': 1.2972087543502802e-05, 'variance': 0.05616673368265919}[0m [36mstep[0m=[35m418644[0m
[2m2023-10-22 07:14:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_418644.pt[0m


Epoch 88/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:16:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=88 step=423456[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002501904221246962, 'time_algorithm_update': 0.017168082550774986, 'loss': -143.69036142447544, 'time_step': 0.017491554084264133, 'observation_error': 0.10688189728873515, 'reward_error': 1.1212286180868068e-05, 'variance': 0.05180413105643056}[0m [36mstep[0m=[35m423456[0m
[2m2023-10-22 07:16:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_423456.pt[0m


Epoch 89/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:18:55[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=89 step=428268[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002496160275720103, 'time_algorithm_update': 0.017197944428260784, 'loss': -144.07899454763702, 'time_step': 0.017521065518148523, 'observation_error': 0.1228909912596739, 'reward_error': 1.3745462122087765e-05, 'variance': 0.0780776499561207}[0m [36mstep[0m=[35m428268[0m
[2m2023-10-22 07:18:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_428268.pt[0m


Epoch 90/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:21:13[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=90 step=433080[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002487985570531831, 'time_algorithm_update': 0.01730387332730757, 'loss': -143.9106572687081, 'time_step': 0.017626100248430338, 'observation_error': 0.10750514734250925, 'reward_error': 1.1971100602062789e-05, 'variance': 0.0472967962851165}[0m [36mstep[0m=[35m433080[0m
[2m2023-10-22 07:21:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_433080.pt[0m


Epoch 91/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:23:31[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=91 step=437892[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002492648903172113, 'time_algorithm_update': 0.017274312198112533, 'loss': -144.70620179830345, 'time_step': 0.017596026509380896, 'observation_error': 0.1070841690579979, 'reward_error': 1.2681643430450956e-05, 'variance': 0.046915448026853955}[0m [36mstep[0m=[35m437892[0m
[2m2023-10-22 07:23:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_437892.pt[0m


Epoch 92/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:25:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=92 step=442704[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002513302928293534, 'time_algorithm_update': 0.01727763093320509, 'loss': -144.37813333739663, 'time_step': 0.017602839969340107, 'observation_error': 0.1098068544128226, 'reward_error': 1.0384303617321242e-05, 'variance': 0.06284035516097672}[0m [36mstep[0m=[35m442704[0m
[2m2023-10-22 07:25:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_442704.pt[0m


Epoch 93/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:28:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=93 step=447516[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024782015894812934, 'time_algorithm_update': 0.01730665557105048, 'loss': -143.87727495163358, 'time_step': 0.017627663297248897, 'observation_error': 0.10554438844110432, 'reward_error': 1.1565318177815589e-05, 'variance': 0.05162250339876627}[0m [36mstep[0m=[35m447516[0m
[2m2023-10-22 07:28:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_447516.pt[0m


Epoch 94/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:30:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=94 step=452328[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000250679695497229, 'time_algorithm_update': 0.017178010762183744, 'loss': -144.088160403847, 'time_step': 0.017501795182897958, 'observation_error': 0.11172847478807955, 'reward_error': 1.3682002081584226e-05, 'variance': 0.05975097935957992}[0m [36mstep[0m=[35m452328[0m
[2m2023-10-22 07:30:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_452328.pt[0m


Epoch 95/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:32:44[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=95 step=457140[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002466193953852602, 'time_algorithm_update': 0.017224863431697473, 'loss': -144.29069175585448, 'time_step': 0.017544237851898372, 'observation_error': 0.11433844456361571, 'reward_error': 1.0066461821553304e-05, 'variance': 0.05741431296176043}[0m [36mstep[0m=[35m457140[0m
[2m2023-10-22 07:32:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_457140.pt[0m


Epoch 96/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:35:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=96 step=461952[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002494710540137287, 'time_algorithm_update': 0.01726582757849943, 'loss': -144.62441059399524, 'time_step': 0.01758838651186213, 'observation_error': 0.10776990364610449, 'reward_error': 1.079704290490591e-05, 'variance': 0.05127790669230767}[0m [36mstep[0m=[35m461952[0m
[2m2023-10-22 07:35:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_461952.pt[0m


Epoch 97/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:37:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=97 step=466764[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025307240331559405, 'time_algorithm_update': 0.017219657563013725, 'loss': -145.11906330761866, 'time_step': 0.017546049426816843, 'observation_error': 0.10968400174606738, 'reward_error': 9.59775482172352e-06, 'variance': 0.05546432445434633}[0m [36mstep[0m=[35m466764[0m
[2m2023-10-22 07:37:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_466764.pt[0m


Epoch 98/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:39:36[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=98 step=471576[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025303405419252164, 'time_algorithm_update': 0.01711757967894214, 'loss': -145.36802277759224, 'time_step': 0.017444336255787812, 'observation_error': 0.10252341247620522, 'reward_error': 9.495355487608349e-06, 'variance': 0.0482074690423822}[0m [36mstep[0m=[35m471576[0m
[2m2023-10-22 07:39:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_471576.pt[0m


Epoch 99/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:41:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=99 step=476388[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002472426429551934, 'time_algorithm_update': 0.01725166590136483, 'loss': -144.83246143401314, 'time_step': 0.017571693792604745, 'observation_error': 0.10510752879228889, 'reward_error': 1.0839389921691593e-05, 'variance': 0.05086065984891257}[0m [36mstep[0m=[35m476388[0m
[2m2023-10-22 07:41:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_476388.pt[0m


Epoch 100/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:44:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022035204: epoch=100 step=481200[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024871853918010555, 'time_algorithm_update': 0.017212223877831487, 'loss': -145.5137870194807, 'time_step': 0.017534719986015822, 'observation_error': 0.10675487597471463, 'reward_error': 1.0083300210606608e-05, 'variance': 0.04844209633289091}[0m [36mstep[0m=[35m481200[0m
[2m2023-10-22 07:44:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204/model_481200.pt[0m
[2m2023-10-22 07:44:12[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-22 07:44:12[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412[0m
[2m2023-10-22 07:44:12[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-22 07:44:12[0m [[32m[1

Epoch 1/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:46:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=1 step=4812[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025663233160080754, 'time_algorithm_update': 0.016473395816504907, 'loss': 8.30896885546601, 'time_step': 0.01681904325065074, 'observation_error': 1.7282812089203425, 'reward_error': 0.03628989785605075, 'variance': 1.262460495924966}[0m [36mstep[0m=[35m4812[0m
[2m2023-10-22 07:46:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_4812.pt[0m


Epoch 2/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:48:38[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=2 step=9624[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025142155779666535, 'time_algorithm_update': 0.016948211520092744, 'loss': -33.70267205607961, 'time_step': 0.01728041258238795, 'observation_error': 1.2568154662449058, 'reward_error': 0.013665085121010005, 'variance': 0.7799274178904478}[0m [36mstep[0m=[35m9624[0m
[2m2023-10-22 07:48:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_9624.pt[0m


Epoch 3/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:50:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=3 step=14436[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024889794767447935, 'time_algorithm_update': 0.016904370247673613, 'loss': -61.81366618583327, 'time_step': 0.017232847233563785, 'observation_error': 1.1426997479010004, 'reward_error': 0.004954715970867182, 'variance': 0.6503738651279127}[0m [36mstep[0m=[35m14436[0m
[2m2023-10-22 07:50:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_14436.pt[0m


Epoch 4/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:53:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=4 step=19248[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002495815926359181, 'time_algorithm_update': 0.016801584391225307, 'loss': -84.10795435663668, 'time_step': 0.017124232013128442, 'observation_error': 1.0669236525288956, 'reward_error': 0.003005293925529324, 'variance': 0.5906684076977031}[0m [36mstep[0m=[35m19248[0m
[2m2023-10-22 07:53:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_19248.pt[0m


Epoch 5/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:55:18[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=5 step=24060[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024450593260260094, 'time_algorithm_update': 0.01682943819169689, 'loss': -93.35295606154952, 'time_step': 0.017147266854867275, 'observation_error': 1.04354243958213, 'reward_error': 0.001972698804024689, 'variance': 0.60232378797743}[0m [36mstep[0m=[35m24060[0m
[2m2023-10-22 07:55:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_24060.pt[0m


Epoch 6/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:57:31[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=6 step=28872[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002478630168182297, 'time_algorithm_update': 0.016924269875187924, 'loss': -97.68625746621555, 'time_step': 0.01724415026598936, 'observation_error': 1.019286733419714, 'reward_error': 0.0014572387667025762, 'variance': 0.5836459575934486}[0m [36mstep[0m=[35m28872[0m
[2m2023-10-22 07:57:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_28872.pt[0m


Epoch 7/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 07:59:44[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=7 step=33684[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024901923792419685, 'time_algorithm_update': 0.016843829319463207, 'loss': -100.81295606560838, 'time_step': 0.017164706044264465, 'observation_error': 0.9980447450306479, 'reward_error': 0.0010856939542870454, 'variance': 0.5483988695603254}[0m [36mstep[0m=[35m33684[0m
[2m2023-10-22 07:59:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_33684.pt[0m


Epoch 8/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:01:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=8 step=38496[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000247359424457883, 'time_algorithm_update': 0.016825420105348304, 'loss': -103.44854932612215, 'time_step': 0.017143164241898583, 'observation_error': 1.005965581848591, 'reward_error': 0.000829038704856126, 'variance': 0.5605371428963052}[0m [36mstep[0m=[35m38496[0m
[2m2023-10-22 08:01:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_38496.pt[0m


Epoch 9/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:04:10[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=9 step=43308[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024931334696109357, 'time_algorithm_update': 0.01687977324895629, 'loss': -105.69405490719866, 'time_step': 0.017200706209228717, 'observation_error': 0.99807523645757, 'reward_error': 0.0006223694224763354, 'variance': 0.5680732683213072}[0m [36mstep[0m=[35m43308[0m
[2m2023-10-22 08:04:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_43308.pt[0m


Epoch 10/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:06:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=10 step=48120[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000248659231815354, 'time_algorithm_update': 0.01695465476732896, 'loss': -107.63972355183817, 'time_step': 0.01727397800581909, 'observation_error': 0.9864465213016652, 'reward_error': 0.0005403173696890193, 'variance': 0.563088072021531}[0m [36mstep[0m=[35m48120[0m
[2m2023-10-22 08:06:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_48120.pt[0m


Epoch 11/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:08:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=11 step=52932[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024616401193544254, 'time_algorithm_update': 0.016871659932092935, 'loss': -109.58649196926, 'time_step': 0.017188476604515578, 'observation_error': 0.9738140219351088, 'reward_error': 0.00046934669974420457, 'variance': 0.5535899227164227}[0m [36mstep[0m=[35m52932[0m
[2m2023-10-22 08:08:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_52932.pt[0m


Epoch 12/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:10:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=12 step=57744[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025053437511225296, 'time_algorithm_update': 0.01679441325385077, 'loss': -111.08800513330144, 'time_step': 0.0171163110364405, 'observation_error': 0.9588901130924531, 'reward_error': 0.0003965710663196082, 'variance': 0.5493611417253277}[0m [36mstep[0m=[35m57744[0m
[2m2023-10-22 08:10:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_57744.pt[0m


Epoch 13/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:13:04[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=13 step=62556[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024693238170366927, 'time_algorithm_update': 0.01686215881298506, 'loss': -112.8640409709014, 'time_step': 0.01717964491047467, 'observation_error': 0.9342597114748585, 'reward_error': 0.0003385086325187577, 'variance': 0.5356431958544924}[0m [36mstep[0m=[35m62556[0m
[2m2023-10-22 08:13:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_62556.pt[0m


Epoch 14/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:15:17[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=14 step=67368[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024296884623945303, 'time_algorithm_update': 0.016915384125729353, 'loss': -114.17438243888164, 'time_step': 0.017228535880471702, 'observation_error': 0.9144418257292953, 'reward_error': 0.00031983295579499457, 'variance': 0.5120079656871178}[0m [36mstep[0m=[35m67368[0m
[2m2023-10-22 08:15:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_67368.pt[0m


Epoch 15/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:17:31[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=15 step=72180[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000248807970920604, 'time_algorithm_update': 0.016825709755184843, 'loss': -115.31566102050883, 'time_step': 0.01714538403183643, 'observation_error': 0.8812208854293052, 'reward_error': 0.00028248470056108565, 'variance': 0.49414025842437465}[0m [36mstep[0m=[35m72180[0m
[2m2023-10-22 08:17:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_72180.pt[0m


Epoch 16/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:19:44[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=16 step=76992[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002479710285602167, 'time_algorithm_update': 0.016932871127663705, 'loss': -116.5779418834328, 'time_step': 0.017251091754545495, 'observation_error': 0.8681150590847385, 'reward_error': 0.00026849939210922145, 'variance': 0.48811016925741524}[0m [36mstep[0m=[35m76992[0m
[2m2023-10-22 08:19:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_76992.pt[0m


Epoch 17/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:21:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=17 step=81804[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024626905087719137, 'time_algorithm_update': 0.016814935385734957, 'loss': -117.79782648554269, 'time_step': 0.017132507298058108, 'observation_error': 0.8416138343639956, 'reward_error': 0.0002590514041610747, 'variance': 0.46950515702772777}[0m [36mstep[0m=[35m81804[0m
[2m2023-10-22 08:21:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_81804.pt[0m


Epoch 18/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:24:10[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=18 step=86616[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024232295584103908, 'time_algorithm_update': 0.016907531226166863, 'loss': -118.90615900099921, 'time_step': 0.017220324659387172, 'observation_error': 0.8547235349244251, 'reward_error': 0.00021893587761733036, 'variance': 0.49072328288595574}[0m [36mstep[0m=[35m86616[0m
[2m2023-10-22 08:24:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_86616.pt[0m


Epoch 19/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:26:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=19 step=91428[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024437314752529584, 'time_algorithm_update': 0.016894861052458424, 'loss': -119.99973401286061, 'time_step': 0.017210443963235553, 'observation_error': 0.836641498144563, 'reward_error': 0.00021091275391978197, 'variance': 0.48984463331177824}[0m [36mstep[0m=[35m91428[0m
[2m2023-10-22 08:26:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_91428.pt[0m


Epoch 20/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:28:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=20 step=96240[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002455914010628996, 'time_algorithm_update': 0.016953364671118937, 'loss': -120.83753698147642, 'time_step': 0.017269552645837873, 'observation_error': 0.8184735498399247, 'reward_error': 0.00020543877270563337, 'variance': 0.48613993001451633}[0m [36mstep[0m=[35m96240[0m
[2m2023-10-22 08:28:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_96240.pt[0m


Epoch 21/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:30:51[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=21 step=101052[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024593143986347606, 'time_algorithm_update': 0.01696260254876572, 'loss': -122.17532650669317, 'time_step': 0.01727950781063546, 'observation_error': 0.8025590097012074, 'reward_error': 0.00018716089704574787, 'variance': 0.4820131982692303}[0m [36mstep[0m=[35m101052[0m
[2m2023-10-22 08:30:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_101052.pt[0m


Epoch 22/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:33:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=22 step=105864[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025053809111255063, 'time_algorithm_update': 0.01686061315504789, 'loss': -122.65006148844883, 'time_step': 0.017181491960809318, 'observation_error': 0.7971400866635266, 'reward_error': 0.00016873775805093276, 'variance': 0.4767710254592132}[0m [36mstep[0m=[35m105864[0m
[2m2023-10-22 08:33:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_105864.pt[0m


Epoch 23/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:35:18[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=23 step=110676[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002422436811680211, 'time_algorithm_update': 0.016868511439360686, 'loss': -123.84070122053697, 'time_step': 0.017181086916776864, 'observation_error': 0.7614485498804512, 'reward_error': 0.0001621674722209316, 'variance': 0.4576178368013084}[0m [36mstep[0m=[35m110676[0m
[2m2023-10-22 08:35:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_110676.pt[0m


Epoch 24/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:37:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=24 step=115488[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025322976354153474, 'time_algorithm_update': 0.016639138992290546, 'loss': -124.7926202498172, 'time_step': 0.016962067097896154, 'observation_error': 0.7603261931341474, 'reward_error': 0.00015401966943848002, 'variance': 0.46926030902794996}[0m [36mstep[0m=[35m115488[0m
[2m2023-10-22 08:37:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_115488.pt[0m


Epoch 25/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:39:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=25 step=120300[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024381906710757085, 'time_algorithm_update': 0.016830160631701438, 'loss': -126.0029590698648, 'time_step': 0.017144923396439523, 'observation_error': 0.7487565645154232, 'reward_error': 0.00014914784017478687, 'variance': 0.4711684228423568}[0m [36mstep[0m=[35m120300[0m
[2m2023-10-22 08:39:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_120300.pt[0m


Epoch 26/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:41:56[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=26 step=125112[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002472592410898565, 'time_algorithm_update': 0.01690245898485382, 'loss': -127.10876762203048, 'time_step': 0.01722105790056592, 'observation_error': 0.7366789386653767, 'reward_error': 0.000129494922713517, 'variance': 0.48217170674949056}[0m [36mstep[0m=[35m125112[0m
[2m2023-10-22 08:41:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_125112.pt[0m


Epoch 27/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:44:09[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=27 step=129924[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002470325155250251, 'time_algorithm_update': 0.016824905612720417, 'loss': -127.89878027417316, 'time_step': 0.01714413248293616, 'observation_error': 0.7081624490561232, 'reward_error': 0.00012380929732243258, 'variance': 0.4695714348731017}[0m [36mstep[0m=[35m129924[0m
[2m2023-10-22 08:44:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_129924.pt[0m


Epoch 28/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:46:22[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=28 step=134736[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002525398261529252, 'time_algorithm_update': 0.01673599237813022, 'loss': -129.43566351420466, 'time_step': 0.017059689398517434, 'observation_error': 0.6889511198695394, 'reward_error': 0.00011142618508633214, 'variance': 0.48512209304674303}[0m [36mstep[0m=[35m134736[0m
[2m2023-10-22 08:46:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_134736.pt[0m


Epoch 29/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:48:36[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=29 step=139548[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002457165064062561, 'time_algorithm_update': 0.016885496087601457, 'loss': -130.7909097342519, 'time_step': 0.017202298688769636, 'observation_error': 0.6929191283571684, 'reward_error': 0.00011458267049649591, 'variance': 0.4952626898680406}[0m [36mstep[0m=[35m139548[0m
[2m2023-10-22 08:48:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_139548.pt[0m


Epoch 30/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:50:49[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=30 step=144360[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024719884369835094, 'time_algorithm_update': 0.01682415656615374, 'loss': -131.41904557434995, 'time_step': 0.01714137278292839, 'observation_error': 0.6511356001649615, 'reward_error': 0.00011013579518956348, 'variance': 0.485836001027339}[0m [36mstep[0m=[35m144360[0m
[2m2023-10-22 08:50:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_144360.pt[0m


Epoch 31/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:53:03[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=31 step=149172[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024882159625502895, 'time_algorithm_update': 0.016981230808711507, 'loss': -132.4105226678444, 'time_step': 0.01730180554655524, 'observation_error': 0.6412319926060635, 'reward_error': 9.299249671439216e-05, 'variance': 0.4933451272014961}[0m [36mstep[0m=[35m149172[0m
[2m2023-10-22 08:53:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_149172.pt[0m


Epoch 32/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:55:16[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=32 step=153984[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024729134733242884, 'time_algorithm_update': 0.01684975906500495, 'loss': -133.63261968496928, 'time_step': 0.017167916123508317, 'observation_error': 0.6269607308101802, 'reward_error': 8.806995344098668e-05, 'variance': 0.5103105888056303}[0m [36mstep[0m=[35m153984[0m
[2m2023-10-22 08:55:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_153984.pt[0m


Epoch 33/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:57:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=33 step=158796[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024769282400459425, 'time_algorithm_update': 0.016851830016744205, 'loss': -134.5300992797636, 'time_step': 0.017171045590319043, 'observation_error': 0.5999613624612657, 'reward_error': 8.65338767207446e-05, 'variance': 0.49563152029804736}[0m [36mstep[0m=[35m158796[0m
[2m2023-10-22 08:57:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_158796.pt[0m


Epoch 34/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 08:59:44[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=34 step=163608[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002469022077812518, 'time_algorithm_update': 0.01687897688531915, 'loss': -135.77916736436308, 'time_step': 0.017197040548348367, 'observation_error': 0.5791180913630227, 'reward_error': 7.658324618785878e-05, 'variance': 0.49693227987138455}[0m [36mstep[0m=[35m163608[0m
[2m2023-10-22 08:59:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_163608.pt[0m


Epoch 35/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:01:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=35 step=168420[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024438216501935166, 'time_algorithm_update': 0.01692298681460513, 'loss': -136.9271265951079, 'time_step': 0.017238639338454504, 'observation_error': 0.5525570078690855, 'reward_error': 8.46871531600145e-05, 'variance': 0.4934790639149753}[0m [36mstep[0m=[35m168420[0m
[2m2023-10-22 09:01:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_168420.pt[0m


Epoch 36/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:04:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=36 step=173232[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000248557760233891, 'time_algorithm_update': 0.016874942250382573, 'loss': -138.3408361123387, 'time_step': 0.017194363640827232, 'observation_error': 0.5352207957050629, 'reward_error': 7.504796768485644e-05, 'variance': 0.4990252405759436}[0m [36mstep[0m=[35m173232[0m
[2m2023-10-22 09:04:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_173232.pt[0m


Epoch 37/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:06:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=37 step=178044[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024245514635829656, 'time_algorithm_update': 0.016964403966616713, 'loss': -139.12770746711485, 'time_step': 0.017277040336891106, 'observation_error': 0.520348100564314, 'reward_error': 7.0676552381741e-05, 'variance': 0.5027779887292713}[0m [36mstep[0m=[35m178044[0m
[2m2023-10-22 09:06:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_178044.pt[0m


Epoch 38/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:08:38[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=38 step=182856[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002487912241459289, 'time_algorithm_update': 0.016863320434678124, 'loss': -140.5679227574508, 'time_step': 0.017181923165483864, 'observation_error': 0.4976973369057922, 'reward_error': 6.520600844394761e-05, 'variance': 0.49655180456184494}[0m [36mstep[0m=[35m182856[0m
[2m2023-10-22 09:08:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_182856.pt[0m


Epoch 39/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:10:51[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=39 step=187668[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024760676143769907, 'time_algorithm_update': 0.016806114046948213, 'loss': -141.14001376016478, 'time_step': 0.01712481250192162, 'observation_error': 0.4816964357702497, 'reward_error': 6.084800982490037e-05, 'variance': 0.4877205844497816}[0m [36mstep[0m=[35m187668[0m
[2m2023-10-22 09:10:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_187668.pt[0m


Epoch 40/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:13:03[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=40 step=192480[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002491570767619069, 'time_algorithm_update': 0.016746856972067334, 'loss': -142.04291997546468, 'time_step': 0.017066366406945716, 'observation_error': 0.4623624609405195, 'reward_error': 5.864704096773269e-05, 'variance': 0.46813554699822346}[0m [36mstep[0m=[35m192480[0m
[2m2023-10-22 09:13:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_192480.pt[0m


Epoch 41/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:15:16[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=41 step=197292[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002465847622624855, 'time_algorithm_update': 0.016858680884439748, 'loss': -142.79288722789, 'time_step': 0.01717604034063921, 'observation_error': 0.4280764759970507, 'reward_error': 5.205495068558091e-05, 'variance': 0.4491211305009491}[0m [36mstep[0m=[35m197292[0m
[2m2023-10-22 09:15:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_197292.pt[0m


Epoch 42/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:17:29[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=42 step=202104[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000248867080098673, 'time_algorithm_update': 0.016773794803238866, 'loss': -144.42533097897385, 'time_step': 0.017093027024495038, 'observation_error': 0.41522404003966334, 'reward_error': 4.9941790664467514e-05, 'variance': 0.4381030606456019}[0m [36mstep[0m=[35m202104[0m
[2m2023-10-22 09:17:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_202104.pt[0m


Epoch 43/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:19:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=43 step=206916[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024978230619866553, 'time_algorithm_update': 0.01676602469616301, 'loss': -145.14897765938878, 'time_step': 0.01708694299062093, 'observation_error': 0.4160881767703723, 'reward_error': 4.8302558975684e-05, 'variance': 0.44340405958671497}[0m [36mstep[0m=[35m206916[0m
[2m2023-10-22 09:19:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_206916.pt[0m


Epoch 44/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:21:56[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=44 step=211728[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024996691709345615, 'time_algorithm_update': 0.016882282094170625, 'loss': -145.7051375314581, 'time_step': 0.017204264898847167, 'observation_error': 0.4255599195592787, 'reward_error': 5.018819500190406e-05, 'variance': 0.44205784063316456}[0m [36mstep[0m=[35m211728[0m
[2m2023-10-22 09:21:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_211728.pt[0m


Epoch 45/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:24:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=45 step=216540[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024904752907313016, 'time_algorithm_update': 0.016734274694152604, 'loss': -147.10027029389454, 'time_step': 0.017053630732538693, 'observation_error': 0.42274598978981254, 'reward_error': 4.490399940250123e-05, 'variance': 0.44957644680447467}[0m [36mstep[0m=[35m216540[0m
[2m2023-10-22 09:24:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_216540.pt[0m


Epoch 46/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:26:21[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=46 step=221352[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024784299996329263, 'time_algorithm_update': 0.016827981717766866, 'loss': -147.83393356469108, 'time_step': 0.01714630465852352, 'observation_error': 0.4189538092868311, 'reward_error': 4.3940943750501985e-05, 'variance': 0.44932708819073697}[0m [36mstep[0m=[35m221352[0m
[2m2023-10-22 09:26:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_221352.pt[0m


Epoch 47/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:28:34[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=47 step=226164[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024700694944297685, 'time_algorithm_update': 0.016822171330154685, 'loss': -148.2774840055261, 'time_step': 0.017140682449166417, 'observation_error': 0.4194474503652224, 'reward_error': 4.523864616980289e-05, 'variance': 0.4571198822438464}[0m [36mstep[0m=[35m226164[0m
[2m2023-10-22 09:28:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_226164.pt[0m


Epoch 48/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:30:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=48 step=230976[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002467559955562043, 'time_algorithm_update': 0.016911264171425938, 'loss': -149.2732472800257, 'time_step': 0.017229104329423914, 'observation_error': 0.42494992464427167, 'reward_error': 3.463290259730848e-05, 'variance': 0.4792468928627878}[0m [36mstep[0m=[35m230976[0m
[2m2023-10-22 09:30:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_230976.pt[0m


Epoch 49/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:33:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=49 step=235788[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024788427233993264, 'time_algorithm_update': 0.016835064265694304, 'loss': -149.80695074712446, 'time_step': 0.017154881286303995, 'observation_error': 0.4163494322422842, 'reward_error': 3.6728083184466034e-05, 'variance': 0.4553277367383537}[0m [36mstep[0m=[35m235788[0m
[2m2023-10-22 09:33:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_235788.pt[0m


Epoch 50/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:35:14[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=50 step=240600[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002466738471762894, 'time_algorithm_update': 0.016871966725077514, 'loss': -151.00918255502347, 'time_step': 0.017189698425413465, 'observation_error': 0.4136503741343002, 'reward_error': 3.5822789771075907e-05, 'variance': 0.46432220132309787}[0m [36mstep[0m=[35m240600[0m
[2m2023-10-22 09:35:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_240600.pt[0m


Epoch 51/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:37:28[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=51 step=245412[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002488325460692395, 'time_algorithm_update': 0.016929084721547014, 'loss': -150.9968788324548, 'time_step': 0.01725100638563199, 'observation_error': 0.4093913569381997, 'reward_error': 3.555699706693886e-05, 'variance': 0.46052308451161594}[0m [36mstep[0m=[35m245412[0m
[2m2023-10-22 09:37:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_245412.pt[0m


Epoch 52/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:39:41[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=52 step=250224[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025096235925320875, 'time_algorithm_update': 0.016747387864643203, 'loss': -152.2539009179855, 'time_step': 0.017069292385580137, 'observation_error': 0.41604776834715096, 'reward_error': 3.0006530610639953e-05, 'variance': 0.45885321736395207}[0m [36mstep[0m=[35m250224[0m
[2m2023-10-22 09:39:41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_250224.pt[0m


Epoch 53/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:41:55[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=53 step=255036[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024726637581042813, 'time_algorithm_update': 0.016904467854614764, 'loss': -152.5931156008618, 'time_step': 0.017223612279170568, 'observation_error': 0.4088063342204885, 'reward_error': 2.993248323720522e-05, 'variance': 0.45472482972738376}[0m [36mstep[0m=[35m255036[0m
[2m2023-10-22 09:41:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_255036.pt[0m


Epoch 54/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:44:09[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=54 step=259848[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002481032686241448, 'time_algorithm_update': 0.016870692236068737, 'loss': -153.14394695602252, 'time_step': 0.017190213611694743, 'observation_error': 0.40410388041541356, 'reward_error': 2.920722233679369e-05, 'variance': 0.45725925877661516}[0m [36mstep[0m=[35m259848[0m
[2m2023-10-22 09:44:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_259848.pt[0m


Epoch 55/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:46:22[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=55 step=264660[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000244913453968584, 'time_algorithm_update': 0.01684174221550933, 'loss': -152.98669948165653, 'time_step': 0.01715787043694348, 'observation_error': 0.3884947357283675, 'reward_error': 3.2372047710459804e-05, 'variance': 0.4409527833732195}[0m [36mstep[0m=[35m264660[0m
[2m2023-10-22 09:46:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_264660.pt[0m


Epoch 56/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:48:35[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=56 step=269472[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024657480338168757, 'time_algorithm_update': 0.01691007594317074, 'loss': -153.97540177133612, 'time_step': 0.017228238699541227, 'observation_error': 0.38107191596116585, 'reward_error': 2.6059779855088356e-05, 'variance': 0.44327858306841894}[0m [36mstep[0m=[35m269472[0m
[2m2023-10-22 09:48:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_269472.pt[0m


Epoch 57/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:50:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=57 step=274284[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025039331574095157, 'time_algorithm_update': 0.01685061002907313, 'loss': -154.07266352043882, 'time_step': 0.017172980387807388, 'observation_error': 0.3770459351294618, 'reward_error': 2.5918508649086173e-05, 'variance': 0.4360223721380731}[0m [36mstep[0m=[35m274284[0m
[2m2023-10-22 09:50:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_274284.pt[0m


Epoch 58/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:53:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=58 step=279096[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025139569443459323, 'time_algorithm_update': 0.01680803343542199, 'loss': -154.07489477389075, 'time_step': 0.01713142569423812, 'observation_error': 0.37888896081954815, 'reward_error': 2.497516104634554e-05, 'variance': 0.4400137434683642}[0m [36mstep[0m=[35m279096[0m
[2m2023-10-22 09:53:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_279096.pt[0m


Epoch 59/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:55:14[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=59 step=283908[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024787461073915855, 'time_algorithm_update': 0.01686836780356251, 'loss': -154.25114446903206, 'time_step': 0.0171887823074734, 'observation_error': 0.3674113767885988, 'reward_error': 2.775257207591646e-05, 'variance': 0.43425224757035763}[0m [36mstep[0m=[35m283908[0m
[2m2023-10-22 09:55:14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_283908.pt[0m


Epoch 60/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:57:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=60 step=288720[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000246138346760053, 'time_algorithm_update': 0.016863204792748863, 'loss': -154.56918411817733, 'time_step': 0.01718113165742045, 'observation_error': 0.36777019125165356, 'reward_error': 2.504775504454175e-05, 'variance': 0.43443562765572946}[0m [36mstep[0m=[35m288720[0m
[2m2023-10-22 09:57:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_288720.pt[0m


Epoch 61/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 09:59:40[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=61 step=293532[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025429769248042815, 'time_algorithm_update': 0.016715590793295692, 'loss': -155.34283918553555, 'time_step': 0.01704137922819713, 'observation_error': 0.3600691472997211, 'reward_error': 2.5305201014540295e-05, 'variance': 0.42217175071364305}[0m [36mstep[0m=[35m293532[0m
[2m2023-10-22 09:59:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_293532.pt[0m


Epoch 62/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:01:53[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=62 step=298344[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002497819098253004, 'time_algorithm_update': 0.016830916069788628, 'loss': -155.0252802667277, 'time_step': 0.017152748302133106, 'observation_error': 0.3558402405758255, 'reward_error': 2.1742536508299758e-05, 'variance': 0.43326020828476625}[0m [36mstep[0m=[35m298344[0m
[2m2023-10-22 10:01:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_298344.pt[0m


Epoch 63/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:04:07[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=63 step=303156[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002473237013083543, 'time_algorithm_update': 0.01691861025809449, 'loss': -156.051852457343, 'time_step': 0.01723888831047445, 'observation_error': 0.3450320009423022, 'reward_error': 2.1499110897488023e-05, 'variance': 0.41805872771319763}[0m [36mstep[0m=[35m303156[0m
[2m2023-10-22 10:04:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_303156.pt[0m


Epoch 64/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:06:20[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=64 step=307968[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002483224135483689, 'time_algorithm_update': 0.016808743340118865, 'loss': -157.09788865004592, 'time_step': 0.017129102203118633, 'observation_error': 0.3381803488223716, 'reward_error': 2.2483886715166463e-05, 'variance': 0.4181520622215371}[0m [36mstep[0m=[35m307968[0m
[2m2023-10-22 10:06:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_307968.pt[0m


Epoch 65/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:08:34[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=65 step=312780[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025210976105180267, 'time_algorithm_update': 0.016787279375572554, 'loss': -157.42065039497558, 'time_step': 0.017112335163855297, 'observation_error': 0.33768346917547903, 'reward_error': 2.429705248147334e-05, 'variance': 0.4275669029796854}[0m [36mstep[0m=[35m312780[0m
[2m2023-10-22 10:08:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_312780.pt[0m


Epoch 66/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:10:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=66 step=317592[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002510746320088704, 'time_algorithm_update': 0.01686664229121093, 'loss': -156.73382380399124, 'time_step': 0.017190754760431432, 'observation_error': 0.3238462913918291, 'reward_error': 2.0289332515582273e-05, 'variance': 0.4020944266235698}[0m [36mstep[0m=[35m317592[0m
[2m2023-10-22 10:10:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_317592.pt[0m


Epoch 67/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:12:59[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=67 step=322404[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002512333299949183, 'time_algorithm_update': 0.016755302548903974, 'loss': -158.05685488224427, 'time_step': 0.017078217376001854, 'observation_error': 0.31844132525980845, 'reward_error': 2.076037192435817e-05, 'variance': 0.41156914960043167}[0m [36mstep[0m=[35m322404[0m
[2m2023-10-22 10:12:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_322404.pt[0m


Epoch 68/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:15:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=68 step=327216[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025146005555974973, 'time_algorithm_update': 0.016805641371710343, 'loss': -158.45574860481648, 'time_step': 0.01712925837422448, 'observation_error': 0.31503143824948576, 'reward_error': 2.0301556056348784e-05, 'variance': 0.4033727298468989}[0m [36mstep[0m=[35m327216[0m
[2m2023-10-22 10:15:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_327216.pt[0m


Epoch 69/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:17:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=69 step=332028[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002486612632288501, 'time_algorithm_update': 0.01693672090397214, 'loss': -159.02307239851154, 'time_step': 0.017258404942224744, 'observation_error': 0.30579897507424375, 'reward_error': 1.7331796654768708e-05, 'variance': 0.40049631883950576}[0m [36mstep[0m=[35m332028[0m
[2m2023-10-22 10:17:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_332028.pt[0m


Epoch 70/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:19:38[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=70 step=336840[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002500999994507851, 'time_algorithm_update': 0.016768320292506928, 'loss': -159.65609780531176, 'time_step': 0.017090122697755682, 'observation_error': 0.2951414367596547, 'reward_error': 1.878028027550768e-05, 'variance': 0.39437319582574254}[0m [36mstep[0m=[35m336840[0m
[2m2023-10-22 10:19:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_336840.pt[0m


Epoch 71/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:21:51[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=71 step=341652[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025165482352202073, 'time_algorithm_update': 0.016778199997725132, 'loss': -159.35376698516947, 'time_step': 0.017102180177335034, 'observation_error': 0.29774581684193197, 'reward_error': 1.714206611363994e-05, 'variance': 0.39970481450375167}[0m [36mstep[0m=[35m341652[0m
[2m2023-10-22 10:21:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_341652.pt[0m


Epoch 72/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:24:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=72 step=346464[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002463431726964632, 'time_algorithm_update': 0.0169710278114674, 'loss': -160.66650912648723, 'time_step': 0.017290231147014588, 'observation_error': 0.2985725272473763, 'reward_error': 1.6818095837744642e-05, 'variance': 0.39305785557347}[0m [36mstep[0m=[35m346464[0m
[2m2023-10-22 10:24:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_346464.pt[0m


Epoch 73/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:26:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=73 step=351276[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002526992673390326, 'time_algorithm_update': 0.017153214784037144, 'loss': -159.91486952409878, 'time_step': 0.01747929717933547, 'observation_error': 0.29599031978452833, 'reward_error': 2.1565811258398764e-05, 'variance': 0.39773679315414145}[0m [36mstep[0m=[35m351276[0m
[2m2023-10-22 10:26:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_351276.pt[0m


Epoch 74/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:28:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=74 step=356088[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002481675802026306, 'time_algorithm_update': 0.01769920413135392, 'loss': -160.68542159564873, 'time_step': 0.01802091021787496, 'observation_error': 0.2892673241744147, 'reward_error': 1.6778787809844993e-05, 'variance': 0.3881871236280169}[0m [36mstep[0m=[35m356088[0m
[2m2023-10-22 10:28:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_356088.pt[0m


Epoch 75/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:31:00[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=75 step=360900[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024429243599882945, 'time_algorithm_update': 0.017511944917471132, 'loss': -161.6404068579004, 'time_step': 0.01782902839871515, 'observation_error': 0.28576137250096123, 'reward_error': 1.5979896831748565e-05, 'variance': 0.3899734777548896}[0m [36mstep[0m=[35m360900[0m
[2m2023-10-22 10:31:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_360900.pt[0m


Epoch 76/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:33:17[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=76 step=365712[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002469710281067656, 'time_algorithm_update': 0.017445553815672026, 'loss': -161.54813944768233, 'time_step': 0.01776569274100084, 'observation_error': 0.28074721298204874, 'reward_error': 1.4372578776481942e-05, 'variance': 0.38418451983493085}[0m [36mstep[0m=[35m365712[0m
[2m2023-10-22 10:33:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_365712.pt[0m


Epoch 77/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:35:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=77 step=370524[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002497500017694107, 'time_algorithm_update': 0.017021036752540672, 'loss': -161.4213896788663, 'time_step': 0.01734508233375581, 'observation_error': 0.27748475112326854, 'reward_error': 1.5581907537215424e-05, 'variance': 0.3819006038786808}[0m [36mstep[0m=[35m370524[0m
[2m2023-10-22 10:35:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_370524.pt[0m


Epoch 78/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:37:46[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=78 step=375336[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002480613521407865, 'time_algorithm_update': 0.016956909586762946, 'loss': -162.75049005779542, 'time_step': 0.017278059710092775, 'observation_error': 0.2675259301626273, 'reward_error': 1.5775871652971966e-05, 'variance': 0.37597466741826324}[0m [36mstep[0m=[35m375336[0m
[2m2023-10-22 10:37:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_375336.pt[0m


Epoch 79/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:40:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=79 step=380148[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002482148477264176, 'time_algorithm_update': 0.01743762327647665, 'loss': -162.32336574183438, 'time_step': 0.01775853934133439, 'observation_error': 0.2616878452884609, 'reward_error': 1.6292179994858773e-05, 'variance': 0.37306213955836953}[0m [36mstep[0m=[35m380148[0m
[2m2023-10-22 10:40:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_380148.pt[0m


Epoch 80/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:42:18[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=80 step=384960[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002539865889355032, 'time_algorithm_update': 0.017367615267721098, 'loss': -162.9557030325816, 'time_step': 0.01769397685850846, 'observation_error': 0.2605608592196139, 'reward_error': 1.4334670533729016e-05, 'variance': 0.3616340528070436}[0m [36mstep[0m=[35m384960[0m
[2m2023-10-22 10:42:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_384960.pt[0m


Epoch 81/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:44:33[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=81 step=389772[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025571135808702124, 'time_algorithm_update': 0.0171758758456927, 'loss': -162.99095073542986, 'time_step': 0.01750489457487961, 'observation_error': 0.25623952390366866, 'reward_error': 1.2794277289874762e-05, 'variance': 0.3662410342081121}[0m [36mstep[0m=[35m389772[0m
[2m2023-10-22 10:44:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_389772.pt[0m


Epoch 82/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:46:49[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=82 step=394584[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002549955573363395, 'time_algorithm_update': 0.01728715890660845, 'loss': -164.57901488436528, 'time_step': 0.017615203696890663, 'observation_error': 0.24748803194014565, 'reward_error': 1.4532175613418458e-05, 'variance': 0.360961536241397}[0m [36mstep[0m=[35m394584[0m
[2m2023-10-22 10:46:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_394584.pt[0m


Epoch 83/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:49:03[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=83 step=399396[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025719448812584625, 'time_algorithm_update': 0.017138056525169365, 'loss': -164.7114181169746, 'time_step': 0.01746794360930585, 'observation_error': 0.24857193892284385, 'reward_error': 1.3697769732732419e-05, 'variance': 0.3541587291026511}[0m [36mstep[0m=[35m399396[0m
[2m2023-10-22 10:49:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_399396.pt[0m


Epoch 84/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:51:18[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=84 step=404208[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025702067840525434, 'time_algorithm_update': 0.01720772518877773, 'loss': -164.65383882950667, 'time_step': 0.01753696504475569, 'observation_error': 0.25435835213821145, 'reward_error': 1.2798873546340806e-05, 'variance': 0.3633678834502315}[0m [36mstep[0m=[35m404208[0m
[2m2023-10-22 10:51:18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_404208.pt[0m


Epoch 85/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:53:33[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=85 step=409020[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025493243487794895, 'time_algorithm_update': 0.017226690762457208, 'loss': -163.69614862741676, 'time_step': 0.01755461857305005, 'observation_error': 0.2547647296214842, 'reward_error': 1.2592734490236814e-05, 'variance': 0.3542426010951211}[0m [36mstep[0m=[35m409020[0m
[2m2023-10-22 10:53:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_409020.pt[0m


Epoch 86/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:55:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=86 step=413832[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002571454369219164, 'time_algorithm_update': 0.01709879004549009, 'loss': -165.46343817913026, 'time_step': 0.01742850490539944, 'observation_error': 0.25386559471060033, 'reward_error': 1.4365468975188369e-05, 'variance': 0.35947175536707177}[0m [36mstep[0m=[35m413832[0m
[2m2023-10-22 10:55:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_413832.pt[0m


Epoch 87/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 10:58:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=87 step=418644[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002511582172422338, 'time_algorithm_update': 0.01721622516785873, 'loss': -166.17752797783956, 'time_step': 0.01753927406824735, 'observation_error': 0.24393312600321804, 'reward_error': 1.3301922575770428e-05, 'variance': 0.35115945740921306}[0m [36mstep[0m=[35m418644[0m
[2m2023-10-22 10:58:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_418644.pt[0m


Epoch 88/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:00:17[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=88 step=423456[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002553265290961896, 'time_algorithm_update': 0.017267257891787357, 'loss': -164.80934964986216, 'time_step': 0.017595625478628765, 'observation_error': 0.24709685155592312, 'reward_error': 1.1709749407378346e-05, 'variance': 0.34635630698674463}[0m [36mstep[0m=[35m423456[0m
[2m2023-10-22 11:00:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_423456.pt[0m


Epoch 89/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:02:35[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=89 step=428268[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025447383089454, 'time_algorithm_update': 0.01789828780682406, 'loss': -165.71719212861032, 'time_step': 0.01822780249818404, 'observation_error': 0.24914675284882778, 'reward_error': 1.1314802972931052e-05, 'variance': 0.3517118711838813}[0m [36mstep[0m=[35m428268[0m
[2m2023-10-22 11:02:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_428268.pt[0m


Epoch 90/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:04:49[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=90 step=433080[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024788006087292857, 'time_algorithm_update': 0.01691559504590625, 'loss': -164.79172501401513, 'time_step': 0.017236233451221748, 'observation_error': 0.24105693613155554, 'reward_error': 1.112724002511039e-05, 'variance': 0.34839445914375783}[0m [36mstep[0m=[35m433080[0m
[2m2023-10-22 11:04:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_433080.pt[0m


Epoch 91/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:07:03[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=91 step=437892[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002505772825290239, 'time_algorithm_update': 0.016946296095352618, 'loss': -165.4264953958919, 'time_step': 0.017269503545087272, 'observation_error': 0.24575503821300965, 'reward_error': 1.2246613098919316e-05, 'variance': 0.3530143974914295}[0m [36mstep[0m=[35m437892[0m
[2m2023-10-22 11:07:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_437892.pt[0m


Epoch 92/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:09:17[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=92 step=442704[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002466476865341935, 'time_algorithm_update': 0.016922628592176428, 'loss': -166.6710837670197, 'time_step': 0.017242616846079837, 'observation_error': 0.2361263892015613, 'reward_error': 1.1313094568035624e-05, 'variance': 0.34352376276339963}[0m [36mstep[0m=[35m442704[0m
[2m2023-10-22 11:09:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_442704.pt[0m


Epoch 93/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:11:31[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=93 step=447516[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002471082228377573, 'time_algorithm_update': 0.016951339203223326, 'loss': -166.99040593213076, 'time_step': 0.017270778083642722, 'observation_error': 0.2493571809286821, 'reward_error': 1.0274875631067544e-05, 'variance': 0.34892998116883717}[0m [36mstep[0m=[35m447516[0m
[2m2023-10-22 11:11:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_447516.pt[0m


Epoch 94/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:13:46[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=94 step=452328[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002461792227633279, 'time_algorithm_update': 0.01701736861432679, 'loss': -167.43124424489656, 'time_step': 0.017336651720013703, 'observation_error': 0.24838780628710322, 'reward_error': 1.1776876177238525e-05, 'variance': 0.34033552030378855}[0m [36mstep[0m=[35m452328[0m
[2m2023-10-22 11:13:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_452328.pt[0m


Epoch 95/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:16:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=95 step=457140[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024594590749130185, 'time_algorithm_update': 0.01708261781400774, 'loss': -167.09306535062845, 'time_step': 0.017402118132298725, 'observation_error': 0.24480475400335788, 'reward_error': 9.656236377155487e-06, 'variance': 0.3376825931386394}[0m [36mstep[0m=[35m457140[0m
[2m2023-10-22 11:16:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_457140.pt[0m


Epoch 96/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:18:15[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=96 step=461952[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002481590581752812, 'time_algorithm_update': 0.01696350499182469, 'loss': -167.5770194409593, 'time_step': 0.017285116047831447, 'observation_error': 0.2452558181065145, 'reward_error': 1.1152646628411851e-05, 'variance': 0.3549176326299741}[0m [36mstep[0m=[35m461952[0m
[2m2023-10-22 11:18:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_461952.pt[0m


Epoch 97/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:20:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=97 step=466764[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024724789490228083, 'time_algorithm_update': 0.016947674037809682, 'loss': -167.2605358644614, 'time_step': 0.01726912248164341, 'observation_error': 0.248743357340028, 'reward_error': 1.0028947668296585e-05, 'variance': 0.3487406936576154}[0m [36mstep[0m=[35m466764[0m
[2m2023-10-22 11:20:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_466764.pt[0m


Epoch 98/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:22:44[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=98 step=471576[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002515750533822964, 'time_algorithm_update': 0.017043903581519377, 'loss': -168.11558473179565, 'time_step': 0.01736836624660793, 'observation_error': 0.23578944135122942, 'reward_error': 9.853449081639096e-06, 'variance': 0.3359059152545422}[0m [36mstep[0m=[35m471576[0m
[2m2023-10-22 11:22:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_471576.pt[0m


Epoch 99/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:24:58[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=99 step=476388[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002503584844314943, 'time_algorithm_update': 0.01695719492603914, 'loss': -167.2169373287128, 'time_step': 0.01728015414695391, 'observation_error': 0.23192149514582097, 'reward_error': 9.13440218828678e-06, 'variance': 0.3379140165205819}[0m [36mstep[0m=[35m476388[0m
[2m2023-10-22 11:24:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_476388.pt[0m


Epoch 100/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 11:27:13[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022074412: epoch=100 step=481200[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024574653168866166, 'time_algorithm_update': 0.016974883087456274, 'loss': -168.84471134969024, 'time_step': 0.017294180710317686, 'observation_error': 0.24173130546956934, 'reward_error': 9.641367714132785e-06, 'variance': 0.33933290979344466}[0m [36mstep[0m=[35m481200[0m
[2m2023-10-22 11:27:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412/model_481200.pt[0m
Using SymmetryEncoderFactory
[2m2023-10-22 11:27:13[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-22 11:27:13[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713[0m
[2m2023-10-22 11:27:13[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-

Epoch 1/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:29:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=1 step=4811[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002535658649832314, 'time_algorithm_update': 0.017193804260996072, 'loss': 4849.698843713665, 'time_step': 0.017536953862851732, 'observation_error': 1.894259482232975, 'reward_error': 0.06274056467126152, 'variance': 1.4076315579499694}[0m [36mstep[0m=[35m4811[0m
[2m2023-10-22 11:29:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_4811.pt[0m


Epoch 2/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:31:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=2 step=9622[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002533651592577354, 'time_algorithm_update': 0.01751811740750757, 'loss': -22.485643534371917, 'time_step': 0.01785745612798424, 'observation_error': 1.412731186361278, 'reward_error': 0.015106594142273914, 'variance': 1.0987107180277806}[0m [36mstep[0m=[35m9622[0m
[2m2023-10-22 11:31:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_9622.pt[0m


Epoch 3/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:34:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=3 step=14433[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024877796795531236, 'time_algorithm_update': 0.017369521479427233, 'loss': -51.15408645581119, 'time_step': 0.017695723935834733, 'observation_error': 1.118732395206919, 'reward_error': 0.007548223859862571, 'variance': 0.7332281605465943}[0m [36mstep[0m=[35m14433[0m
[2m2023-10-22 11:34:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_14433.pt[0m


Epoch 4/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:36:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=4 step=19244[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024986524786083894, 'time_algorithm_update': 0.017358648878599843, 'loss': -69.27764670225915, 'time_step': 0.017684005595769985, 'observation_error': 0.9107608512761339, 'reward_error': 0.0055103441926292, 'variance': 0.5098964852728091}[0m [36mstep[0m=[35m19244[0m
[2m2023-10-22 11:36:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_19244.pt[0m


Epoch 5/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:38:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=5 step=24055[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025370982797892665, 'time_algorithm_update': 0.017377188289470272, 'loss': -83.05154341160693, 'time_step': 0.017706239676926493, 'observation_error': 0.8868362638947933, 'reward_error': 0.0026155589368111335, 'variance': 0.5797443314634357}[0m [36mstep[0m=[35m24055[0m
[2m2023-10-22 11:38:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_24055.pt[0m


Epoch 6/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:41:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=6 step=28866[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024934608905088923, 'time_algorithm_update': 0.017387262428408972, 'loss': -89.17677315950047, 'time_step': 0.017710111959390398, 'observation_error': 0.7868605875101851, 'reward_error': 0.0014689207277512454, 'variance': 0.5086266281391252}[0m [36mstep[0m=[35m28866[0m
[2m2023-10-22 11:41:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_28866.pt[0m


Epoch 7/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:43:28[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=7 step=33677[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002556589035798546, 'time_algorithm_update': 0.01748508560610422, 'loss': -92.85157443252528, 'time_step': 0.017815889327993415, 'observation_error': 0.6839329011341662, 'reward_error': 0.0012055659129966833, 'variance': 0.4273425340221453}[0m [36mstep[0m=[35m33677[0m
[2m2023-10-22 11:43:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_33677.pt[0m


Epoch 8/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:45:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=8 step=38488[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025067059816825515, 'time_algorithm_update': 0.01743015462954261, 'loss': -95.33483132104868, 'time_step': 0.01775461942038004, 'observation_error': 0.6213896544123607, 'reward_error': 0.0008948818549213623, 'variance': 0.3868427954459685}[0m [36mstep[0m=[35m38488[0m
[2m2023-10-22 11:45:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_38488.pt[0m


Epoch 9/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:48:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=9 step=43299[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025021640854500305, 'time_algorithm_update': 0.017720356425632435, 'loss': -98.02149951326652, 'time_step': 0.018045594106962416, 'observation_error': 0.561151699721557, 'reward_error': 0.0007415543640287043, 'variance': 0.33560044302864445}[0m [36mstep[0m=[35m43299[0m
[2m2023-10-22 11:48:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_43299.pt[0m


Epoch 10/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:50:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=10 step=48110[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002463707877305609, 'time_algorithm_update': 0.017327413765116952, 'loss': -100.37166204791484, 'time_step': 0.017647008295243558, 'observation_error': 0.49151510568430146, 'reward_error': 0.0005868838951846328, 'variance': 0.3122213619026682}[0m [36mstep[0m=[35m48110[0m
[2m2023-10-22 11:50:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_48110.pt[0m


Epoch 11/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:52:53[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=11 step=52921[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025288648849166966, 'time_algorithm_update': 0.01751491176539413, 'loss': -102.88262868408253, 'time_step': 0.017841269731571165, 'observation_error': 0.3948771329341738, 'reward_error': 0.0005109160668974115, 'variance': 0.2670205079811474}[0m [36mstep[0m=[35m52921[0m
[2m2023-10-22 11:52:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_52921.pt[0m


Epoch 12/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:55:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=12 step=57732[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025038886679802186, 'time_algorithm_update': 0.017331814671771934, 'loss': -106.02730508763486, 'time_step': 0.01765338523649074, 'observation_error': 0.3033078294144571, 'reward_error': 0.00045548373796070563, 'variance': 0.24299710639299604}[0m [36mstep[0m=[35m57732[0m
[2m2023-10-22 11:55:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_57732.pt[0m


Epoch 13/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:57:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=13 step=62543[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025172977927221226, 'time_algorithm_update': 0.017368135073655262, 'loss': -109.98746641448867, 'time_step': 0.017692427901809367, 'observation_error': 0.24435806733007712, 'reward_error': 0.00037748106142253724, 'variance': 0.25868884119849}[0m [36mstep[0m=[35m62543[0m
[2m2023-10-22 11:57:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_62543.pt[0m


Epoch 14/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 11:59:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=14 step=67354[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002535885125181824, 'time_algorithm_update': 0.0173146079469188, 'loss': -113.67597091601857, 'time_step': 0.01763958381878019, 'observation_error': 0.22629205646174538, 'reward_error': 0.00034268477903709704, 'variance': 0.24720860931722016}[0m [36mstep[0m=[35m67354[0m
[2m2023-10-22 11:59:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_67354.pt[0m


Epoch 15/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:02:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=15 step=72165[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025081114173307166, 'time_algorithm_update': 0.01717666151419025, 'loss': -115.88658046147552, 'time_step': 0.017499321687992137, 'observation_error': 0.18082415278060987, 'reward_error': 0.00034023191223605485, 'variance': 0.2277820489316896}[0m [36mstep[0m=[35m72165[0m
[2m2023-10-22 12:02:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_72165.pt[0m


Epoch 16/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:04:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=16 step=76976[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002508897390863153, 'time_algorithm_update': 0.017293336807231947, 'loss': -118.37643506437249, 'time_step': 0.017615757918214132, 'observation_error': 0.16025237561812927, 'reward_error': 0.00030711339215138463, 'variance': 0.19794018860306892}[0m [36mstep[0m=[35m76976[0m
[2m2023-10-22 12:04:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_76976.pt[0m


Epoch 17/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:06:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=17 step=81787[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002495893642129781, 'time_algorithm_update': 0.01737393938412267, 'loss': -120.1764150828439, 'time_step': 0.017696756950858924, 'observation_error': 0.14773681633116317, 'reward_error': 0.0002641691233841272, 'variance': 0.15217574364674769}[0m [36mstep[0m=[35m81787[0m
[2m2023-10-22 12:06:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_81787.pt[0m


Epoch 18/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:09:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=18 step=86598[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002578878273861841, 'time_algorithm_update': 0.01735106244996094, 'loss': -121.63052189917555, 'time_step': 0.017682085510996072, 'observation_error': 0.13613597338921402, 'reward_error': 0.00025570799659435666, 'variance': 0.13419586009886783}[0m [36mstep[0m=[35m86598[0m
[2m2023-10-22 12:09:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_86598.pt[0m


Epoch 19/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:11:20[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=19 step=91409[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024853330519808425, 'time_algorithm_update': 0.017327238482116685, 'loss': -122.89130232404013, 'time_step': 0.017648134279142073, 'observation_error': 0.1284177722475275, 'reward_error': 0.00023381711018562054, 'variance': 0.09965311100717558}[0m [36mstep[0m=[35m91409[0m
[2m2023-10-22 12:11:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_91409.pt[0m


Epoch 20/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:13:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=20 step=96220[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002533535629269289, 'time_algorithm_update': 0.017324963470328948, 'loss': -124.20445930038512, 'time_step': 0.017650639681279896, 'observation_error': 0.12211332138111175, 'reward_error': 0.00020530947455132647, 'variance': 0.08521417490592481}[0m [36mstep[0m=[35m96220[0m
[2m2023-10-22 12:13:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_96220.pt[0m


Epoch 21/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:15:58[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=21 step=101031[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002470622065656524, 'time_algorithm_update': 0.017354470482736833, 'loss': -124.74319897677243, 'time_step': 0.017673095529873836, 'observation_error': 0.11621088296410087, 'reward_error': 0.0002035677916397003, 'variance': 0.07536343072631956}[0m [36mstep[0m=[35m101031[0m
[2m2023-10-22 12:15:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_101031.pt[0m


Epoch 22/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:18:17[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=22 step=105842[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002500576775724688, 'time_algorithm_update': 0.017357927329127443, 'loss': -126.18814124973815, 'time_step': 0.017681280160688654, 'observation_error': 0.10854510043345919, 'reward_error': 0.00017423981795621488, 'variance': 0.07111752661295523}[0m [36mstep[0m=[35m105842[0m
[2m2023-10-22 12:18:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_105842.pt[0m


Epoch 23/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:20:36[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=23 step=110653[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002495216198360021, 'time_algorithm_update': 0.017363082344183628, 'loss': -127.21178067218635, 'time_step': 0.017685047134592884, 'observation_error': 0.11125606854771833, 'reward_error': 0.00017188977236787733, 'variance': 0.0702557054065071}[0m [36mstep[0m=[35m110653[0m
[2m2023-10-22 12:20:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_110653.pt[0m


Epoch 24/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:22:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=24 step=115464[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002511699341905015, 'time_algorithm_update': 0.017325974333387877, 'loss': -128.0006336060712, 'time_step': 0.017650187771277227, 'observation_error': 0.11961243872637715, 'reward_error': 0.00015674912993463936, 'variance': 0.07136508575865894}[0m [36mstep[0m=[35m115464[0m
[2m2023-10-22 12:22:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_115464.pt[0m


Epoch 25/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:25:13[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=25 step=120275[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025203856874766057, 'time_algorithm_update': 0.017349809550664154, 'loss': -128.74522456159414, 'time_step': 0.01767538367425843, 'observation_error': 0.1246959546983796, 'reward_error': 0.0001573743973163748, 'variance': 0.08168077111012662}[0m [36mstep[0m=[35m120275[0m
[2m2023-10-22 12:25:13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_120275.pt[0m


Epoch 26/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:27:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=26 step=125086[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025083259990076046, 'time_algorithm_update': 0.017376400482329975, 'loss': -129.47426957911648, 'time_step': 0.017699973099049846, 'observation_error': 0.12071538326600202, 'reward_error': 0.00014916052067351614, 'variance': 0.07820626410155881}[0m [36mstep[0m=[35m125086[0m
[2m2023-10-22 12:27:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_125086.pt[0m


Epoch 27/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:29:51[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=27 step=129897[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000250128890937293, 'time_algorithm_update': 0.017352943533400046, 'loss': -129.93884975819498, 'time_step': 0.01767574439943766, 'observation_error': 0.11836239016318878, 'reward_error': 0.0001350855612602188, 'variance': 0.07564998526995764}[0m [36mstep[0m=[35m129897[0m
[2m2023-10-22 12:29:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_129897.pt[0m


Epoch 28/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:32:10[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=28 step=134708[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025052891479314577, 'time_algorithm_update': 0.01736055142020664, 'loss': -130.56069854866365, 'time_step': 0.017683129626781376, 'observation_error': 0.11311281817418165, 'reward_error': 0.00013028014519583233, 'variance': 0.07441718390112478}[0m [36mstep[0m=[35m134708[0m
[2m2023-10-22 12:32:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_134708.pt[0m


Epoch 29/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:34:29[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=29 step=139519[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025299080591195835, 'time_algorithm_update': 0.01740335129670655, 'loss': -130.99087951773544, 'time_step': 0.017730682710430722, 'observation_error': 0.10778512240010676, 'reward_error': 0.00012060713532844206, 'variance': 0.06663107513332069}[0m [36mstep[0m=[35m139519[0m
[2m2023-10-22 12:34:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_139519.pt[0m


Epoch 30/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:36:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=30 step=144330[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025005098738161894, 'time_algorithm_update': 0.017320207983558922, 'loss': -131.4913180063232, 'time_step': 0.01764266046410265, 'observation_error': 0.10834847202518547, 'reward_error': 0.00010848356180104919, 'variance': 0.060899895249317014}[0m [36mstep[0m=[35m144330[0m
[2m2023-10-22 12:36:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_144330.pt[0m


Epoch 31/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:39:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=31 step=149141[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024902302716829454, 'time_algorithm_update': 0.01726844206195293, 'loss': -131.88045975632193, 'time_step': 0.017589421511142424, 'observation_error': 0.10694002134761928, 'reward_error': 0.00010584255901463274, 'variance': 0.06227628101426936}[0m [36mstep[0m=[35m149141[0m
[2m2023-10-22 12:39:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_149141.pt[0m


Epoch 32/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:41:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=32 step=153952[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002543637817452589, 'time_algorithm_update': 0.017285849938544283, 'loss': -132.5306681214933, 'time_step': 0.017612563327304837, 'observation_error': 0.12081018208163176, 'reward_error': 8.898164898625298e-05, 'variance': 0.09080859314355244}[0m [36mstep[0m=[35m153952[0m
[2m2023-10-22 12:41:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_153952.pt[0m


Epoch 33/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:43:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=33 step=158763[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024992684717362696, 'time_algorithm_update': 0.01733643769276313, 'loss': -133.109807243815, 'time_step': 0.017658636094725472, 'observation_error': 0.11063074064355186, 'reward_error': 9.206744873240441e-05, 'variance': 0.06416779379331473}[0m [36mstep[0m=[35m158763[0m
[2m2023-10-22 12:43:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_158763.pt[0m


Epoch 34/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:46:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=34 step=163574[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025236083771874716, 'time_algorithm_update': 0.017448486545382752, 'loss': -133.51494344974154, 'time_step': 0.01777447818644303, 'observation_error': 0.10922468087277712, 'reward_error': 8.87149090278047e-05, 'variance': 0.06315304938875392}[0m [36mstep[0m=[35m163574[0m
[2m2023-10-22 12:46:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_163574.pt[0m


Epoch 35/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:48:20[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=35 step=168385[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025151559405107803, 'time_algorithm_update': 0.017399627883378295, 'loss': -133.4064135816087, 'time_step': 0.017724656632525884, 'observation_error': 0.1172254208921416, 'reward_error': 9.078626489276023e-05, 'variance': 0.0733663386073554}[0m [36mstep[0m=[35m168385[0m
[2m2023-10-22 12:48:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_168385.pt[0m


Epoch 36/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:50:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=36 step=173196[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002513949723879034, 'time_algorithm_update': 0.017318701352579534, 'loss': -133.87088883232857, 'time_step': 0.017641686570542787, 'observation_error': 0.10922511602146834, 'reward_error': 8.309303568210272e-05, 'variance': 0.06087464707141627}[0m [36mstep[0m=[35m173196[0m
[2m2023-10-22 12:50:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_173196.pt[0m


Epoch 37/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:52:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=37 step=178007[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002468501522941962, 'time_algorithm_update': 0.017289173030674867, 'loss': -134.17753157043774, 'time_step': 0.017608045515759375, 'observation_error': 0.11184607855529889, 'reward_error': 8.053518575101349e-05, 'variance': 0.05954594646694176}[0m [36mstep[0m=[35m178007[0m
[2m2023-10-22 12:52:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_178007.pt[0m


Epoch 38/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:55:15[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=38 step=182818[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024774371400689216, 'time_algorithm_update': 0.017373151180526614, 'loss': -134.5122685730767, 'time_step': 0.017694258833595582, 'observation_error': 0.10742357580680946, 'reward_error': 8.568005138742075e-05, 'variance': 0.06492385194999133}[0m [36mstep[0m=[35m182818[0m
[2m2023-10-22 12:55:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_182818.pt[0m


Epoch 39/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:57:34[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=39 step=187629[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024798912011865916, 'time_algorithm_update': 0.01736064334838461, 'loss': -135.08767164107843, 'time_step': 0.017681879304446986, 'observation_error': 0.1047845429996181, 'reward_error': 6.833293464963287e-05, 'variance': 0.06184852252094999}[0m [36mstep[0m=[35m187629[0m
[2m2023-10-22 12:57:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_187629.pt[0m


Epoch 40/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 12:59:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=40 step=192440[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002489993884939583, 'time_algorithm_update': 0.017305942861287022, 'loss': -135.7696397393242, 'time_step': 0.017627015130679935, 'observation_error': 0.10501207298920943, 'reward_error': 6.992813409348942e-05, 'variance': 0.06132374104704724}[0m [36mstep[0m=[35m192440[0m
[2m2023-10-22 12:59:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_192440.pt[0m


Epoch 41/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:02:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=41 step=197251[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002511704297601941, 'time_algorithm_update': 0.017391403854772915, 'loss': -135.5987207765169, 'time_step': 0.017716011815694533, 'observation_error': 0.10194268345682589, 'reward_error': 5.826977376801829e-05, 'variance': 0.05804784598249238}[0m [36mstep[0m=[35m197251[0m
[2m2023-10-22 13:02:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_197251.pt[0m


Epoch 42/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:04:29[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=42 step=202062[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025120477273989013, 'time_algorithm_update': 0.017296634922650024, 'loss': -135.68291019605417, 'time_step': 0.01762060429246578, 'observation_error': 0.11013912061726443, 'reward_error': 6.574511766813831e-05, 'variance': 0.06362449828654354}[0m [36mstep[0m=[35m202062[0m
[2m2023-10-22 13:04:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_202062.pt[0m


Epoch 43/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:06:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=43 step=206873[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002536245899918024, 'time_algorithm_update': 0.017347761757580463, 'loss': -136.46635410013081, 'time_step': 0.017674145047368788, 'observation_error': 0.10978433780788868, 'reward_error': 5.4767020824585396e-05, 'variance': 0.05974254128185081}[0m [36mstep[0m=[35m206873[0m
[2m2023-10-22 13:06:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_206873.pt[0m


Epoch 44/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:09:07[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=44 step=211684[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002500101028819809, 'time_algorithm_update': 0.017322963450163622, 'loss': -136.456488450137, 'time_step': 0.017646032716746737, 'observation_error': 0.10431487617063126, 'reward_error': 5.7025968710358704e-05, 'variance': 0.05650287516752082}[0m [36mstep[0m=[35m211684[0m
[2m2023-10-22 13:09:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_211684.pt[0m


Epoch 45/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:11:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=45 step=216495[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025186140258256226, 'time_algorithm_update': 0.017366648315020546, 'loss': -136.49596806202874, 'time_step': 0.017692216293550632, 'observation_error': 0.09881471389109, 'reward_error': 5.469445339696012e-05, 'variance': 0.05539480026556389}[0m [36mstep[0m=[35m216495[0m
[2m2023-10-22 13:11:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_216495.pt[0m


Epoch 46/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:13:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=46 step=221306[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002488280204942632, 'time_algorithm_update': 0.01737601061765282, 'loss': -136.77429762463439, 'time_step': 0.017697957270211326, 'observation_error': 0.09848606347156942, 'reward_error': 5.0786401386211714e-05, 'variance': 0.055949557307262865}[0m [36mstep[0m=[35m221306[0m
[2m2023-10-22 13:13:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_221306.pt[0m


Epoch 47/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:16:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=47 step=226117[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025499261012817715, 'time_algorithm_update': 0.017294995478993023, 'loss': -136.45753794780916, 'time_step': 0.017623393953379266, 'observation_error': 0.10039808617763271, 'reward_error': 4.8265316974582445e-05, 'variance': 0.05897980415474629}[0m [36mstep[0m=[35m226117[0m
[2m2023-10-22 13:16:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_226117.pt[0m


Epoch 48/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:18:21[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=48 step=230928[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002524647091263125, 'time_algorithm_update': 0.017356959233732983, 'loss': -137.508378224917, 'time_step': 0.01768154841256325, 'observation_error': 0.09614064833362758, 'reward_error': 4.35590809881869e-05, 'variance': 0.05341835990512349}[0m [36mstep[0m=[35m230928[0m
[2m2023-10-22 13:18:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_230928.pt[0m


Epoch 49/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:20:40[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=49 step=235739[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002532586117738301, 'time_algorithm_update': 0.01744182291706841, 'loss': -137.87605210397228, 'time_step': 0.01776893870841934, 'observation_error': 0.10649531313916842, 'reward_error': 4.67342994506662e-05, 'variance': 0.05709649059564075}[0m [36mstep[0m=[35m235739[0m
[2m2023-10-22 13:20:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_235739.pt[0m


Epoch 50/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:22:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=50 step=240550[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025555047293111756, 'time_algorithm_update': 0.017281878393470958, 'loss': -137.69321229427626, 'time_step': 0.01761088939199723, 'observation_error': 0.10856098575793603, 'reward_error': 4.1478450129858895e-05, 'variance': 0.06363422615862308}[0m [36mstep[0m=[35m240550[0m
[2m2023-10-22 13:22:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_240550.pt[0m


Epoch 51/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:25:15[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=51 step=245361[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002500631288390872, 'time_algorithm_update': 0.01729035075204929, 'loss': -138.47636673252737, 'time_step': 0.01761382326369126, 'observation_error': 0.10509367505136025, 'reward_error': 3.9842153986719645e-05, 'variance': 0.05619870414129759}[0m [36mstep[0m=[35m245361[0m
[2m2023-10-22 13:25:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_245361.pt[0m


Epoch 52/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:27:45[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=52 step=250172[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00027793847803414377, 'time_algorithm_update': 0.019081562478734115, 'loss': -138.24910822550984, 'time_step': 0.019441313219278573, 'observation_error': 0.10534287052171062, 'reward_error': 3.9323631376534926e-05, 'variance': 0.06506494885911439}[0m [36mstep[0m=[35m250172[0m
[2m2023-10-22 13:27:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_250172.pt[0m


Epoch 53/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:30:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=53 step=254983[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002500566864330836, 'time_algorithm_update': 0.01760100659251684, 'loss': -138.42154079176387, 'time_step': 0.01792450180125073, 'observation_error': 0.10476588900063966, 'reward_error': 3.7949147308942685e-05, 'variance': 0.054642638671928784}[0m [36mstep[0m=[35m254983[0m
[2m2023-10-22 13:30:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_254983.pt[0m


Epoch 54/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:32:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=54 step=259794[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002479611204310282, 'time_algorithm_update': 0.017617951507901384, 'loss': -138.65925872412623, 'time_step': 0.0179397403215128, 'observation_error': 0.09585872983500021, 'reward_error': 3.766781540137634e-05, 'variance': 0.053005588149379806}[0m [36mstep[0m=[35m259794[0m
[2m2023-10-22 13:32:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_259794.pt[0m


Epoch 55/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:34:45[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=55 step=264605[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002493212114523216, 'time_algorithm_update': 0.0175791613848981, 'loss': -138.53128772117918, 'time_step': 0.017901496762323468, 'observation_error': 0.11511428864743375, 'reward_error': 3.5502035409998386e-05, 'variance': 0.06264380548190412}[0m [36mstep[0m=[35m264605[0m
[2m2023-10-22 13:34:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_264605.pt[0m


Epoch 56/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:37:06[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=56 step=269416[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002530236126256073, 'time_algorithm_update': 0.017673877935304484, 'loss': -138.99545662578043, 'time_step': 0.018001099481227814, 'observation_error': 0.10336030558106986, 'reward_error': 3.0154624275425072e-05, 'variance': 0.05837284267625782}[0m [36mstep[0m=[35m269416[0m
[2m2023-10-22 13:37:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_269416.pt[0m


Epoch 57/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:39:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=57 step=274227[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000252858340133131, 'time_algorithm_update': 0.01749452496982369, 'loss': -139.43285419350724, 'time_step': 0.01782091336397985, 'observation_error': 0.10193688156273993, 'reward_error': 2.9286464629283822e-05, 'variance': 0.05431588873158426}[0m [36mstep[0m=[35m274227[0m
[2m2023-10-22 13:39:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_274227.pt[0m


Epoch 58/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:41:46[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=58 step=279038[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025289164241647255, 'time_algorithm_update': 0.017597905267823687, 'loss': -139.70893323884835, 'time_step': 0.01792513801362207, 'observation_error': 0.09794214188205419, 'reward_error': 2.8845115583284025e-05, 'variance': 0.05386485633329512}[0m [36mstep[0m=[35m279038[0m
[2m2023-10-22 13:41:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_279038.pt[0m


Epoch 59/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:44:07[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=59 step=283849[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002509329032065392, 'time_algorithm_update': 0.017541540409913556, 'loss': -139.89627890665153, 'time_step': 0.017866999214440366, 'observation_error': 0.09607702247470234, 'reward_error': 2.939122386889524e-05, 'variance': 0.05041105111019756}[0m [36mstep[0m=[35m283849[0m
[2m2023-10-22 13:44:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_283849.pt[0m


Epoch 60/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:46:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=60 step=288660[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002560182411639464, 'time_algorithm_update': 0.017588683310528354, 'loss': -139.70288968566945, 'time_step': 0.017919395352024418, 'observation_error': 0.10466245621821503, 'reward_error': 2.9325555327239324e-05, 'variance': 0.05833024002918336}[0m [36mstep[0m=[35m288660[0m
[2m2023-10-22 13:46:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_288660.pt[0m


Epoch 61/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:48:46[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=61 step=293471[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002501137760616692, 'time_algorithm_update': 0.017568565064174323, 'loss': -140.43035026975295, 'time_step': 0.017892057745502784, 'observation_error': 0.10289858090912604, 'reward_error': 2.8538800194156264e-05, 'variance': 0.06023791356821489}[0m [36mstep[0m=[35m293471[0m
[2m2023-10-22 13:48:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_293471.pt[0m


Epoch 62/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:51:06[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=62 step=298282[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025053149175554716, 'time_algorithm_update': 0.017542048864418146, 'loss': -140.9595970490003, 'time_step': 0.017865658351523147, 'observation_error': 0.10036290079319674, 'reward_error': 2.6694155071282318e-05, 'variance': 0.056576963381893906}[0m [36mstep[0m=[35m298282[0m
[2m2023-10-22 13:51:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_298282.pt[0m


Epoch 63/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:53:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=63 step=303093[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002516962787609937, 'time_algorithm_update': 0.01756314819009258, 'loss': -140.72408643875963, 'time_step': 0.017889630395591542, 'observation_error': 0.10154219903064116, 'reward_error': 2.967521934610829e-05, 'variance': 0.05384936757606119}[0m [36mstep[0m=[35m303093[0m
[2m2023-10-22 13:53:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_303093.pt[0m


Epoch 64/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:55:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=64 step=307904[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002490989484451982, 'time_algorithm_update': 0.017585842259037743, 'loss': -140.44300259812363, 'time_step': 0.017909295641689582, 'observation_error': 0.09533379827738576, 'reward_error': 2.4364461260024204e-05, 'variance': 0.05477412401198887}[0m [36mstep[0m=[35m307904[0m
[2m2023-10-22 13:55:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_307904.pt[0m


Epoch 65/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 13:58:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=65 step=312715[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002546998771107685, 'time_algorithm_update': 0.017664271019471954, 'loss': -140.8084701350959, 'time_step': 0.01799289448249863, 'observation_error': 0.0978345976383827, 'reward_error': 2.7959966929725383e-05, 'variance': 0.05235098111611997}[0m [36mstep[0m=[35m312715[0m
[2m2023-10-22 13:58:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_312715.pt[0m


Epoch 66/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:00:28[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=66 step=317526[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025497427404955154, 'time_algorithm_update': 0.017574791996475565, 'loss': -140.8597738935605, 'time_step': 0.017904464976997193, 'observation_error': 0.11110222492940207, 'reward_error': 2.7262784197083624e-05, 'variance': 0.06839204620434623}[0m [36mstep[0m=[35m317526[0m
[2m2023-10-22 14:00:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_317526.pt[0m


Epoch 67/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:02:49[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=67 step=322337[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025576961384917764, 'time_algorithm_update': 0.017677235915541427, 'loss': -141.28152162650596, 'time_step': 0.018007546793371347, 'observation_error': 0.11623603102732083, 'reward_error': 2.3562756592691054e-05, 'variance': 0.08449396655523465}[0m [36mstep[0m=[35m322337[0m
[2m2023-10-22 14:02:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_322337.pt[0m


Epoch 68/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:05:10[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=68 step=327148[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002575624858830035, 'time_algorithm_update': 0.01754258967962366, 'loss': -141.15089860336124, 'time_step': 0.017874507194396934, 'observation_error': 0.0958636919911664, 'reward_error': 2.161953958783743e-05, 'variance': 0.049910790007805086}[0m [36mstep[0m=[35m327148[0m
[2m2023-10-22 14:05:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_327148.pt[0m


Epoch 69/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:07:31[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=69 step=331959[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002535518403609313, 'time_algorithm_update': 0.01768180244158767, 'loss': -141.89049696183903, 'time_step': 0.018009106846763597, 'observation_error': 0.09793932307864814, 'reward_error': 2.1787898856794534e-05, 'variance': 0.056445710017012665}[0m [36mstep[0m=[35m331959[0m
[2m2023-10-22 14:07:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_331959.pt[0m


Epoch 70/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:09:51[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=70 step=336770[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002522365488398474, 'time_algorithm_update': 0.01754585622725232, 'loss': -141.8945133329751, 'time_step': 0.017871974387255112, 'observation_error': 0.0996252473671354, 'reward_error': 2.087943547985062e-05, 'variance': 0.05531928359478561}[0m [36mstep[0m=[35m336770[0m
[2m2023-10-22 14:09:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_336770.pt[0m


Epoch 71/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:12:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=71 step=341581[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002534777031349209, 'time_algorithm_update': 0.01758337982279227, 'loss': -142.63188379843155, 'time_step': 0.017911571743730644, 'observation_error': 0.09604621204158675, 'reward_error': 1.9428261630619605e-05, 'variance': 0.05142738453453699}[0m [36mstep[0m=[35m341581[0m
[2m2023-10-22 14:12:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_341581.pt[0m


Epoch 72/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:14:31[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=72 step=346392[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025481068649403, 'time_algorithm_update': 0.01752535203122171, 'loss': -141.3533415733913, 'time_step': 0.017855403825216346, 'observation_error': 0.10295228582052857, 'reward_error': 1.8110364854626567e-05, 'variance': 0.06122908820243639}[0m [36mstep[0m=[35m346392[0m
[2m2023-10-22 14:14:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_346392.pt[0m


Epoch 73/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:16:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=73 step=351203[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002506852670311556, 'time_algorithm_update': 0.017593787480134078, 'loss': -142.40221047703724, 'time_step': 0.017918968864746983, 'observation_error': 0.10515915021049754, 'reward_error': 1.9204673605923523e-05, 'variance': 0.06533897293221588}[0m [36mstep[0m=[35m351203[0m
[2m2023-10-22 14:16:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_351203.pt[0m


Epoch 74/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:19:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=74 step=356014[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024854400950344405, 'time_algorithm_update': 0.01734187904798769, 'loss': -142.23149304537634, 'time_step': 0.01766385622763926, 'observation_error': 0.09938780990944059, 'reward_error': 1.8508343287584495e-05, 'variance': 0.05660618126093838}[0m [36mstep[0m=[35m356014[0m
[2m2023-10-22 14:19:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_356014.pt[0m


Epoch 75/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:21:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=75 step=360825[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002533505399518042, 'time_algorithm_update': 0.017372354800030635, 'loss': -142.5066183185756, 'time_step': 0.01770062016439745, 'observation_error': 0.10333274705680226, 'reward_error': 1.93153821597936e-05, 'variance': 0.05591840922850011}[0m [36mstep[0m=[35m360825[0m
[2m2023-10-22 14:21:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_360825.pt[0m


Epoch 76/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:23:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=76 step=365636[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025046052617556935, 'time_algorithm_update': 0.01734007765215515, 'loss': -142.56141692867052, 'time_step': 0.017663725050341635, 'observation_error': 0.09496979499150386, 'reward_error': 1.7783449924228515e-05, 'variance': 0.05386031107539296}[0m [36mstep[0m=[35m365636[0m
[2m2023-10-22 14:23:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_365636.pt[0m


Epoch 77/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:26:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=77 step=370447[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025075801666202675, 'time_algorithm_update': 0.017332313165325703, 'loss': -143.08071275551288, 'time_step': 0.01765557674478528, 'observation_error': 0.10329014952512548, 'reward_error': 1.70058342943888e-05, 'variance': 0.058529048989559296}[0m [36mstep[0m=[35m370447[0m
[2m2023-10-22 14:26:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_370447.pt[0m


Epoch 78/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:28:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=78 step=375258[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024906183027522376, 'time_algorithm_update': 0.017270701909308076, 'loss': -142.11656632955643, 'time_step': 0.01759286403556892, 'observation_error': 0.09525003884081971, 'reward_error': 1.5117029641396894e-05, 'variance': 0.056296871699629214}[0m [36mstep[0m=[35m375258[0m
[2m2023-10-22 14:28:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_375258.pt[0m


Epoch 79/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:30:45[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=79 step=380069[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025139626086910407, 'time_algorithm_update': 0.017316713473871675, 'loss': -143.83572692331913, 'time_step': 0.01764161010413922, 'observation_error': 0.10486829978033309, 'reward_error': 1.668824113829448e-05, 'variance': 0.06022594928958324}[0m [36mstep[0m=[35m380069[0m
[2m2023-10-22 14:30:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_380069.pt[0m


Epoch 80/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:33:04[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=80 step=384880[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002548161377606484, 'time_algorithm_update': 0.01742253133051858, 'loss': -142.08370413588923, 'time_step': 0.01775103615416085, 'observation_error': 0.10759767653779977, 'reward_error': 1.4076724300994797e-05, 'variance': 0.0768287067062894}[0m [36mstep[0m=[35m384880[0m
[2m2023-10-22 14:33:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_384880.pt[0m


Epoch 81/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:35:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=81 step=389691[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002528627011464257, 'time_algorithm_update': 0.017453143165999108, 'loss': -144.18404506056157, 'time_step': 0.017780387656799203, 'observation_error': 0.0979841149685308, 'reward_error': 1.4770202618317252e-05, 'variance': 0.054355122484060804}[0m [36mstep[0m=[35m389691[0m
[2m2023-10-22 14:35:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_389691.pt[0m


Epoch 82/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:37:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=82 step=394502[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002536710744289667, 'time_algorithm_update': 0.01745928897354167, 'loss': -143.29651561285894, 'time_step': 0.01778723866001431, 'observation_error': 0.10140063643294053, 'reward_error': 1.490805609465895e-05, 'variance': 0.058687565131872434}[0m [36mstep[0m=[35m394502[0m
[2m2023-10-22 14:37:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_394502.pt[0m


Epoch 83/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:40:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=83 step=399313[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002555552799571356, 'time_algorithm_update': 0.017353566960073317, 'loss': -144.0219928287117, 'time_step': 0.017682670481461195, 'observation_error': 0.09851279317639382, 'reward_error': 1.4017811282939152e-05, 'variance': 0.05513069926101313}[0m [36mstep[0m=[35m399313[0m
[2m2023-10-22 14:40:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_399313.pt[0m


Epoch 84/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:42:20[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=84 step=404124[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025190372423430885, 'time_algorithm_update': 0.017465580775915673, 'loss': -145.05042402235344, 'time_step': 0.017791459823541793, 'observation_error': 0.1025683428063106, 'reward_error': 1.4378776373558682e-05, 'variance': 0.05720135080773626}[0m [36mstep[0m=[35m404124[0m
[2m2023-10-22 14:42:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_404124.pt[0m


Epoch 85/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:44:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=85 step=408935[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00024911673939716195, 'time_algorithm_update': 0.017375682154060573, 'loss': -144.19962344053653, 'time_step': 0.017698353527737517, 'observation_error': 0.09649958230547495, 'reward_error': 1.3106426546324707e-05, 'variance': 0.0582594104181412}[0m [36mstep[0m=[35m408935[0m
[2m2023-10-22 14:44:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_408935.pt[0m


Epoch 86/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:46:58[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=86 step=413746[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002503561591983114, 'time_algorithm_update': 0.01739363376971863, 'loss': -143.77421460719594, 'time_step': 0.017718202531077563, 'observation_error': 0.110242659866612, 'reward_error': 1.243084576150597e-05, 'variance': 0.06697265544215536}[0m [36mstep[0m=[35m413746[0m
[2m2023-10-22 14:46:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_413746.pt[0m


Epoch 87/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:49:16[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=87 step=418557[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002535992663805115, 'time_algorithm_update': 0.017320073238159508, 'loss': -144.0583855794091, 'time_step': 0.017646814725721632, 'observation_error': 0.10045963858366067, 'reward_error': 1.245584729683384e-05, 'variance': 0.06046185180693965}[0m [36mstep[0m=[35m418557[0m
[2m2023-10-22 14:49:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_418557.pt[0m


Epoch 88/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:51:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=88 step=423368[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000265876014374864, 'time_algorithm_update': 0.017996965091953504, 'loss': -143.7238840802101, 'time_step': 0.018339869931937997, 'observation_error': 0.10266068552442455, 'reward_error': 1.2334255474069384e-05, 'variance': 0.05661664143218945}[0m [36mstep[0m=[35m423368[0m
[2m2023-10-22 14:51:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_423368.pt[0m


Epoch 89/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:53:56[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=89 step=428179[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025267582181535275, 'time_algorithm_update': 0.017342534092007346, 'loss': -144.02348921804958, 'time_step': 0.01766849793160787, 'observation_error': 0.09276834144186455, 'reward_error': 1.1472359291972182e-05, 'variance': 0.04956759221020024}[0m [36mstep[0m=[35m428179[0m
[2m2023-10-22 14:53:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_428179.pt[0m


Epoch 90/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:56:15[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=90 step=432990[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025082695040626503, 'time_algorithm_update': 0.017392639756029248, 'loss': -145.33479801440433, 'time_step': 0.01771753252085319, 'observation_error': 0.09593401510734863, 'reward_error': 1.251213844128275e-05, 'variance': 0.05791488568798271}[0m [36mstep[0m=[35m432990[0m
[2m2023-10-22 14:56:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_432990.pt[0m


Epoch 91/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 14:58:33[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=91 step=437801[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025297192470667094, 'time_algorithm_update': 0.017296052727375177, 'loss': -145.07807135240026, 'time_step': 0.017621838607899093, 'observation_error': 0.10223609725460656, 'reward_error': 1.2006743206974587e-05, 'variance': 0.05629816235475676}[0m [36mstep[0m=[35m437801[0m
[2m2023-10-22 14:58:33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_437801.pt[0m


Epoch 92/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:00:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=92 step=442612[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002527657181575873, 'time_algorithm_update': 0.017459530514209835, 'loss': -144.983045120493, 'time_step': 0.017785212225984372, 'observation_error': 0.09638304310993799, 'reward_error': 1.1748061963564655e-05, 'variance': 0.05832340030737747}[0m [36mstep[0m=[35m442612[0m
[2m2023-10-22 15:00:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_442612.pt[0m


Epoch 93/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:03:11[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=93 step=447423[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025269851890727304, 'time_algorithm_update': 0.017422299255231543, 'loss': -144.24996546077472, 'time_step': 0.0177487730856027, 'observation_error': 0.1020516153683795, 'reward_error': 1.1599368960673375e-05, 'variance': 0.055827095011008916}[0m [36mstep[0m=[35m447423[0m
[2m2023-10-22 15:03:11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_447423.pt[0m


Epoch 94/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:05:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=94 step=452234[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025205477387660807, 'time_algorithm_update': 0.017369329644399235, 'loss': -144.04723848490772, 'time_step': 0.01769496179920451, 'observation_error': 0.09418061858144117, 'reward_error': 1.1241874714490025e-05, 'variance': 0.0516511051416689}[0m [36mstep[0m=[35m452234[0m
[2m2023-10-22 15:05:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_452234.pt[0m


Epoch 95/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:07:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=95 step=457045[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002507437442548804, 'time_algorithm_update': 0.017367600552184843, 'loss': -146.22899345012596, 'time_step': 0.017690325496945554, 'observation_error': 0.10799057717964591, 'reward_error': 1.048072252689277e-05, 'variance': 0.07216700687553305}[0m [36mstep[0m=[35m457045[0m
[2m2023-10-22 15:07:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_457045.pt[0m


Epoch 96/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:10:06[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=96 step=461856[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025368876626699193, 'time_algorithm_update': 0.0172629987244496, 'loss': -145.31322280266903, 'time_step': 0.017588974358608806, 'observation_error': 0.0988662554606847, 'reward_error': 1.0538156557201391e-05, 'variance': 0.06669990331739217}[0m [36mstep[0m=[35m461856[0m
[2m2023-10-22 15:10:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_461856.pt[0m


Epoch 97/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:12:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=97 step=466667[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025161069387508465, 'time_algorithm_update': 0.01748990819301062, 'loss': -145.3610089043772, 'time_step': 0.017814342555868924, 'observation_error': 0.09512446647431297, 'reward_error': 1.051242531483492e-05, 'variance': 0.05869231398882973}[0m [36mstep[0m=[35m466667[0m
[2m2023-10-22 15:12:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_466667.pt[0m


Epoch 98/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:14:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=98 step=471478[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025345441135936954, 'time_algorithm_update': 0.017272023742349103, 'loss': -144.8367831421057, 'time_step': 0.017599146075219976, 'observation_error': 0.1020313265256675, 'reward_error': 1.0105018385693378e-05, 'variance': 0.05468199137157422}[0m [36mstep[0m=[35m471478[0m
[2m2023-10-22 15:14:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_471478.pt[0m


Epoch 99/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:17:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=99 step=476289[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025216082579082073, 'time_algorithm_update': 0.01731903769572989, 'loss': -146.0275501642919, 'time_step': 0.017644506907220244, 'observation_error': 0.0971075030597745, 'reward_error': 9.910938196056781e-06, 'variance': 0.052425882288666535}[0m [36mstep[0m=[35m476289[0m
[2m2023-10-22 15:17:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_476289.pt[0m


Epoch 100/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:19:20[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022112713: epoch=100 step=481100[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002472343674768556, 'time_algorithm_update': 0.017404789786853208, 'loss': -146.65049012746516, 'time_step': 0.017723986721415452, 'observation_error': 0.09895285823097774, 'reward_error': 1.02275729792786e-05, 'variance': 0.0526068868078156}[0m [36mstep[0m=[35m481100[0m
[2m2023-10-22 15:19:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713/model_481100.pt[0m
[2m2023-10-22 15:19:20[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-22 15:19:20[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920[0m
[2m2023-10-22 15:19:20[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-10-22 15:19:20[0m [[32m[1mde

Epoch 1/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:21:34[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=1 step=4811[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002585228008332904, 'time_algorithm_update': 0.016770909467374823, 'loss': 2561.961346314189, 'time_step': 0.01711834287276711, 'observation_error': 2.198056478123162, 'reward_error': 0.0478472702971176, 'variance': 2.1041911670798057}[0m [36mstep[0m=[35m4811[0m
[2m2023-10-22 15:21:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_4811.pt[0m


Epoch 2/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:23:48[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=2 step=9622[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002482104415433666, 'time_algorithm_update': 0.016896692835796503, 'loss': -27.485084711501813, 'time_step': 0.017221997765933767, 'observation_error': 1.4384719863998614, 'reward_error': 0.03619993108530447, 'variance': 1.0688759720130547}[0m [36mstep[0m=[35m9622[0m
[2m2023-10-22 15:23:48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_9622.pt[0m


Epoch 3/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:26:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=3 step=14433[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025078522343814955, 'time_algorithm_update': 0.016990694180022553, 'loss': -52.49199563464314, 'time_step': 0.017320911841193297, 'observation_error': 1.1031371731484363, 'reward_error': 0.01030621250170387, 'variance': 0.6920911032102152}[0m [36mstep[0m=[35m14433[0m
[2m2023-10-22 15:26:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_14433.pt[0m


Epoch 4/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:28:16[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=4 step=19244[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002539725294729648, 'time_algorithm_update': 0.016943303990131157, 'loss': -71.60184011070824, 'time_step': 0.017276352295828967, 'observation_error': 1.0725191198786124, 'reward_error': 0.004014663901668129, 'variance': 0.6287200740055877}[0m [36mstep[0m=[35m19244[0m
[2m2023-10-22 15:28:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_19244.pt[0m


Epoch 5/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:30:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=5 step=24055[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002503138375465648, 'time_algorithm_update': 0.016976651717213527, 'loss': -83.25141938177222, 'time_step': 0.01730351085302067, 'observation_error': 0.9528376354114929, 'reward_error': 0.0021345977141436242, 'variance': 0.5442022247811783}[0m [36mstep[0m=[35m24055[0m
[2m2023-10-22 15:30:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_24055.pt[0m


Epoch 6/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:32:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=6 step=28866[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002485143248788583, 'time_algorithm_update': 0.01695619202334131, 'loss': -93.48664551796176, 'time_step': 0.017278335962194438, 'observation_error': 0.9228449308969368, 'reward_error': 0.0013878895120904395, 'variance': 0.5204715857336343}[0m [36mstep[0m=[35m28866[0m
[2m2023-10-22 15:32:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_28866.pt[0m


Epoch 7/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:34:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=7 step=33677[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025484458346100266, 'time_algorithm_update': 0.017031508655663066, 'loss': -98.30233590207905, 'time_step': 0.0173606922115563, 'observation_error': 0.902411721966341, 'reward_error': 0.0010407439077677009, 'variance': 0.4974517765174975}[0m [36mstep[0m=[35m33677[0m
[2m2023-10-22 15:34:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_33677.pt[0m


Epoch 8/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:37:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=8 step=38488[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025189123587805577, 'time_algorithm_update': 0.01705945313683024, 'loss': -101.31544710776583, 'time_step': 0.017382684157361016, 'observation_error': 0.9082468690097625, 'reward_error': 0.0008314803072558967, 'variance': 0.5187496822224267}[0m [36mstep[0m=[35m38488[0m
[2m2023-10-22 15:37:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_38488.pt[0m


Epoch 9/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:39:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=9 step=43299[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002515489954483581, 'time_algorithm_update': 0.016959359407474485, 'loss': -103.94524294025213, 'time_step': 0.017283239921644357, 'observation_error': 0.8730896195239465, 'reward_error': 0.0006937535487710652, 'variance': 0.48366542230931486}[0m [36mstep[0m=[35m43299[0m
[2m2023-10-22 15:39:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_43299.pt[0m


Epoch 10/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:41:40[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=10 step=48110[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025041077097843405, 'time_algorithm_update': 0.016996447248583745, 'loss': -106.14027746847181, 'time_step': 0.017318584249461175, 'observation_error': 0.8529230566353929, 'reward_error': 0.0005714170207280481, 'variance': 0.48818549440488884}[0m [36mstep[0m=[35m48110[0m
[2m2023-10-22 15:41:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_48110.pt[0m


Epoch 11/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:43:53[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=11 step=52921[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025198197468876766, 'time_algorithm_update': 0.017115412569174867, 'loss': -108.03012912046931, 'time_step': 0.017439466334509268, 'observation_error': 0.8058213508176312, 'reward_error': 0.0005236859400140598, 'variance': 0.46924754848423955}[0m [36mstep[0m=[35m52921[0m
[2m2023-10-22 15:43:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_52921.pt[0m


Epoch 12/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:46:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=12 step=57732[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002496496254875962, 'time_algorithm_update': 0.01707287113226964, 'loss': -110.0010797742052, 'time_step': 0.01739368461516909, 'observation_error': 0.7938060331222649, 'reward_error': 0.0004690510359656177, 'variance': 0.467745584783451}[0m [36mstep[0m=[35m57732[0m
[2m2023-10-22 15:46:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_57732.pt[0m


Epoch 13/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:48:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=13 step=62543[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002493006453100794, 'time_algorithm_update': 0.017100623778408815, 'loss': -111.61248106590752, 'time_step': 0.017422297768522464, 'observation_error': 0.726545676747942, 'reward_error': 0.000397754860835847, 'variance': 0.4259159353248379}[0m [36mstep[0m=[35m62543[0m
[2m2023-10-22 15:48:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_62543.pt[0m


Epoch 14/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:50:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=14 step=67354[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000248783072323146, 'time_algorithm_update': 0.01693755107024087, 'loss': -113.35033276878457, 'time_step': 0.017256759452463015, 'observation_error': 0.7058179457113681, 'reward_error': 0.00035934671034781613, 'variance': 0.4352375961956359}[0m [36mstep[0m=[35m67354[0m
[2m2023-10-22 15:50:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_67354.pt[0m


Epoch 15/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:52:55[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=15 step=72165[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025439554776255345, 'time_algorithm_update': 0.01723865812520083, 'loss': -115.14107662732971, 'time_step': 0.017566006239623642, 'observation_error': 0.6754279546382019, 'reward_error': 0.00033157068552247743, 'variance': 0.4158771004320754}[0m [36mstep[0m=[35m72165[0m
[2m2023-10-22 15:52:55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_72165.pt[0m


Epoch 16/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:55:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=16 step=76976[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000251940594619437, 'time_algorithm_update': 0.01698834929290816, 'loss': -116.90799374561551, 'time_step': 0.017313009635546283, 'observation_error': 0.6606775274683219, 'reward_error': 0.000292850263033316, 'variance': 0.4452519533893217}[0m [36mstep[0m=[35m76976[0m
[2m2023-10-22 15:55:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_76976.pt[0m


Epoch 17/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:57:21[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=17 step=81787[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002567983174170409, 'time_algorithm_update': 0.016947439816557577, 'loss': -118.44003230681466, 'time_step': 0.017277456524217978, 'observation_error': 0.6262707373168415, 'reward_error': 0.00028314741842894864, 'variance': 0.4121690009990564}[0m [36mstep[0m=[35m81787[0m
[2m2023-10-22 15:57:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_81787.pt[0m


Epoch 18/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 15:59:36[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=18 step=86598[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025323784740371086, 'time_algorithm_update': 0.016998068157934247, 'loss': -119.97101275139354, 'time_step': 0.01732405132475278, 'observation_error': 0.5870765621078073, 'reward_error': 0.00025892511528403283, 'variance': 0.4152754502059863}[0m [36mstep[0m=[35m86598[0m
[2m2023-10-22 15:59:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_86598.pt[0m


Epoch 19/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:01:51[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=19 step=91409[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002521592399678045, 'time_algorithm_update': 0.017106801153740796, 'loss': -121.69712969816244, 'time_step': 0.017432306492261743, 'observation_error': 0.5595760787709312, 'reward_error': 0.00023689184995221907, 'variance': 0.39160589194592216}[0m [36mstep[0m=[35m91409[0m
[2m2023-10-22 16:01:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_91409.pt[0m


Epoch 20/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:04:06[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=20 step=96220[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002530067632560595, 'time_algorithm_update': 0.017047300182145095, 'loss': -123.41263026870033, 'time_step': 0.01737338682391544, 'observation_error': 0.5399458931360495, 'reward_error': 0.00023285985042903503, 'variance': 0.3824982010865216}[0m [36mstep[0m=[35m96220[0m
[2m2023-10-22 16:04:06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_96220.pt[0m


Epoch 21/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:06:20[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=21 step=101031[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002497510686036679, 'time_algorithm_update': 0.01700611541683028, 'loss': -125.21593969959996, 'time_step': 0.017327838071887744, 'observation_error': 0.5102047047192588, 'reward_error': 0.00021218904664475351, 'variance': 0.36133963765877264}[0m [36mstep[0m=[35m101031[0m
[2m2023-10-22 16:06:20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_101031.pt[0m


Epoch 22/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:08:34[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=22 step=105842[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002488187037840427, 'time_algorithm_update': 0.01703318140160341, 'loss': -126.78359365453345, 'time_step': 0.017354525639643615, 'observation_error': 0.47886161896043256, 'reward_error': 0.00020581532948285754, 'variance': 0.3424213734812943}[0m [36mstep[0m=[35m105842[0m
[2m2023-10-22 16:08:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_105842.pt[0m


Epoch 23/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:10:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=23 step=110653[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002532145060711902, 'time_algorithm_update': 0.016912984243427437, 'loss': -128.30132129885655, 'time_step': 0.017239329919476094, 'observation_error': 0.4361456210526189, 'reward_error': 0.00018222405103561067, 'variance': 0.309592073871037}[0m [36mstep[0m=[35m110653[0m
[2m2023-10-22 16:10:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_110653.pt[0m


Epoch 24/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:13:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=24 step=115464[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025160330988666514, 'time_algorithm_update': 0.017064652703601788, 'loss': -129.987264697801, 'time_step': 0.017389152184317698, 'observation_error': 0.42333786658975703, 'reward_error': 0.00017781892095908506, 'variance': 0.312055033064548}[0m [36mstep[0m=[35m115464[0m
[2m2023-10-22 16:13:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_115464.pt[0m


Epoch 25/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:15:16[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=25 step=120275[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002495616618671627, 'time_algorithm_update': 0.017033443012844127, 'loss': -131.38595478927687, 'time_step': 0.01735632673813434, 'observation_error': 0.3987298453846969, 'reward_error': 0.00017788390820211173, 'variance': 0.30209059376391173}[0m [36mstep[0m=[35m120275[0m
[2m2023-10-22 16:15:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_120275.pt[0m


Epoch 26/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:17:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=26 step=125086[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002572157853260726, 'time_algorithm_update': 0.017082645104645543, 'loss': -132.9944677565009, 'time_step': 0.01741288070543916, 'observation_error': 0.35900308608610815, 'reward_error': 0.00016801245615189538, 'variance': 0.2904305333179568}[0m [36mstep[0m=[35m125086[0m
[2m2023-10-22 16:17:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_125086.pt[0m


Epoch 27/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:19:45[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=27 step=129897[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025311603637327403, 'time_algorithm_update': 0.016968764675999105, 'loss': -134.44969611603017, 'time_step': 0.01729475968693329, 'observation_error': 0.3130940516434062, 'reward_error': 0.00015422344857107714, 'variance': 0.2752228619059098}[0m [36mstep[0m=[35m129897[0m
[2m2023-10-22 16:19:45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_129897.pt[0m


Epoch 28/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:21:59[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=28 step=134708[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025254707280921975, 'time_algorithm_update': 0.017001926812231602, 'loss': -136.1191474955782, 'time_step': 0.017327320647571715, 'observation_error': 0.2837423632142897, 'reward_error': 0.00016128577063974855, 'variance': 0.26005930368411495}[0m [36mstep[0m=[35m134708[0m
[2m2023-10-22 16:21:59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_134708.pt[0m


Epoch 29/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:24:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=29 step=139519[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025445541258141744, 'time_algorithm_update': 0.016952311910876264, 'loss': -137.56204029686657, 'time_step': 0.017280209265189368, 'observation_error': 0.26904146316486754, 'reward_error': 0.0001528598696234666, 'variance': 0.2543767339974423}[0m [36mstep[0m=[35m139519[0m
[2m2023-10-22 16:24:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_139519.pt[0m


Epoch 30/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:26:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=30 step=144330[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000254661222674747, 'time_algorithm_update': 0.017093047954961332, 'loss': -138.78737823226254, 'time_step': 0.01742106266017764, 'observation_error': 0.2372760575460989, 'reward_error': 0.00014011162376503157, 'variance': 0.23533267592586748}[0m [36mstep[0m=[35m144330[0m
[2m2023-10-22 16:26:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_144330.pt[0m


Epoch 31/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:28:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=31 step=149141[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002512219690082227, 'time_algorithm_update': 0.01707409181953641, 'loss': -140.29942313055318, 'time_step': 0.017398445900104517, 'observation_error': 0.21390898396748453, 'reward_error': 0.00013639505400214877, 'variance': 0.213027907034265}[0m [36mstep[0m=[35m149141[0m
[2m2023-10-22 16:28:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_149141.pt[0m


Epoch 32/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:30:56[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=32 step=153952[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002506833838663238, 'time_algorithm_update': 0.016950656113419407, 'loss': -141.96356444278544, 'time_step': 0.017274420862509094, 'observation_error': 0.18466638835196597, 'reward_error': 0.00011964906536130937, 'variance': 0.1882755773420838}[0m [36mstep[0m=[35m153952[0m
[2m2023-10-22 16:30:56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_153952.pt[0m


Epoch 33/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:33:10[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=33 step=158763[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025107775822768116, 'time_algorithm_update': 0.0169373100251424, 'loss': -143.22051161942673, 'time_step': 0.017260875803000515, 'observation_error': 0.16794246747219485, 'reward_error': 0.00011866041244275096, 'variance': 0.17799305485181752}[0m [36mstep[0m=[35m158763[0m
[2m2023-10-22 16:33:10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_158763.pt[0m


Epoch 34/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:35:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=34 step=163574[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002497543393636389, 'time_algorithm_update': 0.016959663191696037, 'loss': -144.3153557849758, 'time_step': 0.01728162163881323, 'observation_error': 0.15786751610304567, 'reward_error': 0.00010445587454445594, 'variance': 0.15113380705432325}[0m [36mstep[0m=[35m163574[0m
[2m2023-10-22 16:35:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_163574.pt[0m


Epoch 35/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:37:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=35 step=168385[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000252741683027497, 'time_algorithm_update': 0.017126331158869728, 'loss': -145.50613952332242, 'time_step': 0.017453777346534704, 'observation_error': 0.15523171172154915, 'reward_error': 9.550661513683977e-05, 'variance': 0.14832350332926236}[0m [36mstep[0m=[35m168385[0m
[2m2023-10-22 16:37:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_168385.pt[0m


Epoch 36/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:39:53[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=36 step=173196[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025332263937811176, 'time_algorithm_update': 0.017042829747062244, 'loss': -146.6779289063236, 'time_step': 0.0173695597374075, 'observation_error': 0.14921937934198576, 'reward_error': 9.613414745199247e-05, 'variance': 0.13743101368376692}[0m [36mstep[0m=[35m173196[0m
[2m2023-10-22 16:39:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_173196.pt[0m


Epoch 37/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:42:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=37 step=178007[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025467202609404533, 'time_algorithm_update': 0.017149334661530943, 'loss': -147.82815737140524, 'time_step': 0.01747833832562577, 'observation_error': 0.14352587105031334, 'reward_error': 9.599174817296561e-05, 'variance': 0.11980790078640727}[0m [36mstep[0m=[35m178007[0m
[2m2023-10-22 16:42:09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_178007.pt[0m


Epoch 38/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:44:26[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=38 step=182818[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025492323037121554, 'time_algorithm_update': 0.017040661084530534, 'loss': -148.85624984871248, 'time_step': 0.017368640604298668, 'observation_error': 0.1309027836001714, 'reward_error': 8.335977479882338e-05, 'variance': 0.10173675226175917}[0m [36mstep[0m=[35m182818[0m
[2m2023-10-22 16:44:26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_182818.pt[0m


Epoch 39/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:46:39[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=39 step=187629[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025347031914650143, 'time_algorithm_update': 0.017027171082814797, 'loss': -149.43516563164968, 'time_step': 0.01735409107458019, 'observation_error': 0.12298962776868778, 'reward_error': 7.760556113684389e-05, 'variance': 0.08582442906414536}[0m [36mstep[0m=[35m187629[0m
[2m2023-10-22 16:46:39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_187629.pt[0m


Epoch 40/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:48:54[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=40 step=192440[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002514080554277876, 'time_algorithm_update': 0.016988498558499564, 'loss': -150.39025170360526, 'time_step': 0.01731280238830084, 'observation_error': 0.12142669949215586, 'reward_error': 8.311738009535317e-05, 'variance': 0.07204176918141333}[0m [36mstep[0m=[35m192440[0m
[2m2023-10-22 16:48:54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_192440.pt[0m


Epoch 41/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:51:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=41 step=197251[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025360144688715884, 'time_algorithm_update': 0.01693872561996926, 'loss': -151.41909140922056, 'time_step': 0.017265229878319546, 'observation_error': 0.11797704515841301, 'reward_error': 7.360027286551439e-05, 'variance': 0.07127287085048047}[0m [36mstep[0m=[35m197251[0m
[2m2023-10-22 16:51:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_197251.pt[0m


Epoch 42/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:53:21[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=42 step=202062[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000254758106549647, 'time_algorithm_update': 0.01696608503155737, 'loss': -152.08452670268423, 'time_step': 0.017294026342902207, 'observation_error': 0.11469650116856353, 'reward_error': 6.187351078241385e-05, 'variance': 0.06861496463367164}[0m [36mstep[0m=[35m202062[0m
[2m2023-10-22 16:53:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_202062.pt[0m


Epoch 43/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:55:36[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=43 step=206873[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002513147892116435, 'time_algorithm_update': 0.016980113023731373, 'loss': -152.52598939053954, 'time_step': 0.01730496282266297, 'observation_error': 0.11429508198782645, 'reward_error': 6.377382988999006e-05, 'variance': 0.06760442422498157}[0m [36mstep[0m=[35m206873[0m
[2m2023-10-22 16:55:36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_206873.pt[0m


Epoch 44/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 16:57:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=44 step=211684[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002479878316374584, 'time_algorithm_update': 0.017074016938955862, 'loss': -153.59987207609757, 'time_step': 0.017395143572344052, 'observation_error': 0.11728911475754879, 'reward_error': 5.464252956716026e-05, 'variance': 0.06759181432711653}[0m [36mstep[0m=[35m211684[0m
[2m2023-10-22 16:57:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_211684.pt[0m


Epoch 45/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:00:04[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=45 step=216495[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000254861531944489, 'time_algorithm_update': 0.017018584544979294, 'loss': -154.13897672727336, 'time_step': 0.017346586464497535, 'observation_error': 0.11376320287815406, 'reward_error': 5.203099970520251e-05, 'variance': 0.06730015977386826}[0m [36mstep[0m=[35m216495[0m
[2m2023-10-22 17:00:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_216495.pt[0m


Epoch 46/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:02:21[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=46 step=221306[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002586442649649424, 'time_algorithm_update': 0.01751384004637695, 'loss': -155.09859588819688, 'time_step': 0.017847082466723344, 'observation_error': 0.11449955344667972, 'reward_error': 4.9104759584828246e-05, 'variance': 0.07138650003557026}[0m [36mstep[0m=[35m221306[0m
[2m2023-10-22 17:02:21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_221306.pt[0m


Epoch 47/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:04:35[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=47 step=226117[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025227807758008584, 'time_algorithm_update': 0.016994587722426267, 'loss': -155.7878382145008, 'time_step': 0.017320566627345445, 'observation_error': 0.10740083638681297, 'reward_error': 5.0059895025697826e-05, 'variance': 0.0676249122140337}[0m [36mstep[0m=[35m226117[0m
[2m2023-10-22 17:04:35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_226117.pt[0m


Epoch 48/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:06:50[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=48 step=230928[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025008017643651207, 'time_algorithm_update': 0.017048166487524698, 'loss': -156.3141315597379, 'time_step': 0.017370843659367044, 'observation_error': 0.11050870899845792, 'reward_error': 4.9962683906572284e-05, 'variance': 0.0647645943603831}[0m [36mstep[0m=[35m230928[0m
[2m2023-10-22 17:06:50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_230928.pt[0m


Epoch 49/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:09:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=49 step=235739[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002538335717311646, 'time_algorithm_update': 0.017066279807573446, 'loss': -156.42496221443642, 'time_step': 0.017394614353469343, 'observation_error': 0.12190756090153507, 'reward_error': 4.920567539306941e-05, 'variance': 0.09588450559081026}[0m [36mstep[0m=[35m235739[0m
[2m2023-10-22 17:09:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_235739.pt[0m


Epoch 50/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:11:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=50 step=240550[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002587440231440593, 'time_algorithm_update': 0.016932935334124155, 'loss': -157.4364445630242, 'time_step': 0.017264881294597783, 'observation_error': 0.11275890314260667, 'reward_error': 4.212667960740302e-05, 'variance': 0.06541873518890581}[0m [36mstep[0m=[35m240550[0m
[2m2023-10-22 17:11:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_240550.pt[0m


Epoch 51/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:13:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=51 step=245361[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025517156034416874, 'time_algorithm_update': 0.016960661566398715, 'loss': -157.51574759403056, 'time_step': 0.017290510771503023, 'observation_error': 0.1081907707615275, 'reward_error': 3.9876083186517254e-05, 'variance': 0.06463544304401672}[0m [36mstep[0m=[35m245361[0m
[2m2023-10-22 17:13:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_245361.pt[0m


Epoch 52/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:15:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=52 step=250172[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025051860694354005, 'time_algorithm_update': 0.017005140928586786, 'loss': -157.915353993868, 'time_step': 0.017328147010034097, 'observation_error': 0.1060562717320886, 'reward_error': 3.7306156588460655e-05, 'variance': 0.06440992497949452}[0m [36mstep[0m=[35m250172[0m
[2m2023-10-22 17:15:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_250172.pt[0m


Epoch 53/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:18:00[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=53 step=254983[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025299972616642485, 'time_algorithm_update': 0.017044734072719933, 'loss': -158.37762231614678, 'time_step': 0.017371516940351387, 'observation_error': 0.1065666321648727, 'reward_error': 3.669650788209539e-05, 'variance': 0.06521087256823223}[0m [36mstep[0m=[35m254983[0m
[2m2023-10-22 17:18:00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_254983.pt[0m


Epoch 54/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:20:16[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=54 step=259794[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002535552597918101, 'time_algorithm_update': 0.01708444868098473, 'loss': -159.39884389093527, 'time_step': 0.017411479333462473, 'observation_error': 0.11679792697704713, 'reward_error': 3.386506236942977e-05, 'variance': 0.0827009690284446}[0m [36mstep[0m=[35m259794[0m
[2m2023-10-22 17:20:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_259794.pt[0m


Epoch 55/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:22:30[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=55 step=264605[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00026096704967104703, 'time_algorithm_update': 0.017089614400346273, 'loss': -159.98577010559703, 'time_step': 0.017425201162680398, 'observation_error': 0.1151205798105093, 'reward_error': 3.103368806088346e-05, 'variance': 0.06561621050697884}[0m [36mstep[0m=[35m264605[0m
[2m2023-10-22 17:22:30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_264605.pt[0m


Epoch 56/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:24:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=56 step=269416[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002523679243653511, 'time_algorithm_update': 0.017068664290706276, 'loss': -159.9583833790004, 'time_step': 0.017394307496715698, 'observation_error': 0.11295550995236892, 'reward_error': 3.041428309899712e-05, 'variance': 0.0640692012239684}[0m [36mstep[0m=[35m269416[0m
[2m2023-10-22 17:24:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_269416.pt[0m


Epoch 57/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:27:02[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=57 step=274227[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025516501882422664, 'time_algorithm_update': 0.017162514684403964, 'loss': -160.46412413919623, 'time_step': 0.017491867031334492, 'observation_error': 0.10796937425016787, 'reward_error': 2.9086950831784905e-05, 'variance': 0.060939846631584575}[0m [36mstep[0m=[35m274227[0m
[2m2023-10-22 17:27:02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_274227.pt[0m


Epoch 58/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:29:15[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=58 step=279038[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002492822596744846, 'time_algorithm_update': 0.016941645120142207, 'loss': -160.7097322344656, 'time_step': 0.017264243050390708, 'observation_error': 0.11062908073147461, 'reward_error': 2.8309228252330224e-05, 'variance': 0.06481207040764629}[0m [36mstep[0m=[35m279038[0m
[2m2023-10-22 17:29:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_279038.pt[0m


Epoch 59/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:31:29[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=59 step=283849[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002528260289891746, 'time_algorithm_update': 0.017000650026475633, 'loss': -160.25941470377512, 'time_step': 0.01732753909469221, 'observation_error': 0.11187407499079494, 'reward_error': 2.7657005859068825e-05, 'variance': 0.06432913312035055}[0m [36mstep[0m=[35m283849[0m
[2m2023-10-22 17:31:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_283849.pt[0m


Epoch 60/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:33:44[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=60 step=288660[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025196666158526683, 'time_algorithm_update': 0.0170245073461602, 'loss': -161.03791878382498, 'time_step': 0.017350285594910846, 'observation_error': 0.10639245198674588, 'reward_error': 2.873724488923766e-05, 'variance': 0.059103095349715}[0m [36mstep[0m=[35m288660[0m
[2m2023-10-22 17:33:44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_288660.pt[0m


Epoch 61/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:35:58[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=61 step=293471[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002520691949546622, 'time_algorithm_update': 0.017004067227290837, 'loss': -161.69756381959186, 'time_step': 0.017330640072486574, 'observation_error': 0.10782282644292485, 'reward_error': 2.4987555379633436e-05, 'variance': 0.06140585139002004}[0m [36mstep[0m=[35m293471[0m
[2m2023-10-22 17:35:58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_293471.pt[0m


Epoch 62/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:38:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=62 step=298282[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025376424153117225, 'time_algorithm_update': 0.017071990306698044, 'loss': -161.79996785203724, 'time_step': 0.017399497548549147, 'observation_error': 0.11046563116928908, 'reward_error': 2.7900265981625685e-05, 'variance': 0.0630427414364749}[0m [36mstep[0m=[35m298282[0m
[2m2023-10-22 17:38:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_298282.pt[0m


Epoch 63/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:40:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=63 step=303093[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025437567541788085, 'time_algorithm_update': 0.01712145891588013, 'loss': -162.36175709488336, 'time_step': 0.01745123819610082, 'observation_error': 0.1102379841185044, 'reward_error': 2.3473439638970323e-05, 'variance': 0.05968770536864673}[0m [36mstep[0m=[35m303093[0m
[2m2023-10-22 17:40:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_303093.pt[0m


Epoch 64/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:42:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=64 step=307904[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002513857547916213, 'time_algorithm_update': 0.01715479355992262, 'loss': -162.1017220008896, 'time_step': 0.017480005669856462, 'observation_error': 0.11624989913887145, 'reward_error': 2.3489142684159042e-05, 'variance': 0.07468554055325764}[0m [36mstep[0m=[35m307904[0m
[2m2023-10-22 17:42:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_307904.pt[0m


Epoch 65/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:44:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=65 step=312715[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002549982100657033, 'time_algorithm_update': 0.017074177503536257, 'loss': -161.94182477604943, 'time_step': 0.017403767823033163, 'observation_error': 0.1061505731017005, 'reward_error': 2.168229987322636e-05, 'variance': 0.05875874804825384}[0m [36mstep[0m=[35m312715[0m
[2m2023-10-22 17:44:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_312715.pt[0m


Epoch 66/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:47:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=66 step=317526[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002559901919193462, 'time_algorithm_update': 0.01707999559084112, 'loss': -163.14092736332412, 'time_step': 0.017410069933256767, 'observation_error': 0.10217707280322103, 'reward_error': 2.252421079158277e-05, 'variance': 0.0573948875994925}[0m [36mstep[0m=[35m317526[0m
[2m2023-10-22 17:47:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_317526.pt[0m


Epoch 67/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:49:25[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=67 step=322337[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002566168397956171, 'time_algorithm_update': 0.016907434110655355, 'loss': -163.12677235267336, 'time_step': 0.017236773622330843, 'observation_error': 0.10574126410639981, 'reward_error': 2.036860571330126e-05, 'variance': 0.05902979428627918}[0m [36mstep[0m=[35m322337[0m
[2m2023-10-22 17:49:25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_322337.pt[0m


Epoch 68/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:51:40[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=68 step=327148[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002520859947672408, 'time_algorithm_update': 0.017012642119238565, 'loss': -163.42293562975328, 'time_step': 0.01733928716893851, 'observation_error': 0.11141802225524082, 'reward_error': 2.0335880714179124e-05, 'variance': 0.059671376377804786}[0m [36mstep[0m=[35m327148[0m
[2m2023-10-22 17:51:40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_327148.pt[0m


Epoch 69/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:53:53[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=69 step=331959[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002579433807487226, 'time_algorithm_update': 0.017031677248472484, 'loss': -163.6960761851864, 'time_step': 0.017364033441537634, 'observation_error': 0.10668206554065122, 'reward_error': 1.955080386519618e-05, 'variance': 0.05672443829584515}[0m [36mstep[0m=[35m331959[0m
[2m2023-10-22 17:53:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_331959.pt[0m


Epoch 70/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:56:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=70 step=336770[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002518342453634087, 'time_algorithm_update': 0.0170377883165796, 'loss': -163.8193516672785, 'time_step': 0.01736377242498055, 'observation_error': 0.12330226417147738, 'reward_error': 1.8935453634991083e-05, 'variance': 0.12382892902494078}[0m [36mstep[0m=[35m336770[0m
[2m2023-10-22 17:56:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_336770.pt[0m


Epoch 71/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 17:58:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=71 step=341581[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002517735876330366, 'time_algorithm_update': 0.017095368162705037, 'loss': -164.52351571715218, 'time_step': 0.01742065703638426, 'observation_error': 0.10915402409581326, 'reward_error': 1.8423519031450326e-05, 'variance': 0.06203082635042762}[0m [36mstep[0m=[35m341581[0m
[2m2023-10-22 17:58:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_341581.pt[0m


Epoch 72/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:00:38[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=72 step=346392[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025266610864937815, 'time_algorithm_update': 0.01717573405551058, 'loss': -165.00734866817635, 'time_step': 0.017502192324993393, 'observation_error': 0.1016401296387042, 'reward_error': 1.8456480040130795e-05, 'variance': 0.05672112823289799}[0m [36mstep[0m=[35m346392[0m
[2m2023-10-22 18:00:38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_346392.pt[0m


Epoch 73/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:02:53[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=73 step=351203[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002525637735078598, 'time_algorithm_update': 0.017079497939755828, 'loss': -164.66844908112276, 'time_step': 0.017407389694131404, 'observation_error': 0.10620095966262255, 'reward_error': 1.5704858532983693e-05, 'variance': 0.05709329636786556}[0m [36mstep[0m=[35m351203[0m
[2m2023-10-22 18:02:53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_351203.pt[0m


Epoch 74/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:05:08[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=74 step=356014[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002530996825734188, 'time_algorithm_update': 0.01706647793633654, 'loss': -164.77860161173345, 'time_step': 0.01739423127809698, 'observation_error': 0.11272207557527851, 'reward_error': 1.7608936787910297e-05, 'variance': 0.06520770000537401}[0m [36mstep[0m=[35m356014[0m
[2m2023-10-22 18:05:08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_356014.pt[0m


Epoch 75/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:07:23[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=75 step=360825[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002554721233627202, 'time_algorithm_update': 0.017033758690738303, 'loss': -165.00041754997537, 'time_step': 0.017363768460423007, 'observation_error': 0.10654635422183754, 'reward_error': 1.5880532662658468e-05, 'variance': 0.05764426691218712}[0m [36mstep[0m=[35m360825[0m
[2m2023-10-22 18:07:23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_360825.pt[0m


Epoch 76/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:09:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=76 step=365636[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002514032484017695, 'time_algorithm_update': 0.01692287422866837, 'loss': -166.0447171765686, 'time_step': 0.01724787220293805, 'observation_error': 0.10327879766728919, 'reward_error': 1.564427979690774e-05, 'variance': 0.06168254542801552}[0m [36mstep[0m=[35m365636[0m
[2m2023-10-22 18:09:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_365636.pt[0m


Epoch 77/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:11:52[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=77 step=370447[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025243661032474304, 'time_algorithm_update': 0.017086716011442234, 'loss': -166.1109498721154, 'time_step': 0.01741380400133341, 'observation_error': 0.10459081837688422, 'reward_error': 1.7100310606597512e-05, 'variance': 0.05643460421082343}[0m [36mstep[0m=[35m370447[0m
[2m2023-10-22 18:11:52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_370447.pt[0m


Epoch 78/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:14:07[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=78 step=375258[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002505417500481836, 'time_algorithm_update': 0.017038144978087352, 'loss': -166.07103474406287, 'time_step': 0.01736193044199019, 'observation_error': 0.10327449642635118, 'reward_error': 1.441949527751648e-05, 'variance': 0.05687397772571731}[0m [36mstep[0m=[35m375258[0m
[2m2023-10-22 18:14:07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_375258.pt[0m


Epoch 79/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:16:22[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=79 step=380069[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000253421307303905, 'time_algorithm_update': 0.017080819723239885, 'loss': -166.36145535795933, 'time_step': 0.01740917453793621, 'observation_error': 0.1078145944002046, 'reward_error': 1.4731318212838217e-05, 'variance': 0.05692838516735958}[0m [36mstep[0m=[35m380069[0m
[2m2023-10-22 18:16:22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_380069.pt[0m


Epoch 80/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:18:37[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=80 step=384880[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002493007939809872, 'time_algorithm_update': 0.017102356835180747, 'loss': -166.46873535571734, 'time_step': 0.017425456975755708, 'observation_error': 0.10458100821044228, 'reward_error': 1.3459192465840707e-05, 'variance': 0.05832000527734151}[0m [36mstep[0m=[35m384880[0m
[2m2023-10-22 18:18:37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_384880.pt[0m


Epoch 81/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:20:51[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=81 step=389691[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025593870222828685, 'time_algorithm_update': 0.017045323156413505, 'loss': -166.70846668941962, 'time_step': 0.017376122616403342, 'observation_error': 0.10174053106460144, 'reward_error': 1.415063572160403e-05, 'variance': 0.05842711718765664}[0m [36mstep[0m=[35m389691[0m
[2m2023-10-22 18:20:51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_389691.pt[0m


Epoch 82/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:23:05[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=82 step=394502[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002531963682204417, 'time_algorithm_update': 0.01709322849100034, 'loss': -165.63978121984874, 'time_step': 0.017422102464506616, 'observation_error': 0.10031818287905733, 'reward_error': 1.4044553626879963e-05, 'variance': 0.0538242016226884}[0m [36mstep[0m=[35m394502[0m
[2m2023-10-22 18:23:05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_394502.pt[0m


Epoch 83/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:25:19[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=83 step=399313[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002547518128145512, 'time_algorithm_update': 0.017039194049569578, 'loss': -166.17336581279125, 'time_step': 0.01736848891041577, 'observation_error': 0.09733852661511094, 'reward_error': 1.1888903111373691e-05, 'variance': 0.053064148710961066}[0m [36mstep[0m=[35m399313[0m
[2m2023-10-22 18:25:19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_399313.pt[0m


Epoch 84/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:27:34[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=84 step=404124[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002535096673800925, 'time_algorithm_update': 0.017129289709934447, 'loss': -166.44821366805888, 'time_step': 0.017457892953717667, 'observation_error': 0.09830539127510801, 'reward_error': 1.225464507235051e-05, 'variance': 0.04949175800979402}[0m [36mstep[0m=[35m404124[0m
[2m2023-10-22 18:27:34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_404124.pt[0m


Epoch 85/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:29:49[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=85 step=408935[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025460244810920674, 'time_algorithm_update': 0.017071168899932588, 'loss': -165.87008861542938, 'time_step': 0.01740227605914455, 'observation_error': 0.1065494137739928, 'reward_error': 1.1737061884461529e-05, 'variance': 0.07561673628908758}[0m [36mstep[0m=[35m408935[0m
[2m2023-10-22 18:29:49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_408935.pt[0m


Epoch 86/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:32:03[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=86 step=413746[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002554573553858812, 'time_algorithm_update': 0.017033924657028347, 'loss': -166.66101554088795, 'time_step': 0.017363392422140276, 'observation_error': 0.09791309257436363, 'reward_error': 1.2221106396520848e-05, 'variance': 0.05062586046155216}[0m [36mstep[0m=[35m413746[0m
[2m2023-10-22 18:32:03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_413746.pt[0m


Epoch 87/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:34:17[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=87 step=418557[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025146846537331337, 'time_algorithm_update': 0.017023193095335468, 'loss': -167.4048768441758, 'time_step': 0.01734912497069082, 'observation_error': 0.10052354484795431, 'reward_error': 1.2192328739806169e-05, 'variance': 0.052951559877806594}[0m [36mstep[0m=[35m418557[0m
[2m2023-10-22 18:34:17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_418557.pt[0m


Epoch 88/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:36:32[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=88 step=423368[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025880894277378766, 'time_algorithm_update': 0.017044676933534376, 'loss': -167.3517730314849, 'time_step': 0.017378422307561772, 'observation_error': 0.10214279373219202, 'reward_error': 1.1028274326976193e-05, 'variance': 0.05555029954011412}[0m [36mstep[0m=[35m423368[0m
[2m2023-10-22 18:36:32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_423368.pt[0m


Epoch 89/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:38:47[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=89 step=428179[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002559099096291478, 'time_algorithm_update': 0.017076296311756896, 'loss': -167.80536933610307, 'time_step': 0.017407389941916252, 'observation_error': 0.10234678592066622, 'reward_error': 1.1108125419084385e-05, 'variance': 0.05368784036932829}[0m [36mstep[0m=[35m428179[0m
[2m2023-10-22 18:38:47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_428179.pt[0m


Epoch 90/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:41:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=90 step=432990[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025223198959867563, 'time_algorithm_update': 0.01706562808387073, 'loss': -168.0436578329178, 'time_step': 0.017392106820381843, 'observation_error': 0.10519260617953292, 'reward_error': 1.0143727533062075e-05, 'variance': 0.05110634773362382}[0m [36mstep[0m=[35m432990[0m
[2m2023-10-22 18:41:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_432990.pt[0m


Epoch 91/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:43:16[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=91 step=437801[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002555166750780834, 'time_algorithm_update': 0.017066761550871605, 'loss': -167.596649096974, 'time_step': 0.017397377055391555, 'observation_error': 0.09851593829382749, 'reward_error': 1.057728877897003e-05, 'variance': 0.051837503270565806}[0m [36mstep[0m=[35m437801[0m
[2m2023-10-22 18:43:16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_437801.pt[0m


Epoch 92/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:45:31[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=92 step=442612[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025112969393146383, 'time_algorithm_update': 0.01709457207955087, 'loss': -169.23213781762385, 'time_step': 0.017420528733390853, 'observation_error': 0.09883765850868759, 'reward_error': 1.0644067704310559e-05, 'variance': 0.049581464066289854}[0m [36mstep[0m=[35m442612[0m
[2m2023-10-22 18:45:31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_442612.pt[0m


Epoch 93/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:47:46[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=93 step=447423[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002529261340670763, 'time_algorithm_update': 0.01712339138989636, 'loss': -168.34942839929155, 'time_step': 0.017451373437069925, 'observation_error': 0.09983198391462421, 'reward_error': 9.514768453622784e-06, 'variance': 0.05026329368241947}[0m [36mstep[0m=[35m447423[0m
[2m2023-10-22 18:47:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_447423.pt[0m


Epoch 94/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:50:01[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=94 step=452234[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025278544183135205, 'time_algorithm_update': 0.017036330945227652, 'loss': -168.52547832973482, 'time_step': 0.017362844916743912, 'observation_error': 0.10341524488434117, 'reward_error': 9.337202424039789e-06, 'variance': 0.05152108345609298}[0m [36mstep[0m=[35m452234[0m
[2m2023-10-22 18:50:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_452234.pt[0m


Epoch 95/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:52:15[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=95 step=457045[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002542050507727246, 'time_algorithm_update': 0.017052270101921098, 'loss': -168.57605940644942, 'time_step': 0.017381641974297514, 'observation_error': 0.10084317811401415, 'reward_error': 9.003239708692481e-06, 'variance': 0.059413376062594406}[0m [36mstep[0m=[35m457045[0m
[2m2023-10-22 18:52:15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_457045.pt[0m


Epoch 96/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:54:29[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=96 step=461856[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025295448065349206, 'time_algorithm_update': 0.017038778960395073, 'loss': -169.4844893108805, 'time_step': 0.01736616959469751, 'observation_error': 0.12297982277496178, 'reward_error': 8.8871511780203e-06, 'variance': 0.12733206500836716}[0m [36mstep[0m=[35m461856[0m
[2m2023-10-22 18:54:29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_461856.pt[0m


Epoch 97/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:56:43[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=97 step=466667[0m [36mepoch[0m=[35m97[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002554397131048253, 'time_algorithm_update': 0.017081086934418125, 'loss': -169.95369062025267, 'time_step': 0.01741152358783602, 'observation_error': 0.0972222576841269, 'reward_error': 8.91747667483165e-06, 'variance': 0.048911529109217325}[0m [36mstep[0m=[35m466667[0m
[2m2023-10-22 18:56:43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_466667.pt[0m


Epoch 98/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 18:58:57[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=98 step=471478[0m [36mepoch[0m=[35m98[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002518787475218027, 'time_algorithm_update': 0.01700541805115888, 'loss': -169.793736654665, 'time_step': 0.01733218109600263, 'observation_error': 0.09946431987360776, 'reward_error': 9.797447985920796e-06, 'variance': 0.049045909698080425}[0m [36mstep[0m=[35m471478[0m
[2m2023-10-22 18:58:57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_471478.pt[0m


Epoch 99/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 19:01:12[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=99 step=476289[0m [36mepoch[0m=[35m99[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025319071872594625, 'time_algorithm_update': 0.017060706333468845, 'loss': -169.42349036095223, 'time_step': 0.01738923162413942, 'observation_error': 0.13280601751685417, 'reward_error': 9.202625165181117e-06, 'variance': 0.2010845124122297}[0m [36mstep[0m=[35m476289[0m
[2m2023-10-22 19:01:12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_476289.pt[0m


Epoch 100/100:   0%|          | 0/4811 [00:00<?, ?it/s]

[2m2023-10-22 19:03:27[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_default_20231022151920: epoch=100 step=481100[0m [36mepoch[0m=[35m100[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002546628084977633, 'time_algorithm_update': 0.017076166571611377, 'loss': -169.28865254968497, 'time_step': 0.01740551178233833, 'observation_error': 0.09620699387020272, 'reward_error': 8.344311619764974e-06, 'variance': 0.051795174158562454}[0m [36mstep[0m=[35m481100[0m
[2m2023-10-22 19:03:27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920/model_481100.pt[0m
Using SymmetryEncoderFactory
[2m2023-10-22 19:03:27[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-10-22 19:03:27[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022190327[0m
[2m2023-10-22 19:03:27[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-1

Epoch 1/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 19:05:46[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022190327: epoch=1 step=4812[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002546806386977756, 'time_algorithm_update': 0.017314200191228268, 'loss': 67.55117154604169, 'time_step': 0.01765825752407337, 'observation_error': 1.7729882563186647, 'reward_error': 0.027141206179212524, 'variance': 1.3857992146105422}[0m [36mstep[0m=[35m4812[0m
[2m2023-10-22 19:05:46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022190327/model_4812.pt[0m


Epoch 2/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 19:08:04[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022190327: epoch=2 step=9624[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002527290944347556, 'time_algorithm_update': 0.017405673155463545, 'loss': -34.41131811917869, 'time_step': 0.017741769329270816, 'observation_error': 1.1365656300700744, 'reward_error': 0.010621586451023225, 'variance': 0.6648616752762887}[0m [36mstep[0m=[35m9624[0m
[2m2023-10-22 19:08:04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022190327/model_9624.pt[0m


Epoch 3/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 19:10:24[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022190327: epoch=3 step=14436[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00025582288764262336, 'time_algorithm_update': 0.017451642804213196, 'loss': -60.73875956731544, 'time_step': 0.01778776637732933, 'observation_error': 0.9950461414000269, 'reward_error': 0.006494370335419913, 'variance': 0.5695077713723158}[0m [36mstep[0m=[35m14436[0m
[2m2023-10-22 19:10:24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022190327/model_14436.pt[0m


Epoch 4/100:   0%|          | 0/4812 [00:00<?, ?it/s]

[2m2023-10-22 19:12:42[0m [[32m[1minfo     [0m] [1mexp_5_dynamics_reacher_symmetry_20231022190327: epoch=4 step=19248[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0002536933221920074, 'time_algorithm_update': 0.0174128503375319, 'loss': -83.1134996138705, 'time_step': 0.017742859108291462, 'observation_error': 0.8791388435914068, 'reward_error': 0.00363882280111391, 'variance': 0.48822060813064266}[0m [36mstep[0m=[35m19248[0m
[2m2023-10-22 19:12:42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022190327/model_19248.pt[0m


Epoch 5/100:   0%|          | 0/4812 [00:00<?, ?it/s]

KeyboardInterrupt: 

## Load Dynamics

In [None]:
# load trained dynamics model
dynamics_model_path = "d3rlpy_logs/ProbabilisticEnsembleDynamics_20231002230632"
dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics.from_json(dynamics_model_path + '/params.json')
dynamics.load_model(dynamics_model_path + '/model_31542.pt')

## Train Offline RL Algorithm

In [None]:
encoder = d3rlpy.models.encoders.DefaultEncoderFactory(dropout_rate=0.2)
# give COMBO as the generator argument.
combo = COMBO(dynamics=dynamics, critic_encoder_factory=encoder, actor_encoder_factory=encoder,
              use_gpu=use_gpu)

In [None]:
combo.fit(dataset = train_episodes, eval_episodes=test_episodes, n_steps=100000, n_steps_per_epoch=1000, tensorboard_dir="tensorboard_logs",
         scorers={
            'environment': d3rlpy.metrics.scorer.evaluate_on_environment(eval_env)
        })

In [10]:
def experiment_COMBO_training(dataset, eval_env, experiment_name, save_name, models_dir, symmetry_project, projection_size, seed=1, use_gpu=True):
    model_paths = [filename for filename in os.listdir(models_dir) if filename.startswith(experiment_name+'_dynamics')]
    model_paths = [models_dir + model_paths_i for model_paths_i in model_paths]
    model_paths.sort()
    print(model_paths)

    symmetry_reduced_paths = []
    default_paths = []
    for model_path_i in model_paths:
        f = open(model_path_i +'/params.json')
        model_path_i_params = json.load(f)
        if(model_path_i_params["state_encoder_factory"]['type']=='symmetry'):
            symmetry_reduced_paths.append(model_path_i)
        elif(model_path_i_params["state_encoder_factory"]['type']=='default'):
            default_paths.append(model_path_i)
    print("Default_paths:", default_paths, "Symmetry reduced paths: ", symmetry_reduced_paths)

    # load trained dynamics model
    for i in range(len(default_paths)):
        for type, dynamics_model_path in zip(['symmetry', 'default'],[symmetry_reduced_paths[i], default_paths[i]]):
            # use the same seeds for default and symmetric runs
            train_episodes, test_episodes = train_test_split(dataset, random_state=seed+i)
            if type == 'symmetry':
                state_encoder_factory = encoders.SymmetryEncoderFactory(project=symmetry_project, projection_size=projection_size)
                dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(learning_rate=1e-4, use_gpu=use_gpu, state_encoder_factory=state_encoder_factory)
                dynamics.build_with_dataset(dataset)
            else:
                dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics.from_json(dynamics_model_path + '/params.json')

            filenames = os.listdir(dynamics_model_path)
            latest_model_path = dynamics_model_path + '/model_' +  str(max([int(filename.strip('model_.pt')) for filename in filenames if filename.endswith(".pt")])) + '.pt'
            dynamics.load_model(latest_model_path)
            print("Loaded model: ", latest_model_path)
            
            encoder = d3rlpy.models.encoders.DefaultEncoderFactory(dropout_rate=0.2)
            # give COMBO as the generator argument.
            combo = COMBO(dynamics=dynamics, critic_encoder_factory=encoder, actor_encoder_factory=encoder, use_gpu=use_gpu)
            combo.fit(dataset = train_episodes, eval_episodes=test_episodes, n_steps=1000000, n_steps_per_epoch=1000,
                      tensorboard_dir="tensorboard_logs",
                     scorers={
                        'environment': d3rlpy.metrics.scorer.evaluate_on_environment(eval_env)
                    },
                     experiment_name=save_name + "_" + type,
                     save_interval=50)


In [None]:
experiment_COMBO_training(dataset, eval_env1, 'exp_5', save_name='exp_5_COMBO_reacher', models_dir='d3rlpy_logs/', symmetry_project=reacher_project, projection_size=3, seed=1, use_gpu=True)

['d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548', 'd3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412', 'd3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920', 'd3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231021133449', 'd3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204', 'd3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713']
Default_paths: ['d3rlpy_logs/exp_5_dynamics_reacher_default_20231022000548', 'd3rlpy_logs/exp_5_dynamics_reacher_default_20231022074412', 'd3rlpy_logs/exp_5_dynamics_reacher_default_20231022151920'] Symmetry reduced paths:  ['d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231021133449', 'd3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022035204', 'd3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231022112713']
Using SymmetryEncoderFactory
Loaded model:  d3rlpy_logs/exp_5_dynamics_reacher_symmetry_20231021133449/model_481100.pt
[2m2023-10-22 22:05:37[0m [[32m[1mdebug    [0m] [1mRandomIterator is selected.[0m
[2m2023-10

Epoch 1/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:05:40[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:06:06[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0010448517799377442, 'time_algorithm_update': 0.02410107946395874, 'critic_loss': 6.164930180072784, 'actor_loss': 0.7840588709590957, 'temp_loss': 1.9782691452503205, 'temp': 0.9607453961968422, 'time_step': 0.025244216203689576, 'environment': -11.378090222772746}[0m [36mstep[0m=[35m1000[0m


Epoch 2/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:06:08[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m500000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:06:32[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000974318265914917, 'time_algorithm_update': 0.022514952182769776, 'critic_loss': 6.441056068897248, 'actor_loss': -0.3853411850333214, 'temp_loss': 2.197696668386459, 'temp': 0.8796660744547844, 'time_step': 0.023581775665283203, 'environment': -9.45402920163083}[0m [36mstep[0m=[35m2000[0m


Epoch 3/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:06:34[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m750000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:06:56[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008982834815979004, 'time_algorithm_update': 0.020760424137115478, 'critic_loss': 5.72396941280365, 'actor_loss': -1.447117608487606, 'temp_loss': 1.515995758652687, 'temp': 0.8048089358210564, 'time_step': 0.02173899483680725, 'environment': -13.306788215474432}[0m [36mstep[0m=[35m3000[0m


Epoch 4/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:06:58[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1000000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:07:22[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009476220607757568, 'time_algorithm_update': 0.02188751196861267, 'critic_loss': 5.960247170925141, 'actor_loss': -2.824837364792824, 'temp_loss': 1.2216288534998894, 'temp': 0.743604374051094, 'time_step': 0.022915480375289916, 'environment': -11.59308494716095}[0m [36mstep[0m=[35m4000[0m


Epoch 5/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:07:24[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:07:46[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000904782772064209, 'time_algorithm_update': 0.020933420181274413, 'critic_loss': 6.126681846618652, 'actor_loss': -3.9663084988594055, 'temp_loss': 1.0000704315900804, 'temp': 0.6873183034658432, 'time_step': 0.02191606855392456, 'environment': -12.868504867224436}[0m [36mstep[0m=[35m5000[0m


Epoch 6/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:07:48[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:08:11[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009081273078918457, 'time_algorithm_update': 0.020884114742279053, 'critic_loss': 6.2228364725112915, 'actor_loss': -4.870485695362091, 'temp_loss': 0.7845671022534371, 'temp': 0.6365881853699684, 'time_step': 0.021872065782546996, 'environment': -9.595464266434737}[0m [36mstep[0m=[35m6000[0m


Epoch 7/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:08:13[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:08:35[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009114305973052979, 'time_algorithm_update': 0.0209736967086792, 'critic_loss': 6.109674341201782, 'actor_loss': -5.415237628936768, 'temp_loss': 0.6603889551758766, 'temp': 0.5892914682626724, 'time_step': 0.02196425700187683, 'environment': -7.05141766163092}[0m [36mstep[0m=[35m7000[0m


Epoch 8/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:08:37[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:09:01[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009443862438201905, 'time_algorithm_update': 0.02180388832092285, 'critic_loss': 5.888156137943268, 'actor_loss': -5.798446466445923, 'temp_loss': 0.5079215131103992, 'temp': 0.5462790789008141, 'time_step': 0.022827143907546997, 'environment': -7.15925957776022}[0m [36mstep[0m=[35m8000[0m


Epoch 9/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:09:02[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:09:25[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008962349891662597, 'time_algorithm_update': 0.020811432123184204, 'critic_loss': 5.684026957988739, 'actor_loss': -6.000930200576782, 'temp_loss': 0.3951742529571056, 'temp': 0.5075755498707294, 'time_step': 0.021783833503723146, 'environment': -7.19920723925401}[0m [36mstep[0m=[35m9000[0m


Epoch 10/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:09:27[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:09:49[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009032092094421386, 'time_algorithm_update': 0.02087155795097351, 'critic_loss': 5.52108828163147, 'actor_loss': -6.123822211265564, 'temp_loss': 0.3050425755679607, 'temp': 0.4723744069635868, 'time_step': 0.021849528312683105, 'environment': -4.295841438791577}[0m [36mstep[0m=[35m10000[0m


Epoch 11/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:09:51[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:10:15[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009494717121124268, 'time_algorithm_update': 0.021759862422943116, 'critic_loss': 5.335129775047302, 'actor_loss': -6.166763888835907, 'temp_loss': 0.2273626386374235, 'temp': 0.4405156519114971, 'time_step': 0.022784265279769896, 'environment': -7.636023747493934}[0m [36mstep[0m=[35m11000[0m


Epoch 12/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:10:17[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:10:39[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009227967262268066, 'time_algorithm_update': 0.02100396728515625, 'critic_loss': 5.151256232738495, 'actor_loss': -6.1243832621574406, 'temp_loss': 0.16370662254840135, 'temp': 0.4121418144106865, 'time_step': 0.02200215768814087, 'environment': -5.003131125908841}[0m [36mstep[0m=[35m12000[0m


Epoch 13/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:10:41[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:11:04[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008987038135528564, 'time_algorithm_update': 0.020776196718215942, 'critic_loss': 4.955436664104462, 'actor_loss': -6.015678730487823, 'temp_loss': 0.11501106555759907, 'temp': 0.3869228814840317, 'time_step': 0.02175032901763916, 'environment': -6.617932011775144}[0m [36mstep[0m=[35m13000[0m


Epoch 14/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:11:06[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:11:28[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009189965724945068, 'time_algorithm_update': 0.021000933170318602, 'critic_loss': 4.772475698947907, 'actor_loss': -5.881640050888062, 'temp_loss': 0.07321414167992771, 'temp': 0.3654467552304268, 'time_step': 0.022000169277191162, 'environment': -6.415529603771584}[0m [36mstep[0m=[35m14000[0m


Epoch 15/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:11:30[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:11:52[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000894312858581543, 'time_algorithm_update': 0.02056062388420105, 'critic_loss': 4.575043620586396, 'actor_loss': -5.705599377632141, 'temp_loss': 0.042463930384023116, 'temp': 0.34796223014593125, 'time_step': 0.02153500437736511, 'environment': -6.745404267711916}[0m [36mstep[0m=[35m15000[0m


Epoch 16/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:11:55[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:12:17[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008992013931274414, 'time_algorithm_update': 0.020741590738296508, 'critic_loss': 4.406714433431626, 'actor_loss': -5.493315636157989, 'temp_loss': 0.022574137995019554, 'temp': 0.33469546020030977, 'time_step': 0.021722209453582764, 'environment': -6.394032494618195}[0m [36mstep[0m=[35m16000[0m


Epoch 17/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:12:19[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:12:41[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009200398921966553, 'time_algorithm_update': 0.020958128929138183, 'critic_loss': 4.293131200313568, 'actor_loss': -5.2977013230323795, 'temp_loss': 0.006463657052023336, 'temp': 0.32683619514107703, 'time_step': 0.02196212100982666, 'environment': -5.794153003939854}[0m [36mstep[0m=[35m17000[0m


Epoch 18/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:12:44[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:13:06[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00090305757522583, 'time_algorithm_update': 0.020721632003784178, 'critic_loss': 4.200144693613052, 'actor_loss': -5.121315078735352, 'temp_loss': 0.00045078251615632324, 'temp': 0.32462031677365305, 'time_step': 0.021708473443984987, 'environment': -5.964828215231699}[0m [36mstep[0m=[35m18000[0m


Epoch 19/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:13:08[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:13:30[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000907724380493164, 'time_algorithm_update': 0.02088681125640869, 'critic_loss': 4.1117848708629605, 'actor_loss': -4.9590821671485905, 'temp_loss': -0.004131247925804928, 'temp': 0.32727784922719, 'time_step': 0.02187776303291321, 'environment': -4.850984643042811}[0m [36mstep[0m=[35m19000[0m


Epoch 20/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:13:33[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:13:55[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009006829261779785, 'time_algorithm_update': 0.020661261558532713, 'critic_loss': 4.0817008526325225, 'actor_loss': -4.824825167655945, 'temp_loss': -0.003400350028881803, 'temp': 0.3307215103805065, 'time_step': 0.021645982027053833, 'environment': -5.576807274811808}[0m [36mstep[0m=[35m20000[0m


Epoch 21/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:13:56[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:14:19[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009039988517761231, 'time_algorithm_update': 0.020827373027801514, 'critic_loss': 4.028676295518875, 'actor_loss': -4.715131332874298, 'temp_loss': -0.005151480537373573, 'temp': 0.3359869908094406, 'time_step': 0.021815558195114135, 'environment': -5.277766827854359}[0m [36mstep[0m=[35m21000[0m


Epoch 22/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:14:21[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:14:44[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009174668788909913, 'time_algorithm_update': 0.021023951530456542, 'critic_loss': 3.992425004482269, 'actor_loss': -4.628542704582214, 'temp_loss': -0.00511302535654977, 'temp': 0.34260856279730795, 'time_step': 0.022026490211486817, 'environment': -5.352823527578539}[0m [36mstep[0m=[35m22000[0m


Epoch 23/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:14:46[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:15:08[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009004678726196289, 'time_algorithm_update': 0.02072205972671509, 'critic_loss': 3.98927379155159, 'actor_loss': -4.577315338134766, 'temp_loss': -0.00036579523771069946, 'temp': 0.34611220291256906, 'time_step': 0.02170657515525818, 'environment': -4.316391012278996}[0m [36mstep[0m=[35m23000[0m


Epoch 24/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:15:10[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:15:33[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000902216911315918, 'time_algorithm_update': 0.02078666067123413, 'critic_loss': 3.9590460567474364, 'actor_loss': -4.546675184249878, 'temp_loss': -0.0023066474145743998, 'temp': 0.3470818750858307, 'time_step': 0.02177396059036255, 'environment': -5.2962203092981035}[0m [36mstep[0m=[35m24000[0m


Epoch 25/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:15:34[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:15:57[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009191133975982666, 'time_algorithm_update': 0.020988659381866455, 'critic_loss': 3.957989158630371, 'actor_loss': -4.538364521026612, 'temp_loss': 0.0018957564358133823, 'temp': 0.3475942094922066, 'time_step': 0.021991968631744383, 'environment': -3.871074392305431}[0m [36mstep[0m=[35m25000[0m


Epoch 26/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:15:59[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:16:20[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008752837181091309, 'time_algorithm_update': 0.02010487723350525, 'critic_loss': 3.952091586828232, 'actor_loss': -4.542866096496582, 'temp_loss': -0.00033727538026869294, 'temp': 0.34674606573581696, 'time_step': 0.021062538385391235, 'environment': -5.344633267782315}[0m [36mstep[0m=[35m26000[0m


Epoch 27/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:16:23[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:16:45[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008905954360961914, 'time_algorithm_update': 0.020504746198654174, 'critic_loss': 3.9501672434806823, 'actor_loss': -4.568326230525971, 'temp_loss': 0.00027795566129498186, 'temp': 0.3455994276702404, 'time_step': 0.021480494737625123, 'environment': -5.421564025668678}[0m [36mstep[0m=[35m27000[0m


Epoch 28/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:16:47[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:17:09[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008940830230712891, 'time_algorithm_update': 0.020386516094207763, 'critic_loss': 3.9399600303173066, 'actor_loss': -4.588236658096314, 'temp_loss': -0.0013271586580667644, 'temp': 0.3482446448206902, 'time_step': 0.02136410117149353, 'environment': -4.9275378963854255}[0m [36mstep[0m=[35m28000[0m


Epoch 29/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:17:11[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:17:33[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008795349597930908, 'time_algorithm_update': 0.020144407749176026, 'critic_loss': 3.940180385828018, 'actor_loss': -4.622540180206299, 'temp_loss': 0.0002407679872121662, 'temp': 0.34859052470326424, 'time_step': 0.021106424570083618, 'environment': -3.955017050619147}[0m [36mstep[0m=[35m29000[0m


Epoch 30/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:17:35[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:17:56[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008793880939483642, 'time_algorithm_update': 0.020224169015884398, 'critic_loss': 3.938380835056305, 'actor_loss': -4.667987545013427, 'temp_loss': 0.000775716835167259, 'temp': 0.34756915146112444, 'time_step': 0.021187817811965944, 'environment': -4.56045903160725}[0m [36mstep[0m=[35m30000[0m


Epoch 31/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:17:59[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:18:21[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=31 step=31000[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008965153694152832, 'time_algorithm_update': 0.02049800252914429, 'critic_loss': 3.9521711876392365, 'actor_loss': -4.730067782402038, 'temp_loss': 0.0024752538099419327, 'temp': 0.34462788262963295, 'time_step': 0.021478821516036986, 'environment': -5.7522421850546515}[0m [36mstep[0m=[35m31000[0m


Epoch 32/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:18:23[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:18:45[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=32 step=32000[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009014027118682861, 'time_algorithm_update': 0.02066847348213196, 'critic_loss': 3.960065915822983, 'actor_loss': -4.767784334659576, 'temp_loss': 0.0002708406256278977, 'temp': 0.3436160718202591, 'time_step': 0.02165676474571228, 'environment': -5.186687237511277}[0m [36mstep[0m=[35m32000[0m


Epoch 33/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:18:47[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:19:09[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=33 step=33000[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008770556449890137, 'time_algorithm_update': 0.02014479899406433, 'critic_loss': 3.9412173352241515, 'actor_loss': -4.808300775051117, 'temp_loss': -0.0009426629218505695, 'temp': 0.3434359992444515, 'time_step': 0.021107102870941162, 'environment': -4.667593394784987}[0m [36mstep[0m=[35m33000[0m


Epoch 34/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:19:11[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:19:33[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=34 step=34000[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008791260719299317, 'time_algorithm_update': 0.02037480401992798, 'critic_loss': 3.939914969444275, 'actor_loss': -4.8382097754478455, 'temp_loss': -0.0020914772396208718, 'temp': 0.34614594393968584, 'time_step': 0.0213390691280365, 'environment': -4.521121806925662}[0m [36mstep[0m=[35m34000[0m


Epoch 35/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:19:35[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:19:58[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=35 step=35000[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009136824607849122, 'time_algorithm_update': 0.0207739098072052, 'critic_loss': 3.9593414497375488, 'actor_loss': -4.8853756327629085, 'temp_loss': 0.0001837627865606919, 'temp': 0.34646569752693174, 'time_step': 0.02177485704421997, 'environment': -4.1264860399978724}[0m [36mstep[0m=[35m35000[0m


Epoch 36/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:19:59[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:20:22[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=36 step=36000[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008987078666687012, 'time_algorithm_update': 0.020597198724746704, 'critic_loss': 3.972540936231613, 'actor_loss': -4.934910318374634, 'temp_loss': 0.0016715424715075641, 'temp': 0.3454805145561695, 'time_step': 0.021581019401550293, 'environment': -4.122656400614314}[0m [36mstep[0m=[35m36000[0m


Epoch 37/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:20:23[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:20:45[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=37 step=37000[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008827948570251465, 'time_algorithm_update': 0.02025320863723755, 'critic_loss': 3.972068170547485, 'actor_loss': -4.96918962430954, 'temp_loss': -0.002695804743794724, 'temp': 0.34532259783148767, 'time_step': 0.021220927715301513, 'environment': -3.871829834964789}[0m [36mstep[0m=[35m37000[0m


Epoch 38/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:20:48[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:21:10[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=38 step=38000[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008999438285827637, 'time_algorithm_update': 0.02061329174041748, 'critic_loss': 4.0026477169990535, 'actor_loss': -5.0224898543357845, 'temp_loss': 0.0015198945289012044, 'temp': 0.34723943707346916, 'time_step': 0.0215999071598053, 'environment': -4.4785006318709035}[0m [36mstep[0m=[35m38000[0m


Epoch 39/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:21:12[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:21:34[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=39 step=39000[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009026134014129639, 'time_algorithm_update': 0.020727076530456544, 'critic_loss': 4.00891507768631, 'actor_loss': -5.076990302562714, 'temp_loss': 0.0018794643480796367, 'temp': 0.34475791320204735, 'time_step': 0.021716638565063475, 'environment': -4.601581251623875}[0m [36mstep[0m=[35m39000[0m


Epoch 40/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:21:37[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:21:58[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=40 step=40000[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000877913236618042, 'time_algorithm_update': 0.020194199800491332, 'critic_loss': 4.027934609413147, 'actor_loss': -5.130310612678528, 'temp_loss': -0.0005334242938552052, 'temp': 0.3435857740342617, 'time_step': 0.02115630602836609, 'environment': -4.25998176862154}[0m [36mstep[0m=[35m40000[0m


Epoch 41/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:22:00[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:22:22[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=41 step=41000[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008742976188659668, 'time_algorithm_update': 0.020220851898193358, 'critic_loss': 4.074100132703781, 'actor_loss': -5.170705250740051, 'temp_loss': 0.0020267714289948346, 'temp': 0.3426677470803261, 'time_step': 0.021178522109985352, 'environment': -5.214419676708522}[0m [36mstep[0m=[35m41000[0m


Epoch 42/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:22:24[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:22:47[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=42 step=42000[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009043197631835938, 'time_algorithm_update': 0.02067099380493164, 'critic_loss': 4.121481203556061, 'actor_loss': -5.210391209125519, 'temp_loss': -0.00017557507439050824, 'temp': 0.3419606859982014, 'time_step': 0.02166239356994629, 'environment': -4.589518333425508}[0m [36mstep[0m=[35m42000[0m


Epoch 43/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:22:48[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:23:11[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=43 step=43000[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008979179859161377, 'time_algorithm_update': 0.020729831218719484, 'critic_loss': 4.185952093601227, 'actor_loss': -5.22784423828125, 'temp_loss': -0.0016581414365209638, 'temp': 0.34353909397125243, 'time_step': 0.021713563442230223, 'environment': -4.835904168792605}[0m [36mstep[0m=[35m43000[0m


Epoch 44/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:23:13[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:23:35[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=44 step=44000[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008846669197082519, 'time_algorithm_update': 0.020362029075622557, 'critic_loss': 4.258981281042099, 'actor_loss': -5.229370434761047, 'temp_loss': -3.718491387553513e-05, 'temp': 0.34434250834584235, 'time_step': 0.021332450151443482, 'environment': -4.228445647449081}[0m [36mstep[0m=[35m44000[0m


Epoch 45/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:23:37[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:23:58[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=45 step=45000[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008700387477874756, 'time_algorithm_update': 0.020092132568359376, 'critic_loss': 4.354750743627548, 'actor_loss': -5.206800522804261, 'temp_loss': -0.002139158206759021, 'temp': 0.34708538272976874, 'time_step': 0.021045929193496703, 'environment': -3.8885017410929272}[0m [36mstep[0m=[35m45000[0m


Epoch 46/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:24:01[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:24:23[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=46 step=46000[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009030308723449707, 'time_algorithm_update': 0.020667901039123537, 'critic_loss': 4.5181848247051235, 'actor_loss': -5.197545381069183, 'temp_loss': -0.003903904138947837, 'temp': 0.35045418280363083, 'time_step': 0.021658124446868897, 'environment': -4.2262497855394505}[0m [36mstep[0m=[35m46000[0m


Epoch 47/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:24:25[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:24:48[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=47 step=47000[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009230492115020752, 'time_algorithm_update': 0.020890310525894164, 'critic_loss': 4.607561668872833, 'actor_loss': -5.193749307632446, 'temp_loss': 0.0006303554326295852, 'temp': 0.35234347176551817, 'time_step': 0.021900959014892578, 'environment': -4.445616020652132}[0m [36mstep[0m=[35m47000[0m


Epoch 48/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:24:50[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:25:12[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=48 step=48000[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008827104568481446, 'time_algorithm_update': 0.020321789026260376, 'critic_loss': 4.774220684528351, 'actor_loss': -5.186479447841644, 'temp_loss': 0.0005790726265404373, 'temp': 0.35115070050954816, 'time_step': 0.021291393041610717, 'environment': -4.454332153427629}[0m [36mstep[0m=[35m48000[0m


Epoch 49/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:25:14[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:25:36[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=49 step=49000[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008810019493103027, 'time_algorithm_update': 0.020337695837020876, 'critic_loss': 5.103588313102722, 'actor_loss': -5.169224733829498, 'temp_loss': 0.002390818126499653, 'temp': 0.3490563348829746, 'time_step': 0.02130436325073242, 'environment': -4.011326144310542}[0m [36mstep[0m=[35m49000[0m


Epoch 50/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:25:38[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:26:01[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=50 step=50000[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008976120948791504, 'time_algorithm_update': 0.02068601703643799, 'critic_loss': 5.480059623003006, 'actor_loss': -5.1103853154182435, 'temp_loss': -0.0017068760868860409, 'temp': 0.34896364569664, 'time_step': 0.021671799182891846, 'environment': -4.392352669042279}[0m [36mstep[0m=[35m50000[0m
[2m2023-10-22 22:26:01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/exp_5_COMBO_reacher_symmetry_20231022220537/model_50000.pt[0m


Epoch 51/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:26:02[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:26:25[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=51 step=51000[0m [36mepoch[0m=[35m51[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009107680320739746, 'time_algorithm_update': 0.020787155628204344, 'critic_loss': 6.248575587511063, 'actor_loss': -5.025872481346131, 'temp_loss': -0.0019577318497467785, 'temp': 0.351324115216732, 'time_step': 0.02178528642654419, 'environment': -5.0304482554585706}[0m [36mstep[0m=[35m51000[0m


Epoch 52/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:26:27[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:26:50[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=52 step=52000[0m [36mepoch[0m=[35m52[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009089343547821045, 'time_algorithm_update': 0.02075000214576721, 'critic_loss': 6.968903758525848, 'actor_loss': -4.947689302206039, 'temp_loss': 0.0015131253431318328, 'temp': 0.3516368364393711, 'time_step': 0.021747808218002318, 'environment': -4.143472802198297}[0m [36mstep[0m=[35m52000[0m


Epoch 53/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:26:52[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:27:14[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=53 step=53000[0m [36mepoch[0m=[35m53[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008981180191040039, 'time_algorithm_update': 0.02073054623603821, 'critic_loss': 8.209818056821824, 'actor_loss': -4.80264640545845, 'temp_loss': -0.0002902813039254397, 'temp': 0.3506202033162117, 'time_step': 0.021716034650802612, 'environment': -4.321639178004276}[0m [36mstep[0m=[35m53000[0m


Epoch 54/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:27:16[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:27:37[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=54 step=54000[0m [36mepoch[0m=[35m54[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008765225410461426, 'time_algorithm_update': 0.02015346360206604, 'critic_loss': 10.220213712453843, 'actor_loss': -4.587304208397866, 'temp_loss': -0.002068852843251079, 'temp': 0.35264120319485665, 'time_step': 0.021115660429000854, 'environment': -4.740769720548724}[0m [36mstep[0m=[35m54000[0m


Epoch 55/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:27:40[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:28:02[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=55 step=55000[0m [36mepoch[0m=[35m55[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009026002883911133, 'time_algorithm_update': 0.020756117582321168, 'critic_loss': 11.609241840124131, 'actor_loss': -4.405093714356423, 'temp_loss': -0.0008328063741791993, 'temp': 0.3545222128629684, 'time_step': 0.02174642300605774, 'environment': -4.454199455043655}[0m [36mstep[0m=[35m55000[0m


Epoch 56/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:28:04[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:28:26[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=56 step=56000[0m [36mepoch[0m=[35m56[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008942406177520752, 'time_algorithm_update': 0.02065766429901123, 'critic_loss': 13.810775103092194, 'actor_loss': -4.155578772544861, 'temp_loss': -0.002917256031301804, 'temp': 0.35748459461331367, 'time_step': 0.02163908648490906, 'environment': -4.749349731641984}[0m [36mstep[0m=[35m56000[0m


Epoch 57/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:28:29[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:28:51[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=57 step=57000[0m [36mepoch[0m=[35m57[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009020857810974121, 'time_algorithm_update': 0.020748263359069825, 'critic_loss': 17.526950740337373, 'actor_loss': -3.845759323000908, 'temp_loss': -0.002424378439434804, 'temp': 0.36120541232824327, 'time_step': 0.021739036083221436, 'environment': -5.349019790747791}[0m [36mstep[0m=[35m57000[0m


Epoch 58/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:28:53[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:29:15[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=58 step=58000[0m [36mepoch[0m=[35m58[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008753561973571778, 'time_algorithm_update': 0.02037979292869568, 'critic_loss': 20.603243010878565, 'actor_loss': -3.565627954185009, 'temp_loss': 0.0009194931434467435, 'temp': 0.3625544378757477, 'time_step': 0.021342135906219483, 'environment': -4.873500617542076}[0m [36mstep[0m=[35m58000[0m


Epoch 59/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:29:18[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:29:40[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=59 step=59000[0m [36mepoch[0m=[35m59[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000891197681427002, 'time_algorithm_update': 0.020499093055725098, 'critic_loss': 26.48458503961563, 'actor_loss': -3.191422775283456, 'temp_loss': 0.0009481272688135505, 'temp': 0.36064117747545243, 'time_step': 0.021478057861328125, 'environment': -3.4293582652452286}[0m [36mstep[0m=[35m59000[0m


Epoch 60/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:29:42[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:30:04[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=60 step=60000[0m [36mepoch[0m=[35m60[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009081287384033203, 'time_algorithm_update': 0.02076735019683838, 'critic_loss': 32.05052517461777, 'actor_loss': -2.8658575016260146, 'temp_loss': -0.00038667202356737105, 'temp': 0.3611105874478817, 'time_step': 0.021763720989227294, 'environment': -4.71631028226492}[0m [36mstep[0m=[35m60000[0m


Epoch 61/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:30:06[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:30:29[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=61 step=61000[0m [36mepoch[0m=[35m61[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009000382423400879, 'time_algorithm_update': 0.02070082116127014, 'critic_loss': 38.99287717485428, 'actor_loss': -2.2630420074611903, 'temp_loss': -0.0030463278421666473, 'temp': 0.36254339388012885, 'time_step': 0.02169028115272522, 'environment': -4.754811018205678}[0m [36mstep[0m=[35m61000[0m


Epoch 62/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:30:30[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:30:52[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=62 step=62000[0m [36mepoch[0m=[35m62[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008814537525177002, 'time_algorithm_update': 0.020351232290267945, 'critic_loss': 51.22450922870636, 'actor_loss': -1.5437151124477386, 'temp_loss': -0.004182047667447478, 'temp': 0.3684564324915409, 'time_step': 0.02131990957260132, 'environment': -3.3032145439052862}[0m [36mstep[0m=[35m62000[0m


Epoch 63/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:30:55[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:31:17[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=63 step=63000[0m [36mepoch[0m=[35m63[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008985273838043213, 'time_algorithm_update': 0.020631139755249023, 'critic_loss': 67.12923266077041, 'actor_loss': -0.5924435911923647, 'temp_loss': -0.004570194564177655, 'temp': 0.3745730184316635, 'time_step': 0.02161938190460205, 'environment': -4.972895193268575}[0m [36mstep[0m=[35m63000[0m


Epoch 64/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:31:19[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:31:41[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=64 step=64000[0m [36mepoch[0m=[35m64[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008927228450775147, 'time_algorithm_update': 0.02051926803588867, 'critic_loss': 89.31251761984825, 'actor_loss': 0.49263123323023317, 'temp_loss': -0.007379901873646304, 'temp': 0.38341978380084035, 'time_step': 0.02149962258338928, 'environment': -4.130460451493348}[0m [36mstep[0m=[35m64000[0m


Epoch 65/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:31:44[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:32:06[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=65 step=65000[0m [36mepoch[0m=[35m65[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008923842906951904, 'time_algorithm_update': 0.02050818920135498, 'critic_loss': 114.05446219825744, 'actor_loss': 1.7702468284219504, 'temp_loss': -0.002224600455025211, 'temp': 0.3873144348859787, 'time_step': 0.0214893102645874, 'environment': -5.507424128611705}[0m [36mstep[0m=[35m65000[0m


Epoch 66/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:32:08[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:32:29[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=66 step=66000[0m [36mepoch[0m=[35m66[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000881831169128418, 'time_algorithm_update': 0.020359386920928955, 'critic_loss': 151.38050160121918, 'actor_loss': 3.1383432812988756, 'temp_loss': -0.004569102683104575, 'temp': 0.39120602905750274, 'time_step': 0.021329442501068115, 'environment': -5.012106282619037}[0m [36mstep[0m=[35m66000[0m


Epoch 67/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:32:32[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:32:54[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=67 step=67000[0m [36mepoch[0m=[35m67[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008978652954101563, 'time_algorithm_update': 0.020606661319732666, 'critic_loss': 182.94032447242736, 'actor_loss': 4.911619972020388, 'temp_loss': -0.0028643232214963063, 'temp': 0.39732360452413557, 'time_step': 0.021594417333602907, 'environment': -5.402857854205543}[0m [36mstep[0m=[35m67000[0m


Epoch 68/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:32:56[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:33:18[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=68 step=68000[0m [36mepoch[0m=[35m68[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009006595611572265, 'time_algorithm_update': 0.020622665405273438, 'critic_loss': 251.49546082115174, 'actor_loss': 7.3806735749691725, 'temp_loss': -0.015596760941436515, 'temp': 0.4068146172761917, 'time_step': 0.021613022089004518, 'environment': -4.1980295438532815}[0m [36mstep[0m=[35m68000[0m


Epoch 69/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:33:20[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:33:42[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=69 step=69000[0m [36mepoch[0m=[35m69[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008686721324920654, 'time_algorithm_update': 0.020217557430267333, 'critic_loss': 317.3174387817383, 'actor_loss': 10.109889789164066, 'temp_loss': -0.010400833827909082, 'temp': 0.4236425482034683, 'time_step': 0.02117222237586975, 'environment': -3.5660366804745145}[0m [36mstep[0m=[35m69000[0m


Epoch 70/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:33:45[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:34:07[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=70 step=70000[0m [36mepoch[0m=[35m70[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009142625331878662, 'time_algorithm_update': 0.02085638093948364, 'critic_loss': 426.55644091796876, 'actor_loss': 13.775197802066803, 'temp_loss': -0.014261729398043826, 'temp': 0.43732592862844466, 'time_step': 0.021860518455505372, 'environment': -5.208090030029976}[0m [36mstep[0m=[35m70000[0m


Epoch 71/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:34:09[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:34:31[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=71 step=71000[0m [36mepoch[0m=[35m71[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009130616188049316, 'time_algorithm_update': 0.020803969383239748, 'critic_loss': 566.812091331482, 'actor_loss': 16.96492835712433, 'temp_loss': -0.012447107347426937, 'temp': 0.4538199931681156, 'time_step': 0.021806803703308106, 'environment': -4.585976552358167}[0m [36mstep[0m=[35m71000[0m


Epoch 72/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:34:34[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:34:56[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=72 step=72000[0m [36mepoch[0m=[35m72[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009063715934753418, 'time_algorithm_update': 0.02078813934326172, 'critic_loss': 687.1179365539551, 'actor_loss': 21.095598650455475, 'temp_loss': -0.02582155381166376, 'temp': 0.4739368204772472, 'time_step': 0.02178412437438965, 'environment': -4.484012684347118}[0m [36mstep[0m=[35m72000[0m


Epoch 73/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:34:58[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:35:20[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=73 step=73000[0m [36mepoch[0m=[35m73[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009031219482421875, 'time_algorithm_update': 0.020802536725997925, 'critic_loss': 901.6505922393799, 'actor_loss': 26.10922987270355, 'temp_loss': -0.021280208116862924, 'temp': 0.4972797001302242, 'time_step': 0.0217946093082428, 'environment': -5.660802157703865}[0m [36mstep[0m=[35m73000[0m


Epoch 74/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:35:23[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:35:45[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=74 step=74000[0m [36mepoch[0m=[35m74[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008972086906433106, 'time_algorithm_update': 0.020674437522888185, 'critic_loss': 1196.1909838333129, 'actor_loss': 31.652154277801515, 'temp_loss': -0.007429020049981773, 'temp': 0.5117785949707031, 'time_step': 0.021662473440170287, 'environment': -6.228432587270685}[0m [36mstep[0m=[35m74000[0m


Epoch 75/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:35:47[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:36:09[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=75 step=75000[0m [36mepoch[0m=[35m75[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008815133571624756, 'time_algorithm_update': 0.020407407283782958, 'critic_loss': 1494.15720438385, 'actor_loss': 38.337564125061036, 'temp_loss': -0.030269161668606104, 'temp': 0.5283205699324608, 'time_step': 0.02137699770927429, 'environment': -6.257484074374679}[0m [36mstep[0m=[35m75000[0m


Epoch 76/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:36:12[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:36:34[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=76 step=76000[0m [36mepoch[0m=[35m76[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009090275764465332, 'time_algorithm_update': 0.020883450031280518, 'critic_loss': 2018.1203741760253, 'actor_loss': 47.99486967086792, 'temp_loss': -0.03351065945718437, 'temp': 0.5599198216795921, 'time_step': 0.02188319993019104, 'environment': -5.96621807531764}[0m [36mstep[0m=[35m76000[0m


Epoch 77/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:36:36[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:36:58[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=77 step=77000[0m [36mepoch[0m=[35m77[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000903977632522583, 'time_algorithm_update': 0.020736515522003174, 'critic_loss': 2651.6152488098146, 'actor_loss': 58.940160036087036, 'temp_loss': -0.054663261130917816, 'temp': 0.5968633784651757, 'time_step': 0.02172958207130432, 'environment': -8.261467355440319}[0m [36mstep[0m=[35m77000[0m


Epoch 78/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:37:01[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:37:23[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=78 step=78000[0m [36mepoch[0m=[35m78[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000898735761642456, 'time_algorithm_update': 0.020712093830108642, 'critic_loss': 3099.582154663086, 'actor_loss': 69.27715954971313, 'temp_loss': -0.05310316171590239, 'temp': 0.6393436191082, 'time_step': 0.021700743913650514, 'environment': -4.8451759746045}[0m [36mstep[0m=[35m78000[0m


Epoch 79/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:37:25[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:37:47[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=79 step=79000[0m [36mepoch[0m=[35m79[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008846628665924072, 'time_algorithm_update': 0.020484532117843627, 'critic_loss': 3993.481199462891, 'actor_loss': 82.65861131286621, 'temp_loss': -0.04769535589171574, 'temp': 0.6804163671135902, 'time_step': 0.02145791006088257, 'environment': -6.766082540786556}[0m [36mstep[0m=[35m79000[0m


Epoch 80/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:37:50[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:38:12[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=80 step=80000[0m [36mepoch[0m=[35m80[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008908448219299317, 'time_algorithm_update': 0.02044730544090271, 'critic_loss': 5058.313953857421, 'actor_loss': 98.88461190795898, 'temp_loss': -0.05309750034287572, 'temp': 0.7178543443083764, 'time_step': 0.021427679538726808, 'environment': -9.714976119785073}[0m [36mstep[0m=[35m80000[0m


Epoch 81/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:38:14[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:38:36[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=81 step=81000[0m [36mepoch[0m=[35m81[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000906691312789917, 'time_algorithm_update': 0.020841907024383544, 'critic_loss': 6344.418805419922, 'actor_loss': 119.3879316444397, 'temp_loss': -0.06710530547983945, 'temp': 0.7708420199155808, 'time_step': 0.021838690042495727, 'environment': -5.125367693350352}[0m [36mstep[0m=[35m81000[0m


Epoch 82/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:38:38[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:39:00[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=82 step=82000[0m [36mepoch[0m=[35m82[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008905212879180909, 'time_algorithm_update': 0.020568399667739868, 'critic_loss': 8232.55125378418, 'actor_loss': 143.44789876556396, 'temp_loss': -0.07037628032173962, 'temp': 0.8183617252707481, 'time_step': 0.021547902345657347, 'environment': -6.333635357097525}[0m [36mstep[0m=[35m82000[0m


Epoch 83/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:39:03[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:39:25[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=83 step=83000[0m [36mepoch[0m=[35m83[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000899479866027832, 'time_algorithm_update': 0.02056581926345825, 'critic_loss': 10458.596505615234, 'actor_loss': 166.8606595840454, 'temp_loss': -0.11062699143821374, 'temp': 0.8853723227977752, 'time_step': 0.021548881530761718, 'environment': -9.39598243890359}[0m [36mstep[0m=[35m83000[0m


Epoch 84/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:39:27[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:39:49[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=84 step=84000[0m [36mepoch[0m=[35m84[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008837690353393554, 'time_algorithm_update': 0.02031317448616028, 'critic_loss': 12860.982458984376, 'actor_loss': 196.4624436264038, 'temp_loss': -0.10212756040040404, 'temp': 0.9588519257307052, 'time_step': 0.021277916431427003, 'environment': -6.769811168903348}[0m [36mstep[0m=[35m84000[0m


Epoch 85/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:39:51[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:40:14[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=85 step=85000[0m [36mepoch[0m=[35m85[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008978264331817627, 'time_algorithm_update': 0.020678174257278442, 'critic_loss': 15831.014339355468, 'actor_loss': 235.50408500671386, 'temp_loss': -0.11413158832397312, 'temp': 1.0300606426596641, 'time_step': 0.021656872272491454, 'environment': -7.191996393449218}[0m [36mstep[0m=[35m85000[0m


Epoch 86/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:40:15[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:40:38[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=86 step=86000[0m [36mepoch[0m=[35m86[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009167888164520264, 'time_algorithm_update': 0.02090105724334717, 'critic_loss': 20280.71672558594, 'actor_loss': 281.23427865600587, 'temp_loss': -0.16439784571528435, 'temp': 1.1208024419546128, 'time_step': 0.021901169061660765, 'environment': -9.58064505272704}[0m [36mstep[0m=[35m86000[0m


Epoch 87/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:40:40[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:41:03[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=87 step=87000[0m [36mepoch[0m=[35m87[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008990068435668946, 'time_algorithm_update': 0.020553667545318604, 'critic_loss': 25467.847107910155, 'actor_loss': 334.97730297851564, 'temp_loss': -0.18819915134832263, 'temp': 1.2290089383125304, 'time_step': 0.02153640079498291, 'environment': -9.530216877273846}[0m [36mstep[0m=[35m87000[0m


Epoch 88/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:41:04[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:41:26[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=88 step=88000[0m [36mepoch[0m=[35m88[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008726906776428222, 'time_algorithm_update': 0.020132940769195556, 'critic_loss': 35137.556078125, 'actor_loss': 399.0478441772461, 'temp_loss': -0.22669193504005672, 'temp': 1.3439051817655563, 'time_step': 0.02108669686317444, 'environment': -6.609574132796354}[0m [36mstep[0m=[35m88000[0m


Epoch 89/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:41:29[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:41:51[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=89 step=89000[0m [36mepoch[0m=[35m89[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009039113521575928, 'time_algorithm_update': 0.020721963167190553, 'critic_loss': 41614.81034765625, 'actor_loss': 466.8784781188965, 'temp_loss': -0.1860424500182271, 'temp': 1.461549055814743, 'time_step': 0.02170966863632202, 'environment': -7.548472401374428}[0m [36mstep[0m=[35m89000[0m


Epoch 90/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:41:53[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:42:15[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=90 step=90000[0m [36mepoch[0m=[35m90[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009060800075531006, 'time_algorithm_update': 0.02056327176094055, 'critic_loss': 57572.22547265625, 'actor_loss': 559.198274017334, 'temp_loss': -0.2630052318163216, 'temp': 1.5906973991394042, 'time_step': 0.021552715301513672, 'environment': -8.17423307022677}[0m [36mstep[0m=[35m90000[0m


Epoch 91/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:42:17[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:42:39[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=91 step=91000[0m [36mepoch[0m=[35m91[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009236624240875244, 'time_algorithm_update': 0.020681511878967284, 'critic_loss': 67080.41461914062, 'actor_loss': 650.599731414795, 'temp_loss': -0.28930823516100646, 'temp': 1.7450585907697678, 'time_step': 0.02170849275588989, 'environment': -11.971975399799488}[0m [36mstep[0m=[35m91000[0m


Epoch 92/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:42:42[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:43:04[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=92 step=92000[0m [36mepoch[0m=[35m92[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009105665683746338, 'time_algorithm_update': 0.020649622440338133, 'critic_loss': 85420.92030664062, 'actor_loss': 761.7728034667969, 'temp_loss': -0.382599518597126, 'temp': 1.9146271911859511, 'time_step': 0.021644398689270018, 'environment': -11.890937869568598}[0m [36mstep[0m=[35m92000[0m


Epoch 93/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:43:06[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:43:28[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=93 step=93000[0m [36mepoch[0m=[35m93[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009093513488769531, 'time_algorithm_update': 0.02068190026283264, 'critic_loss': 108499.2085390625, 'actor_loss': 891.251578125, 'temp_loss': -0.3915121020972729, 'temp': 2.1042075974941254, 'time_step': 0.021675215721130372, 'environment': -7.711825955181611}[0m [36mstep[0m=[35m93000[0m


Epoch 94/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:43:31[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:43:53[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=94 step=94000[0m [36mepoch[0m=[35m94[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009048917293548584, 'time_algorithm_update': 0.020605491876602173, 'critic_loss': 137500.0516015625, 'actor_loss': 1042.5408963623047, 'temp_loss': -0.4989516198504716, 'temp': 2.3195443341732025, 'time_step': 0.021593619585037232, 'environment': -11.824266185280422}[0m [36mstep[0m=[35m94000[0m


Epoch 95/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:43:55[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:44:17[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=95 step=95000[0m [36mepoch[0m=[35m95[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009264252185821533, 'time_algorithm_update': 0.02084336996078491, 'critic_loss': 178840.828578125, 'actor_loss': 1224.5034395141602, 'temp_loss': -0.7322644879370928, 'temp': 2.5931632697582243, 'time_step': 0.021857815265655518, 'environment': -8.94280079167909}[0m [36mstep[0m=[35m95000[0m


Epoch 96/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:44:20[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m
[2m2023-10-22 22:44:42[0m [[32m[1minfo     [0m] [1mexp_5_COMBO_reacher_symmetry_20231022220537: epoch=96 step=96000[0m [36mepoch[0m=[35m96[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009189109802246094, 'time_algorithm_update': 0.02077023148536682, 'critic_loss': 229219.4055234375, 'actor_loss': 1425.6629993286133, 'temp_loss': -0.7912385091520846, 'temp': 2.88725052189827, 'time_step': 0.02177622604370117, 'environment': -10.061407372358808}[0m [36mstep[0m=[35m96000[0m


Epoch 97/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

[2m2023-10-22 22:44:44[0m [[32m[1mdebug    [0m] [1m250000 transitions are generated.[0m [36mfake_transitions[0m=[35m1250000[0m [36mreal_transitions[0m=[35m481194[0m


## Load the Policy

In [6]:
#actor_encoder = d3rlpy.models.encoders.DefaultEncoderFactory(dropout_rate=0.2)
# setup algorithm
trained_policy = d3rlpy.algos.SAC()
trained_policy.build_with_env(env1)
trained_policy.load_model('d3rlpy_logs/exp_6_SAC_reacher_20231024124119/model_100000.pt')

# initialize with dataset
#trained_policy.build_with_dataset(dataset)
# Load entire model parameters.
#trained_policy.load_model('d3rlpy_logs/COMBO_20230929153035/model_53000.pt')

## See the policy running

In [None]:
scorer = d3rlpy.metrics.scorer.evaluate_on_environment(env1, render=True)
mean_episode_return = scorer(trained_policy)

Creating window glfw
