In [1]:
import minari
import d3rlpy
import numpy as np
import os

# Parameters of the experiments

In [2]:
# Total number of training updates
n_steps = 1_000 # 100_000

# Number of updates between two evaluations (epochs)
n_steps_per_epoch = 100 # 1000

# Number of episodes during the testing phase
N = 50

# Delay between one step of the episode and the next in simulation
delay = 0.01

# Loading and preparation of datasets and environments

In [3]:
# Loading Minari datasets for the tasks
pen_dataset = minari.load_dataset("D4RL/pen/expert-v2")
relocate_dataset = minari.load_dataset("D4RL/relocate/expert-v2")
hammer_dataset = minari.load_dataset("D4RL/hammer/expert-v2")
door_dataset = minari.load_dataset("D4RL/door/expert-v2")

pen_env = pen_dataset.recover_environment()
relocate_env = relocate_dataset.recover_environment()
hammer_env = hammer_dataset.recover_environment()
door_env = door_dataset.recover_environment()

In [4]:
def prepare_d3_dataset(minari_dataset):
    # Lists to collect observations, actions, rewards, and terminals from all episodes
    observations = []
    actions = []
    rewards = []
    terminals = []

    # Iterate over episodes in the Minari dataset
    for episode in minari_dataset.iterate_episodes():
        # Extract sequences of data, removing the last observation
        obs = episode.observations[:-1]
        actions_ep = episode.actions
        rewards_ep = episode.rewards
        dones = np.array(episode.terminations) | np.array(episode.truncations)

        observations.append(obs)
        actions.append(actions_ep)
        rewards.append(rewards_ep)
        terminals.append(dones)

    # Merge all episodes into single arrays
    observations = np.concatenate(observations)
    actions = np.concatenate(actions)
    rewards = np.concatenate(rewards)
    terminals = np.concatenate(terminals)

    # Build the final dataset in d3rlpy's MDPDataset format
    d3_dataset = d3rlpy.datasets.MDPDataset(
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
        action_space=d3rlpy.constants.ActionSpace.CONTINUOUS
    )

    return d3_dataset

In [5]:
# Conversion of Minari datasets into MDPDataset format for training
pen_d3_dataset = prepare_d3_dataset(pen_dataset)
relocate_d3_dataset = prepare_d3_dataset(relocate_dataset)
hammer_d3_dataset = prepare_d3_dataset(hammer_dataset)
door_d3_dataset = prepare_d3_dataset(door_dataset)

[2m2025-05-18 18:37.09[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(24,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(45,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(1,)])[0m
[2m2025-05-18 18:37.09[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m24[0m
[2m2025-05-18 18:37.12[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(30,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(39,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(1,)])[0m
[2m2025-05-18 18:37.12[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction

# Training Algorithm

In [6]:
def train_offline_algorithm(config_class, dataset, env, filename, task):
    # Initialize the algorithm on CPU
    algo = config_class().create(device="cpu")

    # Build the neural networks based on the dataset
    algo.build_with_dataset(dataset)

    # Train the algorithm on the offline dataset and periodically evaluate online, saving the training history
    algo.fit(
        dataset=dataset,
        n_steps=n_steps,
        n_steps_per_epoch=n_steps_per_epoch,
        evaluators={"environment": d3rlpy.metrics.EnvironmentEvaluator(env)},
        logger_adapter=d3rlpy.logging.FileAdapterFactory(root_dir=f"training_logs/offline/{task}"),
    )

    algo.save(f'policies/offline/{filename}.d3')

# Creation of folders for policies and logs

In [8]:
# Create policies/offline
policies_path = os.path.join("policies", "offline")
if not os.path.exists(policies_path):
    os.makedirs(policies_path)
    print(f"Created: {policies_path}")
else:
    print(f"Already exists: {policies_path}")

# Create training_logs/offline/{task}
training_base = os.path.join("training_logs", "offline")
task_dirs = ["pen", "relocate", "hammer", "door"]

for task in task_dirs:
    task_path = os.path.join(training_base, task)
    if not os.path.exists(task_path):
        os.makedirs(task_path)
        print(f"Created: {task_path}")
    else:
        print(f"Already exists: {task_path}")

Created: policies/offline
Created: training_logs/offline/pen
Created: training_logs/offline/relocate
Created: training_logs/offline/hammer
Created: training_logs/offline/door


# Policy training

### Pen

In [9]:
train_offline_algorithm(d3rlpy.algos.IQLConfig, pen_d3_dataset, pen_env, 'pen_iql', 'pen')
train_offline_algorithm(d3rlpy.algos.CQLConfig, pen_d3_dataset, pen_env, 'pen_cql', 'pen')
train_offline_algorithm(d3rlpy.algos.BCConfig, pen_d3_dataset, pen_env, 'pen_bc', 'pen')
train_offline_algorithm(d3rlpy.algos.TD3PlusBCConfig, pen_d3_dataset, pen_env, 'pen_td3bc', 'pen')
train_offline_algorithm(d3rlpy.algos.AWACConfig, pen_d3_dataset, pen_env, 'pen_awac', 'pen')

[2m2025-05-18 18:37.55[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(45,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(24,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=24)[0m
[2m2025-05-18 18:37.55[0m [[32m[1minfo     [0m] [1mDirectory is created at training_logs/offline/pen/IQL_20250518183755[0m
[2m2025-05-18 18:37.55[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [45], 'action_size': 24, 'config': {'type': 'iql', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params': {}}, 'action_scaler': {'type': 'none', 'params': {}}, 'reward_scaler': {'type': 'none', 'params': {}}, 'compile_graph': False, 'actor_learning_rate': 0.0003, 'critic_learning_rate': 0.00

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:37.56[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0023915457725524904, 'time_algorithm_update': 0.006236391067504883, 'critic_loss': 2401.4978924560546, 'q_loss': 2401.4966394042967, 'v_loss': 0.0012520333076827229, 'actor_loss': 31.78553134918213, 'time_step': 0.008673124313354492, 'environment': 489.2528916382565}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:37.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:37.57[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017271947860717774, 'time_algorithm_update': 0.007433810234069824, 'critic_loss': 693.5073046875, 'q_loss': 693.4424584960938, 'v_loss': 0.06484254992567003, 'actor_loss': 13.433249387741089, 'time_step': 0.009196622371673584, 'environment': 562.4906482424059}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:37.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:37.58[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017153215408325195, 'time_algorithm_update': 0.005501208305358887, 'critic_loss': 573.1619937133789, 'q_loss': 572.5764120483399, 'v_loss': 0.5855824917554855, 'actor_loss': 52.3593971824646, 'time_step': 0.007253105640411377, 'environment': 1200.616158888086}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:37.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:37.59[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017052841186523436, 'time_algorithm_update': 0.005221478939056397, 'critic_loss': 543.6203857421875, 'q_loss': 541.5903411865235, 'v_loss': 2.030042631626129, 'actor_loss': 88.396455078125, 'time_step': 0.006962659358978271, 'environment': 544.9184390532171}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:37.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.01[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017405509948730468, 'time_algorithm_update': 0.005456602573394776, 'critic_loss': 526.9444299316406, 'q_loss': 522.6446755981445, 'v_loss': 4.2997559595108035, 'actor_loss': 96.90768592834473, 'time_step': 0.007233684062957764, 'environment': 1555.1747083751652}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:38.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.02[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017657470703125, 'time_algorithm_update': 0.005008907318115235, 'critic_loss': 566.4813327026367, 'q_loss': 559.7124545288086, 'v_loss': 6.7688799524307255, 'actor_loss': 93.21356903076172, 'time_step': 0.006808981895446777, 'environment': 891.6184900782046}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:38.02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.03[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016872096061706543, 'time_algorithm_update': 0.005046024322509766, 'critic_loss': 611.2186221313476, 'q_loss': 602.4289483642578, 'v_loss': 8.789672513008117, 'actor_loss': 86.50951492309571, 'time_step': 0.006767346858978272, 'environment': 874.6956508664338}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:38.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.04[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016891336441040039, 'time_algorithm_update': 0.005122876167297364, 'critic_loss': 676.5256002807618, 'q_loss': 666.4580087280274, 'v_loss': 10.067590475082397, 'actor_loss': 83.95579174041748, 'time_step': 0.006847825050354004, 'environment': 1920.8254540063263}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:38.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.05[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016629576683044433, 'time_algorithm_update': 0.004936819076538086, 'critic_loss': 787.7351919555664, 'q_loss': 776.5870907592773, 'v_loss': 11.148100996017456, 'actor_loss': 81.48355308532715, 'time_step': 0.006633355617523194, 'environment': 2113.947059990209}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:38.05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.06[0m [[32m[1minfo     [0m] [1mIQL_20250518183755: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016598129272460938, 'time_algorithm_update': 0.0050315213203430175, 'critic_loss': 879.2197732543946, 'q_loss': 867.707907409668, 'v_loss': 11.51186101436615, 'actor_loss': 74.85352363586426, 'time_step': 0.006725165843963623, 'environment': 1776.6581431780032}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:38.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/IQL_20250518183755/model_1000.d3[0m
[2m2025-05-18 18:38.06[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(45,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(24,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.09[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001825251579284668, 'time_algorithm_update': 0.029733405113220215, 'critic_loss': 2544.494907836914, 'conservative_loss': 77.54498958587646, 'alpha': 1.004752552509308, 'actor_loss': -28.087945556640626, 'temp': 0.9949698036909104, 'temp_loss': 39.27418285369873, 'time_step': 0.031601967811584475, 'environment': 574.699098039526}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:38.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.13[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0019074773788452148, 'time_algorithm_update': 0.030282316207885743, 'critic_loss': 739.945560913086, 'conservative_loss': 68.27887062072755, 'alpha': 1.0132822751998902, 'actor_loss': -50.87469589233398, 'temp': 0.9858731985092163, 'temp_loss': 30.995810108184813, 'time_step': 0.03223266363143921, 'environment': -18.36526152125152}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:38.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.17[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001881401538848877, 'time_algorithm_update': 0.02990744113922119, 'critic_loss': 618.1574920654297, 'conservative_loss': 31.7912979888916, 'alpha': 1.0208201813697815, 'actor_loss': -58.367444801330564, 'temp': 0.9772688233852387, 'temp_loss': 31.18894441604614, 'time_step': 0.03183393001556396, 'environment': 139.48552780299}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:38.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.20[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018298649787902833, 'time_algorithm_update': 0.02920253038406372, 'critic_loss': 534.269130859375, 'conservative_loss': 3.8533651554584503, 'alpha': 1.02386900305748, 'actor_loss': -67.12620765686034, 'temp': 0.9682497429847717, 'temp_loss': 31.31550548553467, 'time_step': 0.031075425148010254, 'environment': 11.987321877538543}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:38.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.24[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001955463886260986, 'time_algorithm_update': 0.030978493690490723, 'critic_loss': 505.2198712158203, 'conservative_loss': -12.980834345817566, 'alpha': 1.0229701173305512, 'actor_loss': -80.10164108276368, 'temp': 0.9592773121595383, 'temp_loss': 29.888840465545655, 'time_step': 0.032979161739349366, 'environment': 223.9608393398183}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:38.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.27[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002027621269226074, 'time_algorithm_update': 0.02973128318786621, 'critic_loss': 514.6781732177734, 'conservative_loss': -21.019168882369996, 'alpha': 1.0192955780029296, 'actor_loss': -94.36067459106445, 'temp': 0.9506559693813323, 'temp_loss': 28.26866912841797, 'time_step': 0.031802241802215574, 'environment': -41.780338354131146}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:38.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.31[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018517565727233887, 'time_algorithm_update': 0.02939276933670044, 'critic_loss': 588.5625564575196, 'conservative_loss': -27.5911904335022, 'alpha': 1.0135485780239106, 'actor_loss': -109.01750076293945, 'temp': 0.9423156380653381, 'temp_loss': 27.05601266860962, 'time_step': 0.031287386417388915, 'environment': 164.88236814383686}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:38.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.35[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018687820434570312, 'time_algorithm_update': 0.030355653762817382, 'critic_loss': 635.4439254760742, 'conservative_loss': -33.06088060379028, 'alpha': 1.0060149931907654, 'actor_loss': -123.94144523620605, 'temp': 0.9341652286052704, 'temp_loss': 26.157040672302244, 'time_step': 0.032267885208129884, 'environment': 479.6519175963514}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:38.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.38[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0020010185241699217, 'time_algorithm_update': 0.03002650499343872, 'critic_loss': 772.3431030273438, 'conservative_loss': -36.44170114517212, 'alpha': 0.9970827025175094, 'actor_loss': -138.95604553222657, 'temp': 0.926144734621048, 'temp_loss': 25.431164569854737, 'time_step': 0.032073190212249754, 'environment': 481.2230587028449}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:38.38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.42[0m [[32m[1minfo     [0m] [1mCQL_20250518183806: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018840765953063966, 'time_algorithm_update': 0.029783549308776854, 'critic_loss': 1018.0065209960937, 'conservative_loss': -38.442665824890135, 'alpha': 0.9875029343366623, 'actor_loss': -155.24788864135743, 'temp': 0.9182659471035004, 'temp_loss': 24.53575090408325, 'time_step': 0.03171037197113037, 'environment': 54.54758489698298}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:38.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/CQL_20250518183806/model_1000.d3[0m
[2m2025-05-18 18:38.42[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(45,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(24,)]), reward_signature=Signature(dtype=[

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.42[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007213139533996582, 'time_algorithm_update': 0.0007260251045227051, 'loss': 0.1508719039708376, 'time_step': 0.0014708948135375977, 'environment': 2006.8033746963144}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:38.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.43[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006995654106140137, 'time_algorithm_update': 0.0007262039184570313, 'loss': 0.10671293899416924, 'time_step': 0.0014458298683166504, 'environment': 2671.8405014290283}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:38.43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.43[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006834197044372558, 'time_algorithm_update': 0.0006840777397155762, 'loss': 0.10308715768158436, 'time_step': 0.001383965015411377, 'environment': 2240.6073853548132}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:38.43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.44[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007005739212036133, 'time_algorithm_update': 0.0007199645042419434, 'loss': 0.1008719526976347, 'time_step': 0.0014356327056884765, 'environment': 2074.799204498978}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:38.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.44[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006688642501831055, 'time_algorithm_update': 0.0006577873229980469, 'loss': 0.10125747494399548, 'time_step': 0.0013428497314453124, 'environment': 2679.4451346742244}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:38.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.45[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006804156303405761, 'time_algorithm_update': 0.0006720590591430664, 'loss': 0.10010632000863552, 'time_step': 0.0013666510581970214, 'environment': 3122.341849441915}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:38.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.45[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006784796714782715, 'time_algorithm_update': 0.0006638622283935547, 'loss': 0.1003629618883133, 'time_step': 0.0013611483573913574, 'environment': 3514.337540575171}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:38.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.46[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007129549980163574, 'time_algorithm_update': 0.0007524943351745606, 'loss': 0.09973118007183075, 'time_step': 0.0014855670928955078, 'environment': 3949.7824414964334}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:38.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.46[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007301545143127441, 'time_algorithm_update': 0.0007288694381713867, 'loss': 0.09965254053473473, 'time_step': 0.0014797568321228028, 'environment': 2209.1976242061337}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:38.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.47[0m [[32m[1minfo     [0m] [1mBC_20250518183842: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007116317749023438, 'time_algorithm_update': 0.0007156133651733398, 'loss': 0.09875684186816215, 'time_step': 0.0014482450485229492, 'environment': 3328.035671192168}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:38.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/BC_20250518183842/model_1000.d3[0m
[2m2025-05-18 18:38.47[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(45,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(24,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=24)[0m
[2m2025-05-18 18:38.47[0m [[32m[1minfo     [0m] [1mDirectory is creat

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.47[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001659834384918213, 'time_algorithm_update': 0.003968789577484131, 'critic_loss': 2374.598745727539, 'actor_loss': -2.055789422988892, 'bc_loss': 0.4425239473581314, 'time_step': 0.005660989284515381, 'environment': -8.394174931692394}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:38.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.49[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017380499839782714, 'time_algorithm_update': 0.005111973285675048, 'critic_loss': 689.7350329589843, 'actor_loss': -1.8015106153488158, 'bc_loss': 0.6984893870353699, 'time_step': 0.006882798671722412, 'environment': 474.14354659619283}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:38.49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.50[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017889118194580078, 'time_algorithm_update': 0.004356710910797119, 'critic_loss': 559.5511563110351, 'actor_loss': -1.8089009308815003, 'bc_loss': 0.6910990822315216, 'time_step': 0.006180691719055176, 'environment': 94.61432981674997}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:38.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.51[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018513917922973633, 'time_algorithm_update': 0.004968650341033935, 'critic_loss': 500.0933352661133, 'actor_loss': -1.8764852690696716, 'bc_loss': 0.6235147249698639, 'time_step': 0.006857967376708985, 'environment': 28.545519351633708}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:38.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.52[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017774152755737306, 'time_algorithm_update': 0.004120643138885498, 'critic_loss': 466.8305438232422, 'actor_loss': -1.9584798574447633, 'bc_loss': 0.5412578576803208, 'time_step': 0.005936727523803711, 'environment': 1.7895985813953856}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:38.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.53[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001761324405670166, 'time_algorithm_update': 0.003960413932800293, 'critic_loss': 450.99232177734376, 'actor_loss': -2.071240701675415, 'bc_loss': 0.4286501079797745, 'time_step': 0.005756378173828125, 'environment': 21.594816452581}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:38.53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.54[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017388129234313964, 'time_algorithm_update': 0.003933429718017578, 'critic_loss': 447.89895904541015, 'actor_loss': -2.1627222537994384, 'bc_loss': 0.3372731298208237, 'time_step': 0.00570502758026123, 'environment': 35.75498061482329}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:38.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.54[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001712930202484131, 'time_algorithm_update': 0.0039365792274475096, 'critic_loss': 451.5179669189453, 'actor_loss': -2.211255278587341, 'bc_loss': 0.28857218205928803, 'time_step': 0.005683610439300537, 'environment': 20.44102753614019}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:38.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.55[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016898870468139648, 'time_algorithm_update': 0.003915877342224121, 'critic_loss': 459.3517419433594, 'actor_loss': -2.2366245698928835, 'bc_loss': 0.2633218893408775, 'time_step': 0.005641481876373291, 'environment': 34.38499143582584}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:38.55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.56[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518183847: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001692328453063965, 'time_algorithm_update': 0.003977658748626709, 'critic_loss': 511.6448577880859, 'actor_loss': -2.2541017818450926, 'bc_loss': 0.24505096465349196, 'time_step': 0.005702936649322509, 'environment': 21.18477646841546}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:38.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/TD3PlusBC_20250518183847/model_1000.d3[0m
[2m2025-05-18 18:38.56[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(45,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(24,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:38.59[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006597633361816406, 'time_algorithm_update': 0.011078934669494628, 'critic_loss': 1515.9315173339844, 'actor_loss': 804936.3421875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.017715644836425782, 'environment': 8.410851699632278}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:38.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.01[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006958942413330078, 'time_algorithm_update': 0.012434933185577392, 'critic_loss': 669.8427728271485, 'actor_loss': 396145.97625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01943585157394409, 'environment': 748.8670424597892}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:39.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.03[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006732895374298096, 'time_algorithm_update': 0.01150679111480713, 'critic_loss': 546.1485437011719, 'actor_loss': 398595.64734375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018281190395355223, 'environment': 553.0900370340382}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:39.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.06[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.009654803276062012, 'time_algorithm_update': 0.011678814888000488, 'critic_loss': 478.9053366088867, 'actor_loss': 373659.8559375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.021373651027679443, 'environment': 4.709722611172638}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:39.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.08[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0067294883728027345, 'time_algorithm_update': 0.010662670135498048, 'critic_loss': 442.9562066650391, 'actor_loss': 390711.7796875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01743131160736084, 'environment': 97.21545426083047}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:39.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.10[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00668419599533081, 'time_algorithm_update': 0.010598323345184325, 'critic_loss': 416.42507720947265, 'actor_loss': 432147.91328125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01732147455215454, 'environment': 59.110549758477944}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:39.10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.12[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006702327728271484, 'time_algorithm_update': 0.010508966445922852, 'critic_loss': 406.9453421020508, 'actor_loss': 406774.241875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.017250919342041017, 'environment': 548.4339209761745}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:39.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.14[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006740827560424805, 'time_algorithm_update': 0.010460708141326904, 'critic_loss': 402.2044805908203, 'actor_loss': 313467.2221875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.017240161895751952, 'environment': -5.031143722963247}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:39.14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.17[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006782932281494141, 'time_algorithm_update': 0.010878560543060302, 'critic_loss': 402.0726000976563, 'actor_loss': 327483.6275, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01769963502883911, 'environment': 35.59195899254889}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:39.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:39.19[0m [[32m[1minfo     [0m] [1mAWAC_20250518183856: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006812191009521485, 'time_algorithm_update': 0.01053680419921875, 'critic_loss': 393.3514407348633, 'actor_loss': 316696.3978125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.017386393547058107, 'environment': 205.61118658606387}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:39.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/pen/AWAC_20250518183856/model_1000.d3[0m


(<d3rlpy.algos.qlearning.awac.AWAC at 0x162157d00>,
 [(1,
   {'time_sample_batch': 0.006597633361816406,
    'time_algorithm_update': 0.011078934669494628,
    'critic_loss': 1515.9315173339844,
    'actor_loss': 804936.3421875,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.017715644836425782,
    'environment': 8.410851699632278}),
  (2,
   {'time_sample_batch': 0.006958942413330078,
    'time_algorithm_update': 0.012434933185577392,
    'critic_loss': 669.8427728271485,
    'actor_loss': 396145.97625,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.01943585157394409,
    'environment': 748.8670424597892}),
  (3,
   {'time_sample_batch': 0.006732895374298096,
    'time_algorithm_update': 0.01150679111480713,
    'critic_loss': 546.1485437011719,
    'actor_loss': 398595.64734375,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.018281190395355223,
    'environment': 553.0900370340382}),
  (4,
   {'time_sample_batch': 0.009654803276062012,
    'time_algor

### Relocate

In [10]:
train_offline_algorithm(d3rlpy.algos.IQLConfig, relocate_d3_dataset, relocate_env, 'relocate_iql', 'relocate')
train_offline_algorithm(d3rlpy.algos.CQLConfig, relocate_d3_dataset, relocate_env, 'relocate_cql', 'relocate')
train_offline_algorithm(d3rlpy.algos.BCConfig, relocate_d3_dataset, relocate_env, 'relocate_bc', 'relocate')
train_offline_algorithm(d3rlpy.algos.TD3PlusBCConfig, relocate_d3_dataset, relocate_env, 'relocate_td3bc', 'relocate')
train_offline_algorithm(d3rlpy.algos.AWACConfig, relocate_d3_dataset, relocate_env, 'relocate_awac', 'relocate')

[2m2025-05-18 18:40.15[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(30,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=30)[0m
[2m2025-05-18 18:40.15[0m [[32m[1minfo     [0m] [1mDirectory is created at training_logs/offline/relocate/IQL_20250518184015[0m
[2m2025-05-18 18:40.15[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [39], 'action_size': 30, 'config': {'type': 'iql', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params': {}}, 'action_scaler': {'type': 'none', 'params': {}}, 'reward_scaler': {'type': 'none', 'params': {}}, 'compile_graph': False, 'actor_learning_rate': 0.0003, 'critic_learning_rate':

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.17[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003181352615356445, 'time_algorithm_update': 0.006346280574798584, 'critic_loss': 706.9986045837402, 'q_loss': 706.9981391906738, 'v_loss': 0.0004631680389138637, 'actor_loss': 10.649612110443414, 'time_step': 0.009580159187316894, 'environment': 5.188279189436387}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:40.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.19[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001962294578552246, 'time_algorithm_update': 0.005901815891265869, 'critic_loss': 133.73566848754882, 'q_loss': 133.7096824645996, 'v_loss': 0.025986601340118796, 'actor_loss': -2.226599508523941, 'time_step': 0.007901437282562256, 'environment': 7.198446344109041}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:40.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.20[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032337260246276854, 'time_algorithm_update': 0.006975879669189453, 'critic_loss': 98.6142211151123, 'q_loss': 98.4259718322754, 'v_loss': 0.18824916012585163, 'actor_loss': -12.022994220256805, 'time_step': 0.010253934860229493, 'environment': 7.840484687558508}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:40.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.22[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0023952174186706544, 'time_algorithm_update': 0.006028103828430176, 'critic_loss': 85.50127979278564, 'q_loss': 84.9446700668335, 'v_loss': 0.5566095760464669, 'actor_loss': -48.09801203727722, 'time_step': 0.008461282253265381, 'environment': 7.39116633507349}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:40.22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.23[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018680930137634278, 'time_algorithm_update': 0.00514136791229248, 'critic_loss': 82.4536357498169, 'q_loss': 81.43640998840333, 'v_loss': 1.0172258549928666, 'actor_loss': -90.74139862060547, 'time_step': 0.007044627666473389, 'environment': 7.891198257090212}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:40.23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.24[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017610836029052735, 'time_algorithm_update': 0.005132493972778321, 'critic_loss': 87.60156421661377, 'q_loss': 86.2624214553833, 'v_loss': 1.3391425323486328, 'actor_loss': -117.14386123657226, 'time_step': 0.006928415298461914, 'environment': 7.103262391871567}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:40.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.25[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016962432861328126, 'time_algorithm_update': 0.005089485645294189, 'critic_loss': 95.96228031158448, 'q_loss': 94.4737158203125, 'v_loss': 1.4885645771026612, 'actor_loss': -137.63516914367676, 'time_step': 0.006819100379943848, 'environment': 7.288933070099387}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:40.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.27[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00167818546295166, 'time_algorithm_update': 0.0050708198547363284, 'critic_loss': 114.45724170684815, 'q_loss': 112.89983226776123, 'v_loss': 1.5574092721939088, 'actor_loss': -144.62502967834473, 'time_step': 0.0067810797691345214, 'environment': 17.48074611254284}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:40.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.28[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016967487335205079, 'time_algorithm_update': 0.005215642452239991, 'critic_loss': 119.84263095855712, 'q_loss': 118.21239883422851, 'v_loss': 1.6302319586277008, 'actor_loss': -156.65274291992188, 'time_step': 0.006946702003479004, 'environment': 731.5422504116858}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:40.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.30[0m [[32m[1minfo     [0m] [1mIQL_20250518184015: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017069697380065918, 'time_algorithm_update': 0.005258638858795166, 'critic_loss': 154.68977951049806, 'q_loss': 152.96984409332276, 'v_loss': 1.7199356770515442, 'actor_loss': -162.6064250946045, 'time_step': 0.00699976921081543, 'environment': 709.6026511989983}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:40.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/IQL_20250518184015/model_1000.d3[0m
[2m2025-05-18 18:40.30[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(30,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINU

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.34[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002109634876251221, 'time_algorithm_update': 0.031932327747344974, 'critic_loss': 837.4278456115723, 'conservative_loss': 99.94907382965089, 'alpha': 1.0046830201148986, 'actor_loss': -24.392401180267335, 'temp': 0.9949723666906357, 'temp_loss': 49.27904697418213, 'time_step': 0.03408561706542969, 'environment': 16.20998950783526}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:40.34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.38[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002201402187347412, 'time_algorithm_update': 0.03214294910430908, 'critic_loss': 140.95976779937743, 'conservative_loss': 15.41508549630642, 'alpha': 1.0099617803096772, 'actor_loss': -32.828619384765624, 'temp': 0.9857466113567352, 'temp_loss': 40.365506973266605, 'time_step': 0.03439356803894043, 'environment': 6.101043339267202}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:40.38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.41[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0020235371589660644, 'time_algorithm_update': 0.030746161937713623, 'critic_loss': 37.517892112731936, 'conservative_loss': -48.696248836517334, 'alpha': 1.008087114095688, 'actor_loss': -33.646284942626956, 'temp': 0.9771765929460525, 'temp_loss': 37.163345375061034, 'time_step': 0.03281301736831665, 'environment': 4.530239451499768}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:40.41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.45[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0020110154151916503, 'time_algorithm_update': 0.0319657039642334, 'critic_loss': -12.838118553161621, 'conservative_loss': -97.18185134887695, 'alpha': 0.997848419547081, 'actor_loss': -38.31517204284668, 'temp': 0.9690056121349335, 'temp_loss': 32.904104175567625, 'time_step': 0.0340212869644165, 'environment': 5.127748748995634}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:40.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.50[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0026193976402282713, 'time_algorithm_update': 0.04285886764526367, 'critic_loss': -33.94769775390625, 'conservative_loss': -126.16655029296875, 'alpha': 0.9837861299514771, 'actor_loss': -46.027332878112794, 'temp': 0.9614098531007766, 'temp_loss': 29.060398826599123, 'time_step': 0.04553267240524292, 'environment': 5.042385581906625}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:40.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.55[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031119251251220705, 'time_algorithm_update': 0.03315308570861816, 'critic_loss': -38.7458674621582, 'conservative_loss': -139.47985107421874, 'alpha': 0.9696605843305588, 'actor_loss': -56.698535499572756, 'temp': 0.9543217837810516, 'temp_loss': 26.01935956954956, 'time_step': 0.03631138801574707, 'environment': 5.188042211300928}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:40.55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:40.59[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002321312427520752, 'time_algorithm_update': 0.03220966577529907, 'critic_loss': -22.204895553588866, 'conservative_loss': -145.77332321166992, 'alpha': 0.9563947319984436, 'actor_loss': -68.18175033569337, 'temp': 0.947541965842247, 'temp_loss': 24.136698398590088, 'time_step': 0.0345763373374939, 'environment': 4.657224042167315}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:40.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.03[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002399933338165283, 'time_algorithm_update': 0.03427433729171753, 'critic_loss': -10.541168899536133, 'conservative_loss': -149.65672821044922, 'alpha': 0.9439508259296417, 'actor_loss': -80.75379196166992, 'temp': 0.9408897864818573, 'temp_loss': 22.85716365814209, 'time_step': 0.03671993732452392, 'environment': 4.889793611544372}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:41.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.07[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002221016883850098, 'time_algorithm_update': 0.03272030830383301, 'critic_loss': 29.60462905883789, 'conservative_loss': -151.3535354614258, 'alpha': 0.9321785944700242, 'actor_loss': -93.93656890869141, 'temp': 0.9343006438016892, 'temp_loss': 21.914366874694824, 'time_step': 0.03498785018920898, 'environment': 5.374004048636849}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:41.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.11[0m [[32m[1minfo     [0m] [1mCQL_20250518184030: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002203512191772461, 'time_algorithm_update': 0.03239439725875855, 'critic_loss': 54.524078903198244, 'conservative_loss': -151.5727001953125, 'alpha': 0.9209888905286789, 'actor_loss': -107.4293603515625, 'temp': 0.9277126550674438, 'temp_loss': 21.208616695404054, 'time_step': 0.03464544296264648, 'environment': 5.933807190207441}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:41.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/CQL_20250518184030/model_1000.d3[0m
[2m2025-05-18 18:41.11[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(30,)]), reward_signature=Signature(dtype

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.12[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008506274223327637, 'time_algorithm_update': 0.0009896421432495116, 'loss': 0.07117173422127962, 'time_step': 0.0018763780593872071, 'environment': 17.19328509756082}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:41.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.12[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007566595077514648, 'time_algorithm_update': 0.0007344961166381836, 'loss': 0.05078827127814293, 'time_step': 0.0015144062042236328, 'environment': 7.9117121170238836}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:41.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.13[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000717461109161377, 'time_algorithm_update': 0.0007442283630371094, 'loss': 0.049102935679256915, 'time_step': 0.0014823627471923827, 'environment': 44.134602701294206}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:41.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.14[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006870794296264649, 'time_algorithm_update': 0.0007248973846435547, 'loss': 0.04842592030763626, 'time_step': 0.0014313626289367675, 'environment': 302.76587729906964}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:41.14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.15[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006965851783752442, 'time_algorithm_update': 0.0007245993614196777, 'loss': 0.04774254970252514, 'time_step': 0.0014414167404174804, 'environment': 625.6113092523952}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:41.15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.16[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0010002398490905763, 'time_algorithm_update': 0.003182663917541504, 'loss': 0.04773768525570631, 'time_step': 0.00420884370803833, 'environment': 1668.4479438753133}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:41.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.17[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0011521053314208984, 'time_algorithm_update': 0.0021487379074096678, 'loss': 0.04748967133462429, 'time_step': 0.003324887752532959, 'environment': 2054.7191600897177}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:41.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.18[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001043722629547119, 'time_algorithm_update': 0.0016017627716064453, 'loss': 0.04762175176292658, 'time_step': 0.0026683902740478518, 'environment': 229.64654518991443}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:41.18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.20[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008341622352600098, 'time_algorithm_update': 0.0006743597984313964, 'loss': 0.04733442325145006, 'time_step': 0.0015273356437683105, 'environment': 3714.0481943610853}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:41.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.20[0m [[32m[1minfo     [0m] [1mBC_20250518184111: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008057904243469238, 'time_algorithm_update': 0.0007351279258728027, 'loss': 0.0470067447796464, 'time_step': 0.0015619492530822754, 'environment': 486.26115640704785}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:41.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/BC_20250518184111/model_1000.d3[0m
[2m2025-05-18 18:41.20[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(30,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=30)[0m
[2m2025-05-18 18:41.20[0m [[32m[1minfo     [0m] [1mDirectory is 

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.21[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001852419376373291, 'time_algorithm_update': 0.003775508403778076, 'critic_loss': 682.2703518676758, 'actor_loss': -2.2389522218704223, 'bc_loss': 0.26104778945446017, 'time_step': 0.005660374164581299, 'environment': 19.08733589021916}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:41.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.23[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018016862869262695, 'time_algorithm_update': 0.00438450813293457, 'critic_loss': 131.405767288208, 'actor_loss': -1.9783358693122863, 'bc_loss': 0.5216641420125961, 'time_step': 0.006219820976257324, 'environment': 9.56951064491352}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:41.23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.24[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0025771594047546385, 'time_algorithm_update': 0.005172598361968994, 'critic_loss': 92.73374694824219, 'actor_loss': -1.9498479247093201, 'bc_loss': 0.550152074098587, 'time_step': 0.007790234088897705, 'environment': 8.578734757107423}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:41.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.25[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0019428586959838866, 'time_algorithm_update': 0.0038417696952819822, 'critic_loss': 73.9597539138794, 'actor_loss': -1.973915331363678, 'bc_loss': 0.5260846966505051, 'time_step': 0.005818572044372559, 'environment': 8.158145567277684}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:41.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.26[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018052864074707031, 'time_algorithm_update': 0.0037607336044311523, 'critic_loss': 66.80065128326416, 'actor_loss': -2.0376184725761415, 'bc_loss': 0.4623303687572479, 'time_step': 0.00559990644454956, 'environment': 8.418962084928705}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:41.26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.27[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00199995756149292, 'time_algorithm_update': 0.004117801189422608, 'critic_loss': 66.98121402740479, 'actor_loss': -2.1102792501449583, 'bc_loss': 0.3897049731016159, 'time_step': 0.0061552333831787105, 'environment': 11.422931813860117}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:41.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.29[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0020337486267089843, 'time_algorithm_update': 0.0039182138442993165, 'critic_loss': 65.416364402771, 'actor_loss': -2.1397854232788087, 'bc_loss': 0.36019654452800753, 'time_step': 0.005988101959228515, 'environment': 15.057524230443317}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:41.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.30[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017634344100952149, 'time_algorithm_update': 0.003707280158996582, 'critic_loss': 69.66315814971924, 'actor_loss': -2.1484664249420167, 'bc_loss': 0.3515335726737976, 'time_step': 0.005503587722778321, 'environment': 12.641663998996815}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:41.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.31[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017036032676696778, 'time_algorithm_update': 0.0035681796073913576, 'critic_loss': 77.26887382507324, 'actor_loss': -2.167877473831177, 'bc_loss': 0.3321220141649246, 'time_step': 0.0053021454811096195, 'environment': 13.464353916825056}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:41.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.32[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184120: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016530942916870117, 'time_algorithm_update': 0.0035792183876037597, 'critic_loss': 86.92904346466065, 'actor_loss': -2.1907613372802732, 'bc_loss': 0.3092386621236801, 'time_step': 0.005260426998138428, 'environment': 13.073629691831622}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:41.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/TD3PlusBC_20250518184120/model_1000.d3[0m
[2m2025-05-18 18:41.32[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(30,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, actio

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.34[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006716058254241943, 'time_algorithm_update': 0.010795061588287353, 'critic_loss': 1205.6437582397461, 'actor_loss': 514241.23078125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.017547597885131837, 'environment': 6.49402404980904}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:41.34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.37[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007811052799224854, 'time_algorithm_update': 0.014103903770446777, 'critic_loss': 277.2829281616211, 'actor_loss': 218237.0928125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.02196101188659668, 'environment': 7.615295358374543}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:41.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.39[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006952996253967285, 'time_algorithm_update': 0.01138843536376953, 'critic_loss': 213.81610900878906, 'actor_loss': 199744.95875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018379724025726317, 'environment': 8.653344223096}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:41.39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.42[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00679694414138794, 'time_algorithm_update': 0.010947265625, 'critic_loss': 195.75254440307617, 'actor_loss': 183148.7709375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.017781662940979003, 'environment': 7.940260828859178}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:41.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.44[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006800017356872559, 'time_algorithm_update': 0.01093015432357788, 'critic_loss': 185.79870040893556, 'actor_loss': 169791.37765625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01776599884033203, 'environment': 8.160207328220245}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:41.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.46[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006793458461761475, 'time_algorithm_update': 0.010850026607513427, 'critic_loss': 189.88447113037108, 'actor_loss': 156322.57609375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01767987251281738, 'environment': 7.475162177468855}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:41.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.49[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006766493320465088, 'time_algorithm_update': 0.010810012817382813, 'critic_loss': 189.8804180908203, 'actor_loss': 146079.95796875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.017611665725708006, 'environment': 7.855438805429239}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:41.49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.51[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0070615410804748535, 'time_algorithm_update': 0.01223095417022705, 'critic_loss': 192.99727493286133, 'actor_loss': 131467.315078125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01932996988296509, 'environment': 7.149823583520532}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:41.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.53[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0069163799285888675, 'time_algorithm_update': 0.010992271900177002, 'critic_loss': 198.96470092773438, 'actor_loss': 123277.99640625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.017943065166473388, 'environment': 7.258779228649415}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:41.53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.56[0m [[32m[1minfo     [0m] [1mAWAC_20250518184132: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006869642734527588, 'time_algorithm_update': 0.011153554916381836, 'critic_loss': 211.60583488464354, 'actor_loss': 109912.906953125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018059916496276855, 'environment': 6.972070138864593}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:41.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/relocate/AWAC_20250518184132/model_1000.d3[0m


(<d3rlpy.algos.qlearning.awac.AWAC at 0x17abe2260>,
 [(1,
   {'time_sample_batch': 0.006716058254241943,
    'time_algorithm_update': 0.010795061588287353,
    'critic_loss': 1205.6437582397461,
    'actor_loss': 514241.23078125,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.017547597885131837,
    'environment': 6.49402404980904}),
  (2,
   {'time_sample_batch': 0.007811052799224854,
    'time_algorithm_update': 0.014103903770446777,
    'critic_loss': 277.2829281616211,
    'actor_loss': 218237.0928125,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.02196101188659668,
    'environment': 7.615295358374543}),
  (3,
   {'time_sample_batch': 0.006952996253967285,
    'time_algorithm_update': 0.01138843536376953,
    'critic_loss': 213.81610900878906,
    'actor_loss': 199744.95875,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.018379724025726317,
    'environment': 8.653344223096}),
  (4,
   {'time_sample_batch': 0.00679694414138794,
    'time_algorithm

### Hammer

In [11]:
train_offline_algorithm(d3rlpy.algos.IQLConfig, hammer_d3_dataset, hammer_env, 'hammer_iql', 'hammer')
train_offline_algorithm(d3rlpy.algos.CQLConfig, hammer_d3_dataset, hammer_env, 'hammer_cql', 'hammer')
train_offline_algorithm(d3rlpy.algos.BCConfig, hammer_d3_dataset, hammer_env, 'hammer_bc', 'hammer')
train_offline_algorithm(d3rlpy.algos.TD3PlusBCConfig, hammer_d3_dataset, hammer_env, 'hammer_td3bc', 'hammer')
train_offline_algorithm(d3rlpy.algos.AWACConfig, hammer_d3_dataset, hammer_env, 'hammer_awac', 'hammer')

[2m2025-05-18 18:41.56[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(46,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(26,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=26)[0m
[2m2025-05-18 18:41.56[0m [[32m[1minfo     [0m] [1mDirectory is created at training_logs/offline/hammer/IQL_20250518184156[0m
[2m2025-05-18 18:41.56[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [46], 'action_size': 26, 'config': {'type': 'iql', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params': {}}, 'action_scaler': {'type': 'none', 'params': {}}, 'reward_scaler': {'type': 'none', 'params': {}}, 'compile_graph': False, 'actor_learning_rate': 0.0003, 'critic_learning_rate': 0

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.58[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007340149879455566, 'time_algorithm_update': 0.006869847774505615, 'critic_loss': 8870.816901855469, 'q_loss': 8870.816279296874, 'v_loss': 0.0007186723929771688, 'actor_loss': 14.819753148555755, 'time_step': 0.01425492286682129, 'environment': -235.58140954462374}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:41.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:41.59[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002027769088745117, 'time_algorithm_update': 0.007426340579986573, 'critic_loss': 2303.8857348632814, 'q_loss': 2303.7929418945314, 'v_loss': 0.092792750550434, 'actor_loss': 2.9383639964461326, 'time_step': 0.009489920139312744, 'environment': -231.18594692462443}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:41.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.01[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0023509502410888673, 'time_algorithm_update': 0.0067435050010681155, 'critic_loss': 1574.3130126953124, 'q_loss': 1573.0636181640625, 'v_loss': 1.2493990278244018, 'actor_loss': 4.446548113822937, 'time_step': 0.009135322570800781, 'environment': -237.6506591738213}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:42.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.03[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0020450878143310545, 'time_algorithm_update': 0.005535020828247071, 'critic_loss': 1458.4191650390626, 'q_loss': 1452.4064416503907, 'v_loss': 6.012717597484588, 'actor_loss': -18.534941029548644, 'time_step': 0.007615485191345215, 'environment': -233.13350900961638}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:42.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.04[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017832636833190918, 'time_algorithm_update': 0.00545445442199707, 'critic_loss': 1479.4410900878906, 'q_loss': 1463.5567236328125, 'v_loss': 15.884367437362672, 'actor_loss': -41.27219743728638, 'time_step': 0.0072722315788269045, 'environment': -238.93037255101322}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:42.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.06[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017226171493530274, 'time_algorithm_update': 0.005644280910491944, 'critic_loss': 1545.857410888672, 'q_loss': 1516.8419250488282, 'v_loss': 29.015484676361083, 'actor_loss': -48.42669871330261, 'time_step': 0.007400791645050048, 'environment': -236.1423926076409}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:42.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.07[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0020512270927429198, 'time_algorithm_update': 0.0062761688232421875, 'critic_loss': 1664.9738793945312, 'q_loss': 1623.8841320800782, 'v_loss': 41.089743537902834, 'actor_loss': -55.73245849847794, 'time_step': 0.008366048336029053, 'environment': -235.95504697977134}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:42.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.09[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018960762023925782, 'time_algorithm_update': 0.005650711059570312, 'critic_loss': 1782.9395825195313, 'q_loss': 1728.6464123535156, 'v_loss': 54.29316608428955, 'actor_loss': -60.17516744613648, 'time_step': 0.007582554817199707, 'environment': -237.0686403226549}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:42.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.11[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002263648509979248, 'time_algorithm_update': 0.007568843364715576, 'critic_loss': 1886.3015441894531, 'q_loss': 1824.5552087402343, 'v_loss': 61.74632694244385, 'actor_loss': -64.49040946960449, 'time_step': 0.009884183406829833, 'environment': 5756.116904724238}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:42.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.12[0m [[32m[1minfo     [0m] [1mIQL_20250518184156: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030164623260498045, 'time_algorithm_update': 0.0074423480033874514, 'critic_loss': 2198.906766357422, 'q_loss': 2128.7493395996094, 'v_loss': 70.15743049621582, 'actor_loss': -65.66903497695922, 'time_step': 0.010505237579345704, 'environment': -239.07084315741076}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:42.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/IQL_20250518184156/model_1000.d3[0m
[2m2025-05-18 18:42.12[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(46,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(26,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINU

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.17[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002293691635131836, 'time_algorithm_update': 0.032082533836364745, 'critic_loss': 9125.05328125, 'conservative_loss': 82.13334083557129, 'alpha': 1.0046847200393676, 'actor_loss': -30.029853382110595, 'temp': 0.9949755626916885, 'temp_loss': 42.43228736877441, 'time_step': 0.03442169189453125, 'environment': -232.1601985668022}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:42.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.21[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002490551471710205, 'time_algorithm_update': 0.03384026288986206, 'critic_loss': 2304.9956994628906, 'conservative_loss': 133.80258533477783, 'alpha': 1.0138226068019867, 'actor_loss': -87.7770157623291, 'temp': 0.986468414068222, 'temp_loss': 26.815800857543945, 'time_step': 0.0363779091835022, 'environment': -234.39915405301485}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:42.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.25[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002681889533996582, 'time_algorithm_update': 0.03240859031677246, 'critic_loss': 1654.881749267578, 'conservative_loss': 177.8546371459961, 'alpha': 1.0282945656776428, 'actor_loss': -113.33016296386718, 'temp': 0.9792013126611709, 'temp_loss': 25.700348720550537, 'time_step': 0.035138335227966305, 'environment': -233.0905792940029}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:42.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.29[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0022875571250915525, 'time_algorithm_update': 0.032380273342132566, 'critic_loss': 1491.1177111816405, 'conservative_loss': 136.44151763916017, 'alpha': 1.0400415360927582, 'actor_loss': -138.98232719421387, 'temp': 0.9712722432613373, 'temp_loss': 26.78101541519165, 'time_step': 0.034713923931121826, 'environment': -234.52831495584533}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:42.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.34[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0021505093574523924, 'time_algorithm_update': 0.03252678632736206, 'critic_loss': 1460.2485107421876, 'conservative_loss': 116.71095924377441, 'alpha': 1.0494622457027436, 'actor_loss': -168.55370162963868, 'temp': 0.9626404452323913, 'temp_loss': 27.991418495178223, 'time_step': 0.03472301006317138, 'environment': -234.1550051154532}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:42.34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.38[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0026473736763000487, 'time_algorithm_update': 0.03324616432189941, 'critic_loss': 1547.7325708007813, 'conservative_loss': 93.67047439575195, 'alpha': 1.0577930986881257, 'actor_loss': -198.32566940307618, 'temp': 0.9536146026849747, 'temp_loss': 28.561969871520997, 'time_step': 0.03594030141830444, 'environment': -232.7091224331265}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:42.38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.42[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029395747184753416, 'time_algorithm_update': 0.035488734245300295, 'critic_loss': 1616.6147399902343, 'conservative_loss': 72.26364387512207, 'alpha': 1.064750462770462, 'actor_loss': -228.56780288696288, 'temp': 0.9444825559854507, 'temp_loss': 28.47896213531494, 'time_step': 0.03848135471343994, 'environment': -233.89069765962185}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:42.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.47[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0026868057250976564, 'time_algorithm_update': 0.033935713768005374, 'critic_loss': 1720.6819775390625, 'conservative_loss': 53.159128494262696, 'alpha': 1.070417332649231, 'actor_loss': -258.8551457214355, 'temp': 0.9354267692565919, 'temp_loss': 28.181531085968018, 'time_step': 0.03667044401168823, 'environment': -233.90962984171082}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:42.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.51[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0034372830390930176, 'time_algorithm_update': 0.03603414058685303, 'critic_loss': 1831.4119470214844, 'conservative_loss': 39.55425924301147, 'alpha': 1.0749139237403869, 'actor_loss': -288.5359997558594, 'temp': 0.9265332287549972, 'temp_loss': 27.707593059539796, 'time_step': 0.03952239751815796, 'environment': -232.21619649918117}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:42.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.56[0m [[32m[1minfo     [0m] [1mCQL_20250518184213: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032607293128967285, 'time_algorithm_update': 0.03470083475112915, 'critic_loss': 2097.3392944335938, 'conservative_loss': 29.935892276763916, 'alpha': 1.0786337316036225, 'actor_loss': -319.0595178222656, 'temp': 0.9177661710977554, 'temp_loss': 27.303816947937012, 'time_step': 0.03801322937011719, 'environment': -232.74496006314544}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:42.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/CQL_20250518184213/model_1000.d3[0m
[2m2025-05-18 18:42.56[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(46,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(26,)]), reward_signature=Signature(dtyp

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.57[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008708405494689942, 'time_algorithm_update': 0.00076005220413208, 'loss': 0.09566425271332264, 'time_step': 0.0016599440574645997, 'environment': 11668.739344212203}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:42.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:42.58[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007487344741821289, 'time_algorithm_update': 0.0007145094871520996, 'loss': 0.07048346646130085, 'time_step': 0.0014836740493774413, 'environment': 86.25707263065658}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:42.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.00[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008048439025878907, 'time_algorithm_update': 0.0007476925849914551, 'loss': 0.0688965629786253, 'time_step': 0.0015772938728332519, 'environment': 12950.57721154857}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:43.00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.01[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007477378845214843, 'time_algorithm_update': 0.0007854795455932617, 'loss': 0.0681410402804613, 'time_step': 0.0015590381622314453, 'environment': 9651.525649158319}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:43.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.03[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0011109614372253418, 'time_algorithm_update': 0.0007563090324401856, 'loss': 0.06730738934129477, 'time_step': 0.0018930768966674805, 'environment': 14950.80778066882}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:43.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.04[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008439993858337402, 'time_algorithm_update': 0.0006873559951782227, 'loss': 0.06760636355727911, 'time_step': 0.001552116870880127, 'environment': 12779.704988900086}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:43.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.05[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007951927185058594, 'time_algorithm_update': 0.0006720447540283204, 'loss': 0.067361809797585, 'time_step': 0.001486520767211914, 'environment': 11377.704648815841}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:43.05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.07[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007241201400756836, 'time_algorithm_update': 0.0006944465637207031, 'loss': 0.06723463319242001, 'time_step': 0.0014365673065185546, 'environment': 10332.245462128787}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:43.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.08[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006878876686096191, 'time_algorithm_update': 0.0006684708595275879, 'loss': 0.06728637464344502, 'time_step': 0.0013726115226745605, 'environment': 16187.637103360114}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:43.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.10[0m [[32m[1minfo     [0m] [1mBC_20250518184256: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0013362407684326173, 'time_algorithm_update': 0.000842599868774414, 'loss': 0.06674823272973299, 'time_step': 0.002208693027496338, 'environment': 9713.427330050568}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:43.10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/BC_20250518184256/model_1000.d3[0m
[2m2025-05-18 18:43.10[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(46,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(26,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=26)[0m
[2m2025-05-18 18:43.10[0m [[32m[1minfo     [0m] [1mDirectory is crea

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.11[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0022896552085876465, 'time_algorithm_update': 0.004323687553405762, 'critic_loss': 8797.519458007813, 'actor_loss': -2.2109726667404175, 'bc_loss': 0.2883394145965576, 'time_step': 0.006649911403656006, 'environment': -231.63757713907097}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:43.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.13[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018815350532531737, 'time_algorithm_update': 0.004848318099975586, 'critic_loss': 2343.4916259765623, 'actor_loss': -2.0789885473251344, 'bc_loss': 0.42101143538951874, 'time_step': 0.006764905452728271, 'environment': -234.85152732997616}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:43.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.14[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017869687080383301, 'time_algorithm_update': 0.004225404262542725, 'critic_loss': 1565.9541613769532, 'actor_loss': -2.0766539573669434, 'bc_loss': 0.42334603011608124, 'time_step': 0.006046781539916992, 'environment': -236.89183053097108}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:43.14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.15[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017496466636657715, 'time_algorithm_update': 0.0041688823699951175, 'critic_loss': 1394.1344085693358, 'actor_loss': -2.0864481592178343, 'bc_loss': 0.4135518163442612, 'time_step': 0.005951197147369385, 'environment': -236.66102803238527}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:43.15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.16[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017746424674987794, 'time_algorithm_update': 0.004111418724060059, 'critic_loss': 1259.0177124023437, 'actor_loss': -2.0987718296051026, 'bc_loss': 0.4012281775474548, 'time_step': 0.005915708541870117, 'environment': -238.3725793674308}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:43.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.18[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017820978164672851, 'time_algorithm_update': 0.004149134159088135, 'critic_loss': 1268.5381597900391, 'actor_loss': -2.104002637863159, 'bc_loss': 0.39599734485149385, 'time_step': 0.005964393615722656, 'environment': -235.4067818839224}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:43.18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.19[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017454099655151367, 'time_algorithm_update': 0.004172115325927734, 'critic_loss': 1286.9652990722657, 'actor_loss': -2.140511350631714, 'bc_loss': 0.35948860704898833, 'time_step': 0.005951457023620606, 'environment': -233.04148862576062}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:43.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.20[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017797040939331054, 'time_algorithm_update': 0.004184608459472656, 'critic_loss': 1310.0464263916015, 'actor_loss': -2.1581817150115965, 'bc_loss': 0.3418182986974716, 'time_step': 0.005998623371124267, 'environment': -232.67011607293983}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:43.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.22[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002561047077178955, 'time_algorithm_update': 0.007046089172363281, 'critic_loss': 1304.8894177246093, 'actor_loss': -2.165798487663269, 'bc_loss': 0.33420147001743317, 'time_step': 0.00966768741607666, 'environment': -234.85583533189697}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:43.22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.24[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184310: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030658769607543944, 'time_algorithm_update': 0.005095124244689941, 'critic_loss': 1438.590393676758, 'actor_loss': -2.1843226242065428, 'bc_loss': 0.315677375793457, 'time_step': 0.008202869892120362, 'environment': -237.48560266686476}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:43.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/TD3PlusBC_20250518184310/model_1000.d3[0m
[2m2025-05-18 18:43.24[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(46,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(26,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_s

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.26[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007343995571136475, 'time_algorithm_update': 0.012756972312927247, 'critic_loss': 4242.282707519531, 'actor_loss': 471206.53640625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.02014333963394165, 'environment': -236.434993416237}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:43.26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.29[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006957356929779053, 'time_algorithm_update': 0.01290010690689087, 'critic_loss': 1757.700421142578, 'actor_loss': 204663.432578125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.019899809360504152, 'environment': -231.73257020335245}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:43.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.32[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00690997838973999, 'time_algorithm_update': 0.011356468200683595, 'critic_loss': 1308.7425939941406, 'actor_loss': 184735.8736328125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018306519985198975, 'environment': -231.67110109072217}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:43.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.34[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006955242156982422, 'time_algorithm_update': 0.011447415351867676, 'critic_loss': 1162.1668927001954, 'actor_loss': 171305.5797265625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01844316005706787, 'environment': -232.51571763133606}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:43.34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.37[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.006974084377288818, 'time_algorithm_update': 0.011377532482147217, 'critic_loss': 1150.278472290039, 'actor_loss': 163063.701015625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018389639854431154, 'environment': -234.11571465956598}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:43.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.39[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007070722579956054, 'time_algorithm_update': 0.011941168308258057, 'critic_loss': 1129.9893383789063, 'actor_loss': 158372.541953125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.019052650928497315, 'environment': -234.30619044375044}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:43.39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.42[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007454190254211426, 'time_algorithm_update': 0.013004894256591798, 'critic_loss': 1145.2420825195313, 'actor_loss': 150688.7248046875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.02050103187561035, 'environment': -234.5880645039672}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:43.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.45[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007223193645477295, 'time_algorithm_update': 0.012555429935455322, 'critic_loss': 1155.8998815917969, 'actor_loss': 149120.83234375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.019819087982177734, 'environment': -235.67131357208353}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:43.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.47[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007092351913452148, 'time_algorithm_update': 0.011800620555877685, 'critic_loss': 1184.016176147461, 'actor_loss': 138509.465, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0189325213432312, 'environment': -232.4682727325093}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:43.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.50[0m [[32m[1minfo     [0m] [1mAWAC_20250518184324: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007115712165832519, 'time_algorithm_update': 0.011782417297363281, 'critic_loss': 1199.7043359375, 'actor_loss': 133668.7353515625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018937323093414307, 'environment': -233.18178041608434}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:43.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/hammer/AWAC_20250518184324/model_1000.d3[0m


(<d3rlpy.algos.qlearning.awac.AWAC at 0x17da76ef0>,
 [(1,
   {'time_sample_batch': 0.007343995571136475,
    'time_algorithm_update': 0.012756972312927247,
    'critic_loss': 4242.282707519531,
    'actor_loss': 471206.53640625,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.02014333963394165,
    'environment': -236.434993416237}),
  (2,
   {'time_sample_batch': 0.006957356929779053,
    'time_algorithm_update': 0.01290010690689087,
    'critic_loss': 1757.700421142578,
    'actor_loss': 204663.432578125,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.019899809360504152,
    'environment': -231.73257020335245}),
  (3,
   {'time_sample_batch': 0.00690997838973999,
    'time_algorithm_update': 0.011356468200683595,
    'critic_loss': 1308.7425939941406,
    'actor_loss': 184735.8736328125,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.018306519985198975,
    'environment': -231.67110109072217}),
  (4,
   {'time_sample_batch': 0.006955242156982422,
    '

### Door

In [12]:
train_offline_algorithm(d3rlpy.algos.IQLConfig, door_d3_dataset, door_env, 'door_iql', 'door')
train_offline_algorithm(d3rlpy.algos.CQLConfig, door_d3_dataset, door_env, 'door_cql', 'door')
train_offline_algorithm(d3rlpy.algos.BCConfig, door_d3_dataset, door_env, 'door_bc', 'door')
train_offline_algorithm(d3rlpy.algos.TD3PlusBCConfig, door_d3_dataset, door_env, 'door_td3bc', 'door')
train_offline_algorithm(d3rlpy.algos.AWACConfig, door_d3_dataset, door_env, 'door_awac', 'door')

[2m2025-05-18 18:43.50[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(28,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=28)[0m
[2m2025-05-18 18:43.50[0m [[32m[1minfo     [0m] [1mDirectory is created at training_logs/offline/door/IQL_20250518184350[0m
[2m2025-05-18 18:43.50[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [39], 'action_size': 28, 'config': {'type': 'iql', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params': {}}, 'action_scaler': {'type': 'none', 'params': {}}, 'reward_scaler': {'type': 'none', 'params': {}}, 'compile_graph': False, 'actor_learning_rate': 0.0003, 'critic_learning_rate': 0.0

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.52[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.005966944694519043, 'time_algorithm_update': 0.0074886465072631836, 'critic_loss': 229.86341054916383, 'q_loss': 229.86245990753173, 'v_loss': 0.000949906738824211, 'actor_loss': 16.686412696838378, 'time_step': 0.013509025573730469, 'environment': -44.061035208679314}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:43.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.53[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0021971487998962403, 'time_algorithm_update': 0.006391475200653076, 'critic_loss': 8.263968176841736, 'q_loss': 8.2446111536026, 'v_loss': 0.019357012070249768, 'actor_loss': 1.9356118315458297, 'time_step': 0.00862600564956665, 'environment': 591.8126201373532}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:43.53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.55[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0019348978996276856, 'time_algorithm_update': 0.005673329830169677, 'critic_loss': 6.741763215065003, 'q_loss': 6.659997892379761, 'v_loss': 0.08176531694829464, 'actor_loss': -0.3326988162100315, 'time_step': 0.0076441168785095215, 'environment': -44.64318588154474}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:43.55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.56[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018001747131347655, 'time_algorithm_update': 0.0053176069259643554, 'critic_loss': 8.958024480342864, 'q_loss': 8.80051113128662, 'v_loss': 0.1575133693218231, 'actor_loss': -3.977626560926437, 'time_step': 0.007153759002685547, 'environment': -43.4567602978796}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:43.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.57[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001753265857696533, 'time_algorithm_update': 0.005341200828552246, 'critic_loss': 13.062900381088257, 'q_loss': 12.860840051174163, 'v_loss': 0.20206039935350417, 'actor_loss': -6.455472158789635, 'time_step': 0.007130510807037353, 'environment': 873.9883190374476}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:43.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:43.58[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001728365421295166, 'time_algorithm_update': 0.005331840515136719, 'critic_loss': 18.207115998268126, 'q_loss': 18.003454093933104, 'v_loss': 0.2036620283126831, 'actor_loss': -6.691529277563095, 'time_step': 0.007094964981079101, 'environment': -8.526524265744458}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:43.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.00[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017296576499938965, 'time_algorithm_update': 0.005373728275299072, 'critic_loss': 26.299085783958436, 'q_loss': 26.10725525856018, 'v_loss': 0.191830598115921, 'actor_loss': -6.32877312362194, 'time_step': 0.007140941619873047, 'environment': 1094.7736741512044}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:44.00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.01[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0019517302513122558, 'time_algorithm_update': 0.00583507776260376, 'critic_loss': 32.83241745471955, 'q_loss': 32.65400374412537, 'v_loss': 0.17841384083032608, 'actor_loss': -5.9414089584350585, 'time_step': 0.007823517322540283, 'environment': -44.66897150015717}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:44.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.03[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00203965425491333, 'time_algorithm_update': 0.005786790847778321, 'critic_loss': 46.3619875049591, 'q_loss': 46.19646254062653, 'v_loss': 0.16552496753633023, 'actor_loss': -5.394454120993614, 'time_step': 0.007864561080932617, 'environment': -45.641051684193535}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:44.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.04[0m [[32m[1minfo     [0m] [1mIQL_20250518184350: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018549799919128419, 'time_algorithm_update': 0.0054980325698852535, 'critic_loss': 52.09798045873642, 'q_loss': 51.936273937225344, 'v_loss': 0.16170640990138055, 'actor_loss': -5.195140135288239, 'time_step': 0.007389082908630371, 'environment': -43.74015172806379}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:44.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/IQL_20250518184350/model_1000.d3[0m
[2m2025-05-18 18:44.04[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(28,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUO

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.08[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002247645854949951, 'time_algorithm_update': 0.03234184980392456, 'critic_loss': 325.7398119926453, 'conservative_loss': 72.67562147259713, 'alpha': 1.0044426155090331, 'actor_loss': -20.90406087875366, 'temp': 0.9949748021364212, 'temp_loss': 45.741810722351076, 'time_step': 0.034636447429656984, 'environment': -36.62784223288529}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:44.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.12[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002428839206695557, 'time_algorithm_update': 0.03257480144500732, 'critic_loss': -7.361382732391357, 'conservative_loss': -24.382204257547855, 'alpha': 1.0064471137523652, 'actor_loss': -21.44818458557129, 'temp': 0.9857350808382034, 'temp_loss': 37.6757048034668, 'time_step': 0.03504979372024536, 'environment': -38.2870219529218}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:44.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.16[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002251436710357666, 'time_algorithm_update': 0.03200758218765259, 'critic_loss': -59.364792594909666, 'conservative_loss': -78.81998405456542, 'alpha': 0.9988891887664795, 'actor_loss': -21.02653522491455, 'temp': 0.9773504489660263, 'temp_loss': 32.707902069091794, 'time_step': 0.03430468082427979, 'environment': -39.77223681708495}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:44.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.20[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0023046469688415526, 'time_algorithm_update': 0.032593200206756594, 'critic_loss': -89.81487480163574, 'conservative_loss': -114.36195304870606, 'alpha': 0.9854982918500901, 'actor_loss': -23.87031665802002, 'temp': 0.9695961511135102, 'temp_loss': 28.34748336791992, 'time_step': 0.03494548797607422, 'environment': -42.98325164069102}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:44.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.24[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002201688289642334, 'time_algorithm_update': 0.030918426513671875, 'critic_loss': -100.66740562438964, 'conservative_loss': -131.12098762512207, 'alpha': 0.9713993722200394, 'actor_loss': -29.076786193847656, 'temp': 0.9623590910434723, 'temp_loss': 25.282161273956298, 'time_step': 0.033166158199310306, 'environment': -43.44364371306078}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:44.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.28[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0021161985397338866, 'time_algorithm_update': 0.031075139045715332, 'critic_loss': -95.36199981689452, 'conservative_loss': -137.20185485839843, 'alpha': 0.9581754124164581, 'actor_loss': -36.060603046417235, 'temp': 0.9553843969106675, 'temp_loss': 23.63057413101196, 'time_step': 0.03323653221130371, 'environment': -44.348558648449185}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:44.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.31[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0020430564880371095, 'time_algorithm_update': 0.030371832847595214, 'critic_loss': -85.30448463439942, 'conservative_loss': -139.92003372192383, 'alpha': 0.9459054762125015, 'actor_loss': -43.994288787841796, 'temp': 0.9484530586004257, 'temp_loss': 22.620334720611574, 'time_step': 0.032460322380065916, 'environment': -45.21763044517912}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:44.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.35[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002136561870574951, 'time_algorithm_update': 0.03131730079650879, 'critic_loss': -85.02288276672363, 'conservative_loss': -141.42916900634765, 'alpha': 0.9343471473455429, 'actor_loss': -52.30986682891846, 'temp': 0.9415025931596755, 'temp_loss': 21.887737026214598, 'time_step': 0.033499305248260496, 'environment': -44.11854779362582}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:44.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.39[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00241558313369751, 'time_algorithm_update': 0.0307480263710022, 'critic_loss': -63.294768829345706, 'conservative_loss': -140.9538624572754, 'alpha': 0.9233632773160935, 'actor_loss': -61.07099071502685, 'temp': 0.9344966638088227, 'temp_loss': 21.432473888397215, 'time_step': 0.033209869861602785, 'environment': 234.09002982961584}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:44.39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.43[0m [[32m[1minfo     [0m] [1mCQL_20250518184404: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00242264986038208, 'time_algorithm_update': 0.031067256927490235, 'critic_loss': -43.55140731811523, 'conservative_loss': -140.8998210144043, 'alpha': 0.9128447908163071, 'actor_loss': -69.77470569610595, 'temp': 0.9274443542957306, 'temp_loss': 20.813357734680174, 'time_step': 0.03353680610656738, 'environment': -44.815990091574974}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:44.43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/CQL_20250518184404/model_1000.d3[0m
[2m2025-05-18 18:44.43[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(28,)]), reward_signature=Signature(dtype=[

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.44[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007015657424926758, 'time_algorithm_update': 0.0007634210586547852, 'loss': 0.0984037259966135, 'time_step': 0.0014883518218994141, 'environment': 2155.068057950307}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:44.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.45[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006804156303405761, 'time_algorithm_update': 0.0007898116111755371, 'loss': 0.07141400754451752, 'time_step': 0.0014937210083007812, 'environment': 2830.2167029275956}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:44.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.46[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006766486167907715, 'time_algorithm_update': 0.0008063220977783203, 'loss': 0.06970138803124427, 'time_step': 0.0015056228637695312, 'environment': 2952.001502060937}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:44.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.46[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007039141654968261, 'time_algorithm_update': 0.0007623600959777832, 'loss': 0.06946521990001202, 'time_step': 0.001488363742828369, 'environment': 2521.006728399844}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:44.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.47[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006727504730224609, 'time_algorithm_update': 0.0007739734649658204, 'loss': 0.06903620831668376, 'time_step': 0.0014690923690795898, 'environment': 2622.237206491485}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:44.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.48[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006473660469055176, 'time_algorithm_update': 0.0007413673400878906, 'loss': 0.06940580368041992, 'time_step': 0.0014068913459777833, 'environment': 2507.3563171326323}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:44.48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.49[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006449484825134278, 'time_algorithm_update': 0.0006506514549255372, 'loss': 0.06833173535764217, 'time_step': 0.0013124585151672364, 'environment': 2051.0029666246}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:44.49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.49[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006446909904479981, 'time_algorithm_update': 0.0006837630271911621, 'loss': 0.06855830263346434, 'time_step': 0.0013457202911376952, 'environment': 2987.98370801745}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:44.49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.50[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006399869918823243, 'time_algorithm_update': 0.0006704306602478027, 'loss': 0.06854621417820454, 'time_step': 0.001326282024383545, 'environment': 2782.8334191638246}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:44.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.51[0m [[32m[1minfo     [0m] [1mBC_20250518184443: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006737852096557617, 'time_algorithm_update': 0.0007110452651977539, 'loss': 0.06856179200112819, 'time_step': 0.0014010000228881837, 'environment': 2568.2166423921713}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:44.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/BC_20250518184443/model_1000.d3[0m
[2m2025-05-18 18:44.51[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(28,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=28)[0m
[2m2025-05-18 18:44.51[0m [[32m[1minfo     [0m] [1mDirectory is cre

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.52[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001702752113342285, 'time_algorithm_update': 0.0038928914070129393, 'critic_loss': 219.31231636047363, 'actor_loss': -2.1819397163391114, 'bc_loss': 0.3176113957166672, 'time_step': 0.005627646446228028, 'environment': -31.190970175505406}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:44.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.53[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0018524646759033204, 'time_algorithm_update': 0.0050015711784362794, 'critic_loss': 8.285349259376526, 'actor_loss': -2.036397085189819, 'bc_loss': 0.46360290467739107, 'time_step': 0.006886637210845948, 'environment': -31.015612464175284}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:44.53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.54[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017391753196716309, 'time_algorithm_update': 0.004313821792602539, 'critic_loss': 4.890757398605347, 'actor_loss': -2.0447305059432983, 'bc_loss': 0.45526946723461154, 'time_step': 0.006084170341491699, 'environment': -30.93806284362977}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:44.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.55[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016896319389343262, 'time_algorithm_update': 0.003796188831329346, 'critic_loss': 5.29280359506607, 'actor_loss': -2.069070453643799, 'bc_loss': 0.4309295165538788, 'time_step': 0.005516786575317383, 'environment': -30.80039388065357}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:44.55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.56[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017124700546264648, 'time_algorithm_update': 0.003774755001068115, 'critic_loss': 5.724323942661285, 'actor_loss': -2.1158546161651612, 'bc_loss': 0.38414536118507386, 'time_step': 0.005518853664398193, 'environment': -30.157242924076968}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:44.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.57[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.001717853546142578, 'time_algorithm_update': 0.0037784886360168455, 'critic_loss': 6.856464433670044, 'actor_loss': -2.2367027521133425, 'bc_loss': 0.2632972252368927, 'time_step': 0.005527760982513428, 'environment': -30.50573274154495}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:44.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.58[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016860032081604003, 'time_algorithm_update': 0.003747551441192627, 'critic_loss': 8.732102324962616, 'actor_loss': -2.2957974433898927, 'bc_loss': 0.20420254051685333, 'time_step': 0.0054651093482971196, 'environment': -40.347188110897854}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:44.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:44.59[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0016657209396362305, 'time_algorithm_update': 0.00367490291595459, 'critic_loss': 11.343036797046661, 'actor_loss': -2.3197416830062867, 'bc_loss': 0.18025830864906311, 'time_step': 0.005368485450744629, 'environment': -41.354564210970985}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:44.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.00[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0017127251625061034, 'time_algorithm_update': 0.0040260696411132816, 'critic_loss': 12.428644824028016, 'actor_loss': -2.3285902643203737, 'bc_loss': 0.1714097148180008, 'time_step': 0.005769219398498535, 'environment': -41.547081557526184}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:45.00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.01[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20250518184451: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0019243288040161133, 'time_algorithm_update': 0.003742995262145996, 'critic_loss': 14.711155133247376, 'actor_loss': -2.339132852554321, 'bc_loss': 0.1608671650290489, 'time_step': 0.005698378086090088, 'environment': -41.353070564631196}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:45.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/TD3PlusBC_20250518184451/model_1000.d3[0m
[2m2025-05-18 18:45.01[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(39,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(28,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_si

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.04[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=1 step=100[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007291326522827149, 'time_algorithm_update': 0.011093316078186035, 'critic_loss': 1337.3202682495116, 'actor_loss': 688807.849375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01842357397079468, 'environment': -42.17264036625127}[0m [36mstep[0m=[35m100[0m
[2m2025-05-18 18:45.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_100.d3[0m


Epoch 2/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.06[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=2 step=200[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00723168134689331, 'time_algorithm_update': 0.011776893138885499, 'critic_loss': 134.40468711853026, 'actor_loss': 368667.305, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01904731035232544, 'environment': -43.53126984787606}[0m [36mstep[0m=[35m200[0m
[2m2025-05-18 18:45.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_200.d3[0m


Epoch 3/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.09[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=3 step=300[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007211339473724365, 'time_algorithm_update': 0.011117961406707764, 'critic_loss': 100.6105941772461, 'actor_loss': 341866.265, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018370838165283204, 'environment': -42.40779381954026}[0m [36mstep[0m=[35m300[0m
[2m2025-05-18 18:45.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_300.d3[0m


Epoch 4/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.11[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=4 step=400[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007268142700195312, 'time_algorithm_update': 0.011364340782165527, 'critic_loss': 104.49254123687744, 'actor_loss': 325227.0159375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018670458793640137, 'environment': -42.19854419526821}[0m [36mstep[0m=[35m400[0m
[2m2025-05-18 18:45.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_400.d3[0m


Epoch 5/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.13[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=5 step=500[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0071405482292175295, 'time_algorithm_update': 0.010975489616394043, 'critic_loss': 120.34943016052246, 'actor_loss': 295196.31546875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01815361499786377, 'environment': -41.678876729250234}[0m [36mstep[0m=[35m500[0m
[2m2025-05-18 18:45.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_500.d3[0m


Epoch 6/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.16[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=6 step=600[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007406485080718994, 'time_algorithm_update': 0.012001998424530029, 'critic_loss': 136.18030532836914, 'actor_loss': 287119.81984375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.019450058937072755, 'environment': -41.643975099185894}[0m [36mstep[0m=[35m600[0m
[2m2025-05-18 18:45.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_600.d3[0m


Epoch 7/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.18[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=7 step=700[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007288222312927246, 'time_algorithm_update': 0.01156418800354004, 'critic_loss': 148.21069126129152, 'actor_loss': 260145.12109375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018892526626586914, 'environment': -38.694423427079656}[0m [36mstep[0m=[35m700[0m
[2m2025-05-18 18:45.18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_700.d3[0m


Epoch 8/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.21[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=8 step=800[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00739738941192627, 'time_algorithm_update': 0.011795551776885986, 'critic_loss': 167.8558322906494, 'actor_loss': 241087.1090625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.01923227310180664, 'environment': -40.72886491738168}[0m [36mstep[0m=[35m800[0m
[2m2025-05-18 18:45.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_800.d3[0m


Epoch 9/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.24[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=9 step=900[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0114914870262146, 'time_algorithm_update': 0.01214505434036255, 'critic_loss': 182.28158836364747, 'actor_loss': 225951.92875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.02367981433868408, 'environment': -39.042494658037604}[0m [36mstep[0m=[35m900[0m
[2m2025-05-18 18:45.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_900.d3[0m


Epoch 10/10:   0%|          | 0/100 [00:00<?, ?it/s]

[2m2025-05-18 18:45.26[0m [[32m[1minfo     [0m] [1mAWAC_20250518184501: epoch=10 step=1000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.007562184333801269, 'time_algorithm_update': 0.010577442646026612, 'critic_loss': 206.5586543273926, 'actor_loss': 205129.44234375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.018180322647094727, 'environment': -39.69027961041189}[0m [36mstep[0m=[35m1000[0m
[2m2025-05-18 18:45.26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to training_logs/offline/door/AWAC_20250518184501/model_1000.d3[0m


(<d3rlpy.algos.qlearning.awac.AWAC at 0x17dbc2140>,
 [(1,
   {'time_sample_batch': 0.007291326522827149,
    'time_algorithm_update': 0.011093316078186035,
    'critic_loss': 1337.3202682495116,
    'actor_loss': 688807.849375,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.01842357397079468,
    'environment': -42.17264036625127}),
  (2,
   {'time_sample_batch': 0.00723168134689331,
    'time_algorithm_update': 0.011776893138885499,
    'critic_loss': 134.40468711853026,
    'actor_loss': 368667.305,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.01904731035232544,
    'environment': -43.53126984787606}),
  (3,
   {'time_sample_batch': 0.007211339473724365,
    'time_algorithm_update': 0.011117961406707764,
    'critic_loss': 100.6105941772461,
    'actor_loss': 341866.265,
    'temp': 0.0,
    'temp_loss': 0.0,
    'time_step': 0.018370838165283204,
    'environment': -42.40779381954026}),
  (4,
   {'time_sample_batch': 0.007268142700195312,
    'time_algorithm_