In [1]:
# import library

import sys
import os
import d4rl
import gym
import numpy as np
import wandb
import collections
import pickle
import csv

import torch
import torch.nn as nn
import torch.nn.functional as F

from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from utils import discount_cumsum, D4RLTrajectoryDataset, evaluate_on_env, get_d4rl_normalized_score
from model import MaskedCausalAttention, Block, DecisionTransformer


No module named 'flow'
No module named 'carla'


In [2]:
# set environment
# sys.path.append(r'C:\Develop\offlineRL-with-diffusion') 

In [3]:
# test mujoco, d4rl

!python ./test/mujoco_test.py

mujoco-py check passed
d4rl check passed


No module named 'flow'
No module named 'carla'
pybullet build time: Apr 30 2024 12:01:25
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [4]:
# data download
# if you downloaded, don't re-start.

# !python ./data/download_d4rl_datasets.py


In [5]:
# parameter setting

env_name = 'halfcheetah'
dataset = 'medium'

if env_name == 'hopper':
    env = gym.make('Hopper-v3')
    max_ep_len = 1000
    # env_targets = [3600, 1800]  # evaluation conditioning targets
    scale = 1000.  # normalization for rewards/returns
elif env_name == 'halfcheetah':
    env = gym.make('HalfCheetah-v3')
    max_ep_len = 1000
    # env_targets = [12000, 6000]
    scale = 1000.
elif env_name == 'walker2d':
    env = gym.make('Walker2d-v3')
    max_ep_len = 1000
    # env_targets = [5000, 2500]
    scale = 1000.
    
DATA_PATH = f'data/{env_name}-{dataset}-v2.pkl'
LOG_PATH = "./log/"
DEVICE = 'cpu'

In [6]:
# data check
# check three trajectories

with open(DATA_PATH, 'rb') as f:
    trajectories = pickle.load(f)
n=0
max_rewards_list = []
for traj in trajectories:
    # print(f"{n+1}번째 trajectory")
    print("state: ", traj['observations'], "\n")
    # print("action: ", traj['actions'], "\n")
    print("next_state: ", traj['next_observations'], "\n")
    # print("reward: ", traj['rewards'], "\n")
    # print("max_rewards: ", max(traj['rewards']))
    # max_rewards_list.append(max(traj['rewards']))
    # print("")
    n+=1
    
# print(max(max_rewards_list))

    if n==3:
        break

state:  [[ 1.9831914e-02 -8.9501314e-02 -3.1969063e-03 ...  1.1365079e-01
   6.8424918e-02 -1.3811582e-01]
 [-3.8486063e-03 -5.2394319e-02  8.3050327e-03 ...  4.5068407e+00
  -9.2885571e+00  4.7328596e+00]
 [-5.5298433e-02 -7.7850236e-05 -2.3952831e-01 ... -7.0811687e+00
  -1.4037068e+00  7.5524049e+00]
 ...
 [-3.1975684e-01  5.3305399e-01 -4.8704177e-01 ...  1.5455554e+00
   2.6812897e+00  8.7905388e+00]
 [-3.2200974e-01  3.5745117e-01  1.0463273e-02 ... -6.3428599e-01
   1.6292539e+00  9.7356015e-01]
 [-3.0673215e-01  1.9843711e-01  6.9996923e-01 ...  5.0098950e-01
   1.5680059e+00  9.4733723e-02]] 

next_state:  [[-3.8486063e-03 -5.2394319e-02  8.3050327e-03 ...  4.5068407e+00
  -9.2885571e+00  4.7328596e+00]
 [-5.5298433e-02 -7.7850236e-05 -2.3952831e-01 ... -7.0811687e+00
  -1.4037068e+00  7.5524049e+00]
 [-1.2996776e-01  2.2959358e-03 -2.2985412e-01 ... -7.0144100e+00
   2.6917322e+00 -1.6729002e+00]
 ...
 [-3.2200974e-01  3.5745117e-01  1.0463273e-02 ... -6.3428599e-01
   1.6292

In [7]:
# train parameter
batch_size = 64
embed_dim = 128
activation = 'relu'
drop_out = 0.1
k = 20
n_blocks = 3    
n_heads = 1 # transformer head

# total updates = max_train_iters x num_updates_per_iter
max_train_iters = 200
num_updates_per_iter = 100
total_updates = 0
min_total_log_loss = 1e10

wt_decay = 1e-4             # weight decay
lr = 1e-4                   # learning rate
warmup_steps = 10000        # warmup steps for lr scheduler

# weight of mse loss
state_weight = 1
reward_weight = 1

# evaluation parameter
# max_eval_ep_len = 1000      # max len of one evaluation episode
# num_eval_ep = 10            # num of evaluation episodes per iteration

In [8]:
# load preprocessing(normalization, fit padding) data

traj_dataset = D4RLTrajectoryDataset(DATA_PATH, k)
traj_data_loader = DataLoader(traj_dataset,
						batch_size=batch_size,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        
data_iter = iter(traj_data_loader)

## get state stats from dataset
state_mean, state_std = traj_dataset.get_state_stats()

In [9]:
# make environment

state_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]

print("state dim: ", state_dim)
print("action dim: ", act_dim)

state dim:  17
action dim:  6


In [10]:
# test data
temp_dataset = D4RLTrajectoryDataset(DATA_PATH, 2)
temp_data_loader = DataLoader(temp_dataset,
						batch_size=4,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        
temp_data_iter = iter(temp_data_loader)

timesteps, states, next_states, actions, rewards, traj_mask = next(temp_data_iter)

timesteps = timesteps.to(DEVICE)	# B x T
states = states.to(DEVICE)			# B x T x state_dim
next_states = next_states.to(DEVICE) # B X T X state_dim
actions = actions.to(DEVICE)		# B x T x act_dim
rewards = rewards.to(DEVICE).unsqueeze(dim=-1) # B x T x 1

print("timesteps shape: ", timesteps.shape)
print("rewards shape: ", rewards.shape)
print("states shape: ", states.shape)
print("actions shape: ", actions.shape)

# print("state: ", states)
# print("action: ", actions)
# print("rewards: ", rewards)



timesteps shape:  torch.Size([4, 2])
rewards shape:  torch.Size([4, 2, 1])
states shape:  torch.Size([4, 2, 17])
actions shape:  torch.Size([4, 2, 6])


In [11]:
# test model

temp_model = DecisionTransformer(
			state_dim=state_dim,
			act_dim=act_dim,
			# reward 포함 + r0 제외
			n_blocks=n_blocks,
			h_dim=16,
			context_len=2,
			n_heads=n_heads,
			drop_p=drop_out,
		).to(DEVICE)
		
next_state_preds, rewards_preds = temp_model.forward(
												rewards=rewards,
												timesteps=timesteps,
												states=states,
												actions=actions,
											)

In [12]:
# define model

model = DecisionTransformer(
			state_dim=state_dim,
			act_dim=act_dim,
			n_blocks=n_blocks,
			h_dim=embed_dim,
			context_len=k,
			n_heads=n_heads,
			drop_p=drop_out,
		).to(DEVICE)
  
optimizer = torch.optim.AdamW(
					model.parameters(), 
					lr=lr, 
					weight_decay=wt_decay
				)

scheduler = torch.optim.lr_scheduler.LambdaLR(
		optimizer,
		lambda steps: min((steps+1)/warmup_steps, 1)
	)
	


In [13]:
start_time = datetime.now().replace(microsecond=0)

start_time_str = start_time.strftime("%y-%m-%d-%H-%M-%S")

prefix = "dt_" + env_name

save_model_name =  prefix + "_model_" + start_time_str + ".pt"
save_model_path = os.path.join(LOG_PATH, save_model_name)
save_best_model_path = save_model_path[:-3] + "_best.pt"

log_csv_name = prefix + "_log_" + start_time_str + ".csv"
log_csv_path = os.path.join(LOG_PATH, log_csv_name)


csv_writer = csv.writer(open(log_csv_path, 'a', 1))
csv_header = (["duration", "num_updates", "total_loss", "state_loss", "reward_loss"])

csv_writer.writerow(csv_header)


print("=" * 60)
print("start time: " + start_time_str)
print("=" * 60)

print("device set to: " + str(DEVICE))
print("dataset path: " + DATA_PATH)
print("model save path: " + save_model_path)
print("log csv save path: " + log_csv_path)

# train
for i_train_iter in tqdm(range(max_train_iters)):


	log_state_losses, log_reward_losses, log_total_losses = [], [], []
	model.train()
 
	for _ in range(num_updates_per_iter):
		try:
			timesteps, states, next_states, actions, rewards, traj_mask = next(data_iter)
		except StopIteration:
			data_iter = iter(traj_data_loader)
			timesteps, states, next_states, actions, rewards, traj_mask = next(data_iter)

		timesteps = timesteps.to(DEVICE)	# B x T
		states = states.to(DEVICE)			# B x T x state_dim
		next_states = next_states.to(DEVICE) # B X T X state_dim
		actions = actions.to(DEVICE)		# B x T x act_dim
		rewards = rewards.to(DEVICE).unsqueeze(dim=-1) # B x T x 1
		traj_mask = traj_mask.to(DEVICE)	# B x T

		next_states_target = torch.clone(next_states).detach().to(DEVICE)
		rewards_target = torch.clone(rewards).detach().to(DEVICE)
	
		next_state_preds, rewards_preds = model.forward(
														timesteps=timesteps,
														states=states,
														actions=actions,
														rewards=rewards,
													)

		# only consider non padded elements
		next_state_preds = next_state_preds.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		next_states_target = next_states_target.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		
		rewards_preds = rewards_preds.view(-1, 1)[traj_mask.view(-1,) > 0]
		rewards_target = rewards_target.view(-1, 1)[traj_mask.view(-1,) > 0]

		state_loss = F.mse_loss(next_state_preds, next_states_target, reduction='mean') * state_weight
		reward_loss = F.mse_loss(rewards_preds, rewards_target, reduction='mean') * reward_weight
		
		total_loss = state_loss.add(reward_loss)
		total_loss = torch.mean(total_loss)

		optimizer.zero_grad()
		total_loss.backward()
		torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
		optimizer.step()
		scheduler.step()
		
		
		#save loss
		log_state_losses.append(state_loss.detach().cpu().item())
		log_reward_losses.append(reward_loss.detach().cpu().item())
		
		log_total_losses.append(total_loss.detach().cpu().item())
		
	mean_total_log_loss = np.mean(log_total_losses)
	mean_state_log_loss = np.mean(log_state_losses)
	mean_reward_log_loss = np.mean(log_reward_losses)
	
	time_elapsed = str(datetime.now().replace(microsecond=0) - start_time)

	total_updates += num_updates_per_iter

	log_str = ("=" * 60 + '\n' +
			"time elapsed: " + time_elapsed  + '\n' +
			"num of updates: " + str(total_updates) + '\n' +
			"total loss: " + format(mean_total_log_loss, ".5f") + '\n' +
			"state loss: " + format(mean_state_log_loss, ".5f") + '\n' +
			"reward loss: " +  format(mean_reward_log_loss, ".5f")
			)

	print(log_str)

	log_data = [time_elapsed, total_updates, mean_total_log_loss, mean_state_log_loss, mean_reward_log_loss]

	csv_writer.writerow(log_data)
	
	# save model
	if mean_total_log_loss <= min_total_log_loss:
		print("saving min loss model at: " + save_best_model_path)
		torch.save(model.state_dict(), save_best_model_path)
		min_total_log_loss = mean_total_log_loss

	print("saving current model at: " + save_model_path)
	torch.save(model.state_dict(), save_model_path)


print("=" * 60)
print("finished training!")
print("=" * 60)
end_time = datetime.now().replace(microsecond=0)
time_elapsed = str(end_time - start_time)
end_time_str = end_time.strftime("%y-%m-%d-%H-%M-%S")
print("started training at: " + start_time_str)
print("finished training at: " + end_time_str)
print("total training time: " + time_elapsed)
print("saved min loss model at: " + save_best_model_path)
print("saved last updated model at: " + save_model_path)
print("=" * 60)

csv_writer.close()

start time: 24-05-07-21-53-44
device set to: cpu
dataset path: data/halfcheetah-medium-v2.pkl
model save path: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt
log csv save path: ./log/dt_halfcheetah_log_24-05-07-21-53-44.csv


  0%|          | 1/200 [00:17<57:47, 17.42s/it]

time elapsed: 0:00:18
num of updates: 100
total loss: 22.93949
state loss: 1.30623
reward loss: 21.63327
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  1%|          | 2/200 [00:34<56:52, 17.24s/it]

time elapsed: 0:00:35
num of updates: 200
total loss: 21.08028
state loss: 1.29244
reward loss: 19.78784
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  2%|▏         | 3/200 [00:51<56:05, 17.08s/it]

time elapsed: 0:00:52
num of updates: 300
total loss: 17.56215
state loss: 1.25965
reward loss: 16.30250
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  2%|▏         | 4/200 [01:08<55:50, 17.10s/it]

time elapsed: 0:01:09
num of updates: 400
total loss: 12.84632
state loss: 1.20227
reward loss: 11.64405
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  2%|▎         | 5/200 [01:25<55:24, 17.05s/it]

time elapsed: 0:01:26
num of updates: 500
total loss: 7.18645
state loss: 1.14890
reward loss: 6.03756
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  3%|▎         | 6/200 [01:42<55:03, 17.03s/it]

time elapsed: 0:01:43
num of updates: 600
total loss: 3.20127
state loss: 1.07021
reward loss: 2.13106
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  4%|▎         | 7/200 [01:59<54:49, 17.04s/it]

time elapsed: 0:02:00
num of updates: 700
total loss: 2.17117
state loss: 1.01839
reward loss: 1.15278
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  4%|▍         | 8/200 [02:17<55:01, 17.20s/it]

time elapsed: 0:02:17
num of updates: 800
total loss: 1.99310
state loss: 1.00130
reward loss: 0.99179
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  4%|▍         | 9/200 [02:35<55:49, 17.53s/it]

time elapsed: 0:02:35
num of updates: 900
total loss: 1.89339
state loss: 1.01690
reward loss: 0.87650
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  5%|▌         | 10/200 [02:53<56:29, 17.84s/it]

time elapsed: 0:02:54
num of updates: 1000
total loss: 1.71432
state loss: 0.99211
reward loss: 0.72221
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  6%|▌         | 11/200 [03:12<57:05, 18.12s/it]

time elapsed: 0:03:13
num of updates: 1100
total loss: 1.64277
state loss: 0.99392
reward loss: 0.64885
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  6%|▌         | 12/200 [03:31<57:44, 18.43s/it]

time elapsed: 0:03:32
num of updates: 1200
total loss: 1.54464
state loss: 0.97948
reward loss: 0.56516
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  6%|▋         | 13/200 [03:51<58:35, 18.80s/it]

time elapsed: 0:03:52
num of updates: 1300
total loss: 1.47436
state loss: 0.96627
reward loss: 0.50809
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  7%|▋         | 14/200 [04:11<59:31, 19.20s/it]

time elapsed: 0:04:12
num of updates: 1400
total loss: 1.42622
state loss: 0.96350
reward loss: 0.46271
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  8%|▊         | 15/200 [04:33<1:01:30, 19.95s/it]

time elapsed: 0:04:33
num of updates: 1500
total loss: 1.37049
state loss: 0.95286
reward loss: 0.41763
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  8%|▊         | 16/200 [04:54<1:02:44, 20.46s/it]

time elapsed: 0:04:55
num of updates: 1600
total loss: 1.32364
state loss: 0.94567
reward loss: 0.37798
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  8%|▊         | 17/200 [05:14<1:01:26, 20.15s/it]

time elapsed: 0:05:14
num of updates: 1700
total loss: 1.29396
state loss: 0.94639
reward loss: 0.34757
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


  9%|▉         | 18/200 [05:33<1:00:22, 19.91s/it]

time elapsed: 0:05:34
num of updates: 1800
total loss: 1.26757
state loss: 0.94108
reward loss: 0.32649
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 10%|▉         | 19/200 [05:53<59:51, 19.84s/it]  

time elapsed: 0:05:53
num of updates: 1900
total loss: 1.23139
state loss: 0.92773
reward loss: 0.30366
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 10%|█         | 20/200 [06:13<59:48, 19.94s/it]

time elapsed: 0:06:14
num of updates: 2000
total loss: 1.20260
state loss: 0.91819
reward loss: 0.28441
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 10%|█         | 21/200 [06:33<59:44, 20.03s/it]

time elapsed: 0:06:34
num of updates: 2100
total loss: 1.17554
state loss: 0.90798
reward loss: 0.26756
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 11%|█         | 22/200 [06:53<59:12, 19.96s/it]

time elapsed: 0:06:54
num of updates: 2200
total loss: 1.14629
state loss: 0.89707
reward loss: 0.24922
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 12%|█▏        | 23/200 [07:14<59:21, 20.12s/it]

time elapsed: 0:07:14
num of updates: 2300
total loss: 1.11634
state loss: 0.88001
reward loss: 0.23633
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 12%|█▏        | 24/200 [07:42<1:06:32, 22.69s/it]

time elapsed: 0:07:43
num of updates: 2400
total loss: 1.07553
state loss: 0.85566
reward loss: 0.21987
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 12%|█▎        | 25/200 [08:02<1:03:43, 21.85s/it]

time elapsed: 0:08:03
num of updates: 2500
total loss: 1.04702
state loss: 0.83786
reward loss: 0.20916
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 13%|█▎        | 26/200 [08:22<1:01:32, 21.22s/it]

time elapsed: 0:08:22
num of updates: 2600
total loss: 1.00625
state loss: 0.80969
reward loss: 0.19655
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 14%|█▎        | 27/200 [08:42<1:00:22, 20.94s/it]

time elapsed: 0:08:43
num of updates: 2700
total loss: 0.96680
state loss: 0.78229
reward loss: 0.18451
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 14%|█▍        | 28/200 [09:02<58:56, 20.56s/it]  

time elapsed: 0:09:02
num of updates: 2800
total loss: 0.93051
state loss: 0.75398
reward loss: 0.17653
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 14%|█▍        | 29/200 [09:21<57:08, 20.05s/it]

time elapsed: 0:09:21
num of updates: 2900
total loss: 0.89919
state loss: 0.72990
reward loss: 0.16929
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 15%|█▌        | 30/200 [09:40<56:23, 19.90s/it]

time elapsed: 0:09:41
num of updates: 3000
total loss: 0.85545
state loss: 0.69252
reward loss: 0.16294
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 16%|█▌        | 31/200 [10:00<56:10, 19.95s/it]

time elapsed: 0:10:01
num of updates: 3100
total loss: 0.81152
state loss: 0.65896
reward loss: 0.15255
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 16%|█▌        | 32/200 [10:20<55:57, 19.99s/it]

time elapsed: 0:10:21
num of updates: 3200
total loss: 0.76762
state loss: 0.62306
reward loss: 0.14456
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 16%|█▋        | 33/200 [10:41<55:44, 20.03s/it]

time elapsed: 0:10:41
num of updates: 3300
total loss: 0.73797
state loss: 0.59946
reward loss: 0.13851
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 17%|█▋        | 34/200 [10:59<54:28, 19.69s/it]

time elapsed: 0:11:00
num of updates: 3400
total loss: 0.70175
state loss: 0.57075
reward loss: 0.13099
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 18%|█▊        | 35/200 [11:18<53:00, 19.28s/it]

time elapsed: 0:11:18
num of updates: 3500
total loss: 0.67986
state loss: 0.55326
reward loss: 0.12660
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 18%|█▊        | 36/200 [11:37<52:22, 19.16s/it]

time elapsed: 0:11:37
num of updates: 3600
total loss: 0.64345
state loss: 0.52375
reward loss: 0.11970
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 18%|█▊        | 37/200 [11:56<52:05, 19.17s/it]

time elapsed: 0:11:56
num of updates: 3700
total loss: 0.62008
state loss: 0.50529
reward loss: 0.11479
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 19%|█▉        | 38/200 [12:16<52:30, 19.45s/it]

time elapsed: 0:12:16
num of updates: 3800
total loss: 0.59307
state loss: 0.48436
reward loss: 0.10871
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 20%|█▉        | 39/200 [12:36<52:50, 19.69s/it]

time elapsed: 0:12:37
num of updates: 3900
total loss: 0.57168
state loss: 0.46796
reward loss: 0.10372
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 20%|██        | 40/200 [12:57<53:04, 19.90s/it]

time elapsed: 0:12:57
num of updates: 4000
total loss: 0.54942
state loss: 0.44920
reward loss: 0.10022
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 20%|██        | 41/200 [13:16<52:39, 19.87s/it]

time elapsed: 0:13:17
num of updates: 4100
total loss: 0.52550
state loss: 0.42942
reward loss: 0.09608
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 21%|██        | 42/200 [13:36<52:14, 19.84s/it]

time elapsed: 0:13:37
num of updates: 4200
total loss: 0.49975
state loss: 0.40772
reward loss: 0.09204
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 22%|██▏       | 43/200 [13:56<52:11, 19.94s/it]

time elapsed: 0:13:57
num of updates: 4300
total loss: 0.48200
state loss: 0.39379
reward loss: 0.08822
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 22%|██▏       | 44/200 [14:15<51:09, 19.68s/it]

time elapsed: 0:14:16
num of updates: 4400
total loss: 0.46273
state loss: 0.37773
reward loss: 0.08500
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 22%|██▎       | 45/200 [14:34<50:23, 19.51s/it]

time elapsed: 0:14:35
num of updates: 4500
total loss: 0.44555
state loss: 0.36285
reward loss: 0.08271
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 23%|██▎       | 46/200 [14:54<50:19, 19.61s/it]

time elapsed: 0:14:55
num of updates: 4600
total loss: 0.42641
state loss: 0.34740
reward loss: 0.07901
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 24%|██▎       | 47/200 [15:16<51:16, 20.11s/it]

time elapsed: 0:15:16
num of updates: 4700
total loss: 0.41468
state loss: 0.33694
reward loss: 0.07774
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 24%|██▍       | 48/200 [15:37<52:04, 20.56s/it]

time elapsed: 0:15:38
num of updates: 4800
total loss: 0.39408
state loss: 0.32118
reward loss: 0.07290
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 24%|██▍       | 49/200 [15:57<50:57, 20.25s/it]

time elapsed: 0:15:57
num of updates: 4900
total loss: 0.37933
state loss: 0.30825
reward loss: 0.07108
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 25%|██▌       | 50/200 [16:15<49:29, 19.80s/it]

time elapsed: 0:16:16
num of updates: 5000
total loss: 0.36361
state loss: 0.29584
reward loss: 0.06777
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 26%|██▌       | 51/200 [16:35<48:35, 19.57s/it]

time elapsed: 0:16:35
num of updates: 5100
total loss: 0.34618
state loss: 0.28125
reward loss: 0.06493
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 26%|██▌       | 52/200 [16:54<48:05, 19.50s/it]

time elapsed: 0:16:54
num of updates: 5200
total loss: 0.34006
state loss: 0.27615
reward loss: 0.06391
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 26%|██▋       | 53/200 [17:14<48:23, 19.75s/it]

time elapsed: 0:17:15
num of updates: 5300
total loss: 0.32813
state loss: 0.26699
reward loss: 0.06115
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 27%|██▋       | 54/200 [17:35<48:34, 19.96s/it]

time elapsed: 0:17:35
num of updates: 5400
total loss: 0.31834
state loss: 0.25819
reward loss: 0.06015
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 28%|██▊       | 55/200 [17:54<47:35, 19.70s/it]

time elapsed: 0:17:54
num of updates: 5500
total loss: 0.31045
state loss: 0.25106
reward loss: 0.05939
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 28%|██▊       | 56/200 [18:13<46:57, 19.57s/it]

time elapsed: 0:18:14
num of updates: 5600
total loss: 0.29641
state loss: 0.24019
reward loss: 0.05622
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 28%|██▊       | 57/200 [18:33<46:51, 19.66s/it]

time elapsed: 0:18:33
num of updates: 5700
total loss: 0.28976
state loss: 0.23405
reward loss: 0.05572
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 29%|██▉       | 58/200 [18:53<46:36, 19.69s/it]

time elapsed: 0:18:53
num of updates: 5800
total loss: 0.28023
state loss: 0.22640
reward loss: 0.05383
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 30%|██▉       | 59/200 [19:12<46:14, 19.68s/it]

time elapsed: 0:19:13
num of updates: 5900
total loss: 0.27509
state loss: 0.22198
reward loss: 0.05311
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 30%|███       | 60/200 [19:32<45:54, 19.68s/it]

time elapsed: 0:19:33
num of updates: 6000
total loss: 0.26418
state loss: 0.21306
reward loss: 0.05112
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 30%|███       | 61/200 [19:52<45:55, 19.82s/it]

time elapsed: 0:19:53
num of updates: 6100
total loss: 0.26004
state loss: 0.20992
reward loss: 0.05013
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 31%|███       | 62/200 [20:12<45:21, 19.72s/it]

time elapsed: 0:20:12
num of updates: 6200
total loss: 0.25486
state loss: 0.20454
reward loss: 0.05032
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 32%|███▏      | 63/200 [20:30<44:17, 19.40s/it]

time elapsed: 0:20:31
num of updates: 6300
total loss: 0.24502
state loss: 0.19674
reward loss: 0.04828
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 32%|███▏      | 64/200 [20:49<43:44, 19.30s/it]

time elapsed: 0:20:50
num of updates: 6400
total loss: 0.24362
state loss: 0.19585
reward loss: 0.04777
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 32%|███▎      | 65/200 [21:09<43:29, 19.33s/it]

time elapsed: 0:21:09
num of updates: 6500
total loss: 0.23685
state loss: 0.18978
reward loss: 0.04707
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 33%|███▎      | 66/200 [21:29<43:57, 19.69s/it]

time elapsed: 0:21:30
num of updates: 6600
total loss: 0.23099
state loss: 0.18584
reward loss: 0.04515
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 34%|███▎      | 67/200 [21:49<43:58, 19.84s/it]

time elapsed: 0:21:50
num of updates: 6700
total loss: 0.22327
state loss: 0.17957
reward loss: 0.04371
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 34%|███▍      | 68/200 [22:09<43:46, 19.90s/it]

time elapsed: 0:22:10
num of updates: 6800
total loss: 0.22088
state loss: 0.17731
reward loss: 0.04358
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 34%|███▍      | 69/200 [22:29<42:52, 19.64s/it]

time elapsed: 0:22:29
num of updates: 6900
total loss: 0.21610
state loss: 0.17351
reward loss: 0.04259
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 35%|███▌      | 70/200 [22:48<42:19, 19.53s/it]

time elapsed: 0:22:48
num of updates: 7000
total loss: 0.21501
state loss: 0.17228
reward loss: 0.04273
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 36%|███▌      | 71/200 [23:17<48:25, 22.52s/it]

time elapsed: 0:23:18
num of updates: 7100
total loss: 0.20940
state loss: 0.16813
reward loss: 0.04126
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 36%|███▌      | 72/200 [23:39<47:37, 22.32s/it]

time elapsed: 0:23:40
num of updates: 7200
total loss: 0.20226
state loss: 0.16225
reward loss: 0.04001
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 36%|███▋      | 73/200 [23:59<45:31, 21.51s/it]

time elapsed: 0:23:59
num of updates: 7300
total loss: 0.19900
state loss: 0.15922
reward loss: 0.03978
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 37%|███▋      | 74/200 [24:18<43:39, 20.79s/it]

time elapsed: 0:24:18
num of updates: 7400
total loss: 0.19820
state loss: 0.15842
reward loss: 0.03978
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 38%|███▊      | 75/200 [24:38<42:35, 20.45s/it]

time elapsed: 0:24:38
num of updates: 7500
total loss: 0.19470
state loss: 0.15555
reward loss: 0.03915
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 38%|███▊      | 76/200 [24:58<42:06, 20.37s/it]

time elapsed: 0:24:58
num of updates: 7600
total loss: 0.18871
state loss: 0.15063
reward loss: 0.03808
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 38%|███▊      | 77/200 [25:18<41:45, 20.37s/it]

time elapsed: 0:25:19
num of updates: 7700
total loss: 0.18492
state loss: 0.14744
reward loss: 0.03748
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 39%|███▉      | 78/200 [25:37<40:50, 20.09s/it]

time elapsed: 0:25:38
num of updates: 7800
total loss: 0.18100
state loss: 0.14528
reward loss: 0.03572
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 40%|███▉      | 79/200 [25:56<39:46, 19.72s/it]

time elapsed: 0:25:57
num of updates: 7900
total loss: 0.17892
state loss: 0.14290
reward loss: 0.03602
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 40%|████      | 80/200 [26:16<39:11, 19.60s/it]

time elapsed: 0:26:16
num of updates: 8000
total loss: 0.17605
state loss: 0.14031
reward loss: 0.03575
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 40%|████      | 81/200 [26:35<38:55, 19.63s/it]

time elapsed: 0:26:36
num of updates: 8100
total loss: 0.17659
state loss: 0.14064
reward loss: 0.03596
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 41%|████      | 82/200 [26:56<39:00, 19.84s/it]

time elapsed: 0:26:56
num of updates: 8200
total loss: 0.17065
state loss: 0.13558
reward loss: 0.03507
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 42%|████▏     | 83/200 [27:17<39:23, 20.20s/it]

time elapsed: 0:27:17
num of updates: 8300
total loss: 0.16604
state loss: 0.13212
reward loss: 0.03392
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 42%|████▏     | 84/200 [27:36<38:41, 20.01s/it]

time elapsed: 0:27:37
num of updates: 8400
total loss: 0.16356
state loss: 0.13023
reward loss: 0.03332
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 42%|████▎     | 85/200 [27:55<37:47, 19.71s/it]

time elapsed: 0:27:56
num of updates: 8500
total loss: 0.16114
state loss: 0.12835
reward loss: 0.03279
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 43%|████▎     | 86/200 [28:15<37:30, 19.74s/it]

time elapsed: 0:28:16
num of updates: 8600
total loss: 0.15909
state loss: 0.12665
reward loss: 0.03244
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 44%|████▎     | 87/200 [28:35<37:26, 19.88s/it]

time elapsed: 0:28:36
num of updates: 8700
total loss: 0.15416
state loss: 0.12243
reward loss: 0.03172
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 44%|████▍     | 88/200 [28:55<37:14, 19.95s/it]

time elapsed: 0:28:56
num of updates: 8800
total loss: 0.15266
state loss: 0.12125
reward loss: 0.03141
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 44%|████▍     | 89/200 [29:15<36:24, 19.68s/it]

time elapsed: 0:29:15
num of updates: 8900
total loss: 0.14988
state loss: 0.11911
reward loss: 0.03077
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 45%|████▌     | 90/200 [29:34<35:48, 19.54s/it]

time elapsed: 0:29:34
num of updates: 9000
total loss: 0.14994
state loss: 0.11843
reward loss: 0.03151
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 46%|████▌     | 91/200 [29:53<35:34, 19.58s/it]

time elapsed: 0:29:54
num of updates: 9100
total loss: 0.14734
state loss: 0.11618
reward loss: 0.03116
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 46%|████▌     | 92/200 [30:14<35:38, 19.80s/it]

time elapsed: 0:30:14
num of updates: 9200
total loss: 0.14338
state loss: 0.11356
reward loss: 0.02982
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 46%|████▋     | 93/200 [30:34<35:47, 20.07s/it]

time elapsed: 0:30:35
num of updates: 9300
total loss: 0.14187
state loss: 0.11182
reward loss: 0.03004
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 47%|████▋     | 94/200 [30:55<35:51, 20.29s/it]

time elapsed: 0:30:56
num of updates: 9400
total loss: 0.13862
state loss: 0.10934
reward loss: 0.02928
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 48%|████▊     | 95/200 [31:16<35:36, 20.35s/it]

time elapsed: 0:31:16
num of updates: 9500
total loss: 0.13616
state loss: 0.10740
reward loss: 0.02875
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 48%|████▊     | 96/200 [31:35<34:40, 20.00s/it]

time elapsed: 0:31:35
num of updates: 9600
total loss: 0.13506
state loss: 0.10655
reward loss: 0.02852
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 48%|████▊     | 97/200 [31:55<34:26, 20.07s/it]

time elapsed: 0:31:56
num of updates: 9700
total loss: 0.13266
state loss: 0.10436
reward loss: 0.02829
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 49%|████▉     | 98/200 [32:17<34:58, 20.57s/it]

time elapsed: 0:32:17
num of updates: 9800
total loss: 0.13217
state loss: 0.10383
reward loss: 0.02834
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 50%|████▉     | 99/200 [32:38<34:49, 20.69s/it]

time elapsed: 0:32:38
num of updates: 9900
total loss: 0.12957
state loss: 0.10202
reward loss: 0.02755
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 50%|█████     | 100/200 [32:59<34:44, 20.84s/it]

time elapsed: 0:33:00
num of updates: 10000
total loss: 0.12816
state loss: 0.10076
reward loss: 0.02740
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 50%|█████     | 101/200 [33:19<34:02, 20.63s/it]

time elapsed: 0:33:20
num of updates: 10100
total loss: 0.12514
state loss: 0.09813
reward loss: 0.02701
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 51%|█████     | 102/200 [33:40<33:35, 20.56s/it]

time elapsed: 0:33:40
num of updates: 10200
total loss: 0.12426
state loss: 0.09764
reward loss: 0.02662
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 52%|█████▏    | 103/200 [34:00<33:23, 20.66s/it]

time elapsed: 0:34:01
num of updates: 10300
total loss: 0.12177
state loss: 0.09542
reward loss: 0.02635
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 52%|█████▏    | 104/200 [34:22<33:27, 20.92s/it]

time elapsed: 0:34:23
num of updates: 10400
total loss: 0.11953
state loss: 0.09366
reward loss: 0.02587
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 52%|█████▎    | 105/200 [34:43<33:05, 20.90s/it]

time elapsed: 0:34:43
num of updates: 10500
total loss: 0.11837
state loss: 0.09262
reward loss: 0.02575
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 53%|█████▎    | 106/200 [35:03<32:11, 20.55s/it]

time elapsed: 0:35:03
num of updates: 10600
total loss: 0.11751
state loss: 0.09192
reward loss: 0.02559
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 54%|█████▎    | 107/200 [35:21<31:04, 20.04s/it]

time elapsed: 0:35:22
num of updates: 10700
total loss: 0.11536
state loss: 0.09007
reward loss: 0.02529
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 54%|█████▍    | 108/200 [35:41<30:25, 19.84s/it]

time elapsed: 0:35:41
num of updates: 10800
total loss: 0.11422
state loss: 0.08936
reward loss: 0.02486
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 55%|█████▍    | 109/200 [36:01<30:08, 19.87s/it]

time elapsed: 0:36:01
num of updates: 10900
total loss: 0.11312
state loss: 0.08855
reward loss: 0.02457
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 55%|█████▌    | 110/200 [36:22<30:34, 20.38s/it]

time elapsed: 0:36:23
num of updates: 11000
total loss: 0.11007
state loss: 0.08579
reward loss: 0.02427
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 56%|█████▌    | 111/200 [36:44<30:48, 20.77s/it]

time elapsed: 0:36:45
num of updates: 11100
total loss: 0.11087
state loss: 0.08637
reward loss: 0.02450
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 56%|█████▌    | 112/200 [37:05<30:29, 20.79s/it]

time elapsed: 0:37:05
num of updates: 11200
total loss: 0.10723
state loss: 0.08407
reward loss: 0.02317
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 56%|█████▋    | 113/200 [37:28<31:12, 21.52s/it]

time elapsed: 0:37:29
num of updates: 11300
total loss: 0.10765
state loss: 0.08415
reward loss: 0.02350
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 57%|█████▋    | 114/200 [37:53<32:07, 22.41s/it]

time elapsed: 0:37:53
num of updates: 11400
total loss: 0.10665
state loss: 0.08289
reward loss: 0.02376
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 57%|█████▊    | 115/200 [38:13<31:02, 21.91s/it]

time elapsed: 0:38:14
num of updates: 11500
total loss: 0.10472
state loss: 0.08149
reward loss: 0.02324
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 58%|█████▊    | 116/200 [38:47<35:40, 25.48s/it]

time elapsed: 0:38:48
num of updates: 11600
total loss: 0.10386
state loss: 0.08118
reward loss: 0.02267
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 58%|█████▊    | 117/200 [39:09<33:36, 24.29s/it]

time elapsed: 0:39:09
num of updates: 11700
total loss: 0.10111
state loss: 0.07907
reward loss: 0.02204
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 59%|█████▉    | 118/200 [39:30<31:57, 23.38s/it]

time elapsed: 0:39:30
num of updates: 11800
total loss: 0.10179
state loss: 0.07938
reward loss: 0.02240
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 60%|█████▉    | 119/200 [39:51<30:40, 22.72s/it]

time elapsed: 0:39:52
num of updates: 11900
total loss: 0.10065
state loss: 0.07835
reward loss: 0.02230
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 60%|██████    | 120/200 [40:13<30:01, 22.52s/it]

time elapsed: 0:40:14
num of updates: 12000
total loss: 0.09750
state loss: 0.07648
reward loss: 0.02102
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 60%|██████    | 121/200 [40:33<28:43, 21.81s/it]

time elapsed: 0:40:34
num of updates: 12100
total loss: 0.09867
state loss: 0.07708
reward loss: 0.02160
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 61%|██████    | 122/200 [40:53<27:40, 21.29s/it]

time elapsed: 0:40:54
num of updates: 12200
total loss: 0.09692
state loss: 0.07575
reward loss: 0.02117
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 62%|██████▏   | 123/200 [41:12<26:17, 20.48s/it]

time elapsed: 0:41:13
num of updates: 12300
total loss: 0.09665
state loss: 0.07554
reward loss: 0.02110
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 62%|██████▏   | 124/200 [41:33<26:06, 20.61s/it]

time elapsed: 0:41:33
num of updates: 12400
total loss: 0.09668
state loss: 0.07531
reward loss: 0.02138
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 62%|██████▎   | 125/200 [41:55<26:20, 21.07s/it]

time elapsed: 0:41:56
num of updates: 12500
total loss: 0.09545
state loss: 0.07449
reward loss: 0.02096
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 63%|██████▎   | 126/200 [42:17<26:10, 21.23s/it]

time elapsed: 0:42:17
num of updates: 12600
total loss: 0.09303
state loss: 0.07290
reward loss: 0.02013
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 64%|██████▎   | 127/200 [42:38<25:50, 21.24s/it]

time elapsed: 0:42:38
num of updates: 12700
total loss: 0.09320
state loss: 0.07283
reward loss: 0.02037
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 64%|██████▍   | 128/200 [42:58<25:00, 20.84s/it]

time elapsed: 0:42:58
num of updates: 12800
total loss: 0.09197
state loss: 0.07203
reward loss: 0.01994
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 64%|██████▍   | 129/200 [43:17<24:00, 20.29s/it]

time elapsed: 0:43:17
num of updates: 12900
total loss: 0.09193
state loss: 0.07164
reward loss: 0.02029
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 65%|██████▌   | 130/200 [43:36<23:13, 19.90s/it]

time elapsed: 0:43:36
num of updates: 13000
total loss: 0.09018
state loss: 0.07069
reward loss: 0.01949
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 66%|██████▌   | 131/200 [43:55<22:45, 19.79s/it]

time elapsed: 0:43:56
num of updates: 13100
total loss: 0.08979
state loss: 0.06974
reward loss: 0.02004
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 66%|██████▌   | 132/200 [44:15<22:30, 19.86s/it]

time elapsed: 0:44:16
num of updates: 13200
total loss: 0.09033
state loss: 0.07040
reward loss: 0.01993
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 66%|██████▋   | 133/200 [44:36<22:26, 20.10s/it]

time elapsed: 0:44:37
num of updates: 13300
total loss: 0.08730
state loss: 0.06834
reward loss: 0.01896
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 67%|██████▋   | 134/200 [44:57<22:29, 20.45s/it]

time elapsed: 0:44:58
num of updates: 13400
total loss: 0.08819
state loss: 0.06889
reward loss: 0.01930
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 68%|██████▊   | 135/200 [45:17<22:01, 20.34s/it]

time elapsed: 0:45:18
num of updates: 13500
total loss: 0.08773
state loss: 0.06853
reward loss: 0.01919
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 68%|██████▊   | 136/200 [45:37<21:33, 20.21s/it]

time elapsed: 0:45:38
num of updates: 13600
total loss: 0.08715
state loss: 0.06822
reward loss: 0.01893
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 68%|██████▊   | 137/200 [45:58<21:18, 20.29s/it]

time elapsed: 0:45:58
num of updates: 13700
total loss: 0.08558
state loss: 0.06679
reward loss: 0.01878
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 69%|██████▉   | 138/200 [46:19<21:14, 20.56s/it]

time elapsed: 0:46:19
num of updates: 13800
total loss: 0.08443
state loss: 0.06602
reward loss: 0.01841
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 70%|██████▉   | 139/200 [46:40<20:55, 20.59s/it]

time elapsed: 0:46:40
num of updates: 13900
total loss: 0.08352
state loss: 0.06558
reward loss: 0.01795
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 70%|███████   | 140/200 [46:59<20:12, 20.21s/it]

time elapsed: 0:46:59
num of updates: 14000
total loss: 0.08219
state loss: 0.06467
reward loss: 0.01751
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 70%|███████   | 141/200 [47:19<19:56, 20.29s/it]

time elapsed: 0:47:20
num of updates: 14100
total loss: 0.08323
state loss: 0.06500
reward loss: 0.01823
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 71%|███████   | 142/200 [47:39<19:27, 20.13s/it]

time elapsed: 0:47:40
num of updates: 14200
total loss: 0.08175
state loss: 0.06401
reward loss: 0.01774
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 72%|███████▏  | 143/200 [47:59<19:09, 20.16s/it]

time elapsed: 0:48:00
num of updates: 14300
total loss: 0.08122
state loss: 0.06331
reward loss: 0.01791
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 72%|███████▏  | 144/200 [48:20<19:04, 20.44s/it]

time elapsed: 0:48:21
num of updates: 14400
total loss: 0.08078
state loss: 0.06304
reward loss: 0.01774
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 72%|███████▎  | 145/200 [48:41<18:41, 20.40s/it]

time elapsed: 0:48:41
num of updates: 14500
total loss: 0.07983
state loss: 0.06263
reward loss: 0.01720
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 73%|███████▎  | 146/200 [49:00<18:04, 20.08s/it]

time elapsed: 0:49:01
num of updates: 14600
total loss: 0.07854
state loss: 0.06170
reward loss: 0.01684
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 74%|███████▎  | 147/200 [49:20<17:35, 19.92s/it]

time elapsed: 0:49:20
num of updates: 14700
total loss: 0.07918
state loss: 0.06207
reward loss: 0.01711
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 74%|███████▍  | 148/200 [49:40<17:18, 19.98s/it]

time elapsed: 0:49:40
num of updates: 14800
total loss: 0.07921
state loss: 0.06225
reward loss: 0.01695
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 74%|███████▍  | 149/200 [50:00<17:10, 20.21s/it]

time elapsed: 0:50:01
num of updates: 14900
total loss: 0.07852
state loss: 0.06160
reward loss: 0.01692
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 75%|███████▌  | 150/200 [50:21<16:47, 20.16s/it]

time elapsed: 0:50:21
num of updates: 15000
total loss: 0.07659
state loss: 0.06044
reward loss: 0.01614
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 76%|███████▌  | 151/200 [50:41<16:31, 20.23s/it]

time elapsed: 0:50:42
num of updates: 15100
total loss: 0.07720
state loss: 0.06056
reward loss: 0.01664
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 76%|███████▌  | 152/200 [51:01<16:10, 20.21s/it]

time elapsed: 0:51:02
num of updates: 15200
total loss: 0.07726
state loss: 0.06039
reward loss: 0.01687
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 76%|███████▋  | 153/200 [51:22<15:54, 20.31s/it]

time elapsed: 0:51:22
num of updates: 15300
total loss: 0.07553
state loss: 0.05920
reward loss: 0.01633
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 77%|███████▋  | 154/200 [51:42<15:31, 20.25s/it]

time elapsed: 0:51:42
num of updates: 15400
total loss: 0.07524
state loss: 0.05901
reward loss: 0.01623
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 78%|███████▊  | 155/200 [52:01<14:59, 19.99s/it]

time elapsed: 0:52:02
num of updates: 15500
total loss: 0.07525
state loss: 0.05904
reward loss: 0.01621
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 78%|███████▊  | 156/200 [52:21<14:44, 20.10s/it]

time elapsed: 0:52:22
num of updates: 15600
total loss: 0.07655
state loss: 0.05994
reward loss: 0.01661
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 78%|███████▊  | 157/200 [52:42<14:31, 20.26s/it]

time elapsed: 0:52:43
num of updates: 15700
total loss: 0.07324
state loss: 0.05776
reward loss: 0.01549
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 79%|███████▉  | 158/200 [53:02<14:11, 20.28s/it]

time elapsed: 0:53:03
num of updates: 15800
total loss: 0.07412
state loss: 0.05818
reward loss: 0.01594
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 80%|███████▉  | 159/200 [53:22<13:39, 19.99s/it]

time elapsed: 0:53:22
num of updates: 15900
total loss: 0.07346
state loss: 0.05751
reward loss: 0.01595
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 80%|████████  | 160/200 [53:41<13:12, 19.80s/it]

time elapsed: 0:53:42
num of updates: 16000
total loss: 0.07373
state loss: 0.05809
reward loss: 0.01564
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 80%|████████  | 161/200 [54:01<12:52, 19.81s/it]

time elapsed: 0:54:02
num of updates: 16100
total loss: 0.07388
state loss: 0.05801
reward loss: 0.01587
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 81%|████████  | 162/200 [54:32<14:37, 23.10s/it]

time elapsed: 0:54:32
num of updates: 16200
total loss: 0.07244
state loss: 0.05706
reward loss: 0.01538
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 82%|████████▏ | 163/200 [54:54<14:09, 22.96s/it]

time elapsed: 0:54:55
num of updates: 16300
total loss: 0.07145
state loss: 0.05607
reward loss: 0.01538
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 82%|████████▏ | 164/200 [55:16<13:28, 22.46s/it]

time elapsed: 0:55:16
num of updates: 16400
total loss: 0.07187
state loss: 0.05654
reward loss: 0.01533
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 82%|████████▎ | 165/200 [55:36<12:46, 21.90s/it]

time elapsed: 0:55:37
num of updates: 16500
total loss: 0.07081
state loss: 0.05589
reward loss: 0.01492
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 83%|████████▎ | 166/200 [55:56<11:59, 21.17s/it]

time elapsed: 0:55:56
num of updates: 16600
total loss: 0.06989
state loss: 0.05488
reward loss: 0.01501
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 84%|████████▎ | 167/200 [56:15<11:19, 20.59s/it]

time elapsed: 0:56:16
num of updates: 16700
total loss: 0.06879
state loss: 0.05428
reward loss: 0.01451
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 84%|████████▍ | 168/200 [56:34<10:48, 20.27s/it]

time elapsed: 0:56:35
num of updates: 16800
total loss: 0.07012
state loss: 0.05518
reward loss: 0.01494
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 84%|████████▍ | 169/200 [56:55<10:26, 20.20s/it]

time elapsed: 0:56:55
num of updates: 16900
total loss: 0.06888
state loss: 0.05423
reward loss: 0.01466
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 85%|████████▌ | 170/200 [57:16<10:14, 20.50s/it]

time elapsed: 0:57:16
num of updates: 17000
total loss: 0.06839
state loss: 0.05383
reward loss: 0.01456
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 86%|████████▌ | 171/200 [57:38<10:05, 20.89s/it]

time elapsed: 0:57:38
num of updates: 17100
total loss: 0.06920
state loss: 0.05452
reward loss: 0.01468
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 86%|████████▌ | 172/200 [57:57<09:37, 20.62s/it]

time elapsed: 0:57:58
num of updates: 17200
total loss: 0.06870
state loss: 0.05393
reward loss: 0.01477
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 86%|████████▋ | 173/200 [58:17<09:03, 20.15s/it]

time elapsed: 0:58:17
num of updates: 17300
total loss: 0.06777
state loss: 0.05339
reward loss: 0.01437
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 87%|████████▋ | 174/200 [58:36<08:38, 19.95s/it]

time elapsed: 0:58:37
num of updates: 17400
total loss: 0.06709
state loss: 0.05299
reward loss: 0.01409
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 88%|████████▊ | 175/200 [58:56<08:19, 19.99s/it]

time elapsed: 0:58:57
num of updates: 17500
total loss: 0.06660
state loss: 0.05272
reward loss: 0.01387
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 88%|████████▊ | 176/200 [59:17<08:03, 20.15s/it]

time elapsed: 0:59:17
num of updates: 17600
total loss: 0.06729
state loss: 0.05283
reward loss: 0.01445
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 88%|████████▊ | 177/200 [59:38<07:48, 20.37s/it]

time elapsed: 0:59:38
num of updates: 17700
total loss: 0.06551
state loss: 0.05182
reward loss: 0.01368
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 89%|████████▉ | 178/200 [59:57<07:23, 20.16s/it]

time elapsed: 0:59:58
num of updates: 17800
total loss: 0.06551
state loss: 0.05172
reward loss: 0.01379
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 90%|████████▉ | 179/200 [1:00:17<06:59, 19.95s/it]

time elapsed: 1:00:17
num of updates: 17900
total loss: 0.06613
state loss: 0.05227
reward loss: 0.01386
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 90%|█████████ | 180/200 [1:00:37<06:39, 20.00s/it]

time elapsed: 1:00:37
num of updates: 18000
total loss: 0.06606
state loss: 0.05189
reward loss: 0.01417
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 90%|█████████ | 181/200 [1:00:57<06:23, 20.17s/it]

time elapsed: 1:00:58
num of updates: 18100
total loss: 0.06446
state loss: 0.05083
reward loss: 0.01363
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 91%|█████████ | 182/200 [1:01:17<06:01, 20.09s/it]

time elapsed: 1:01:18
num of updates: 18200
total loss: 0.06520
state loss: 0.05119
reward loss: 0.01401
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 92%|█████████▏| 183/200 [1:01:37<05:38, 19.89s/it]

time elapsed: 1:01:37
num of updates: 18300
total loss: 0.06474
state loss: 0.05107
reward loss: 0.01366
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 92%|█████████▏| 184/200 [1:01:57<05:20, 20.00s/it]

time elapsed: 1:01:58
num of updates: 18400
total loss: 0.06383
state loss: 0.05058
reward loss: 0.01325
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 92%|█████████▎| 185/200 [1:02:18<05:03, 20.20s/it]

time elapsed: 1:02:18
num of updates: 18500
total loss: 0.06487
state loss: 0.05122
reward loss: 0.01366
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 93%|█████████▎| 186/200 [1:02:39<04:45, 20.42s/it]

time elapsed: 1:02:39
num of updates: 18600
total loss: 0.06285
state loss: 0.04967
reward loss: 0.01318
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 94%|█████████▎| 187/200 [1:02:58<04:21, 20.08s/it]

time elapsed: 1:02:58
num of updates: 18700
total loss: 0.06415
state loss: 0.05035
reward loss: 0.01380
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 94%|█████████▍| 188/200 [1:03:17<03:57, 19.76s/it]

time elapsed: 1:03:17
num of updates: 18800
total loss: 0.06305
state loss: 0.04984
reward loss: 0.01321
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 94%|█████████▍| 189/200 [1:03:36<03:36, 19.68s/it]

time elapsed: 1:03:37
num of updates: 18900
total loss: 0.06250
state loss: 0.04924
reward loss: 0.01326
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 95%|█████████▌| 190/200 [1:03:57<03:18, 19.85s/it]

time elapsed: 1:03:57
num of updates: 19000
total loss: 0.06349
state loss: 0.05012
reward loss: 0.01338
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 96%|█████████▌| 191/200 [1:04:16<02:58, 19.82s/it]

time elapsed: 1:04:17
num of updates: 19100
total loss: 0.06262
state loss: 0.04943
reward loss: 0.01319
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 96%|█████████▌| 192/200 [1:04:36<02:37, 19.66s/it]

time elapsed: 1:04:36
num of updates: 19200
total loss: 0.06225
state loss: 0.04911
reward loss: 0.01313
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 96%|█████████▋| 193/200 [1:04:55<02:17, 19.65s/it]

time elapsed: 1:04:56
num of updates: 19300
total loss: 0.06257
state loss: 0.04935
reward loss: 0.01322
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 97%|█████████▋| 194/200 [1:05:15<01:58, 19.75s/it]

time elapsed: 1:05:16
num of updates: 19400
total loss: 0.06212
state loss: 0.04896
reward loss: 0.01316
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 98%|█████████▊| 195/200 [1:05:35<01:39, 19.85s/it]

time elapsed: 1:05:36
num of updates: 19500
total loss: 0.06178
state loss: 0.04887
reward loss: 0.01291
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 98%|█████████▊| 196/200 [1:05:55<01:18, 19.66s/it]

time elapsed: 1:05:55
num of updates: 19600
total loss: 0.06188
state loss: 0.04855
reward loss: 0.01332
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 98%|█████████▊| 197/200 [1:06:13<00:58, 19.42s/it]

time elapsed: 1:06:14
num of updates: 19700
total loss: 0.06008
state loss: 0.04756
reward loss: 0.01252
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


 99%|█████████▉| 198/200 [1:06:33<00:38, 19.46s/it]

time elapsed: 1:06:34
num of updates: 19800
total loss: 0.06007
state loss: 0.04755
reward loss: 0.01252
saving min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


100%|█████████▉| 199/200 [1:06:53<00:19, 19.72s/it]

time elapsed: 1:06:54
num of updates: 19900
total loss: 0.06066
state loss: 0.04800
reward loss: 0.01266
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt


100%|██████████| 200/200 [1:07:15<00:00, 20.18s/it]

time elapsed: 1:07:15
num of updates: 20000
total loss: 0.06093
state loss: 0.04796
reward loss: 0.01297
saving current model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt
finished training!
started training at: 24-05-07-21-53-44
finished training at: 24-05-07-23-00-59
total training time: 1:07:15
saved min loss model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44_best.pt
saved last updated model at: ./log/dt_halfcheetah_model_24-05-07-21-53-44.pt





AttributeError: '_csv.writer' object has no attribute 'close'