In [1]:
# import library

import sys
import os
import d4rl
import gym
import numpy as np
import wandb
import collections
import pickle
import csv

import torch
import torch.nn as nn
import torch.nn.functional as F

from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from utils import discount_cumsum, D4RLTrajectoryDataset, evaluate_on_env, get_d4rl_normalized_score
from model import MaskedCausalAttention, Block, DecisionTransformer


No module named 'flow'
No module named 'carla'


In [2]:
# set environment
# sys.path.append(r'C:\Develop\offlineRL-with-diffusion') 

In [3]:
# test mujoco, d4rl

!python ./test/mujoco_test.py

mujoco-py check passed
d4rl check passed


No module named 'flow'
No module named 'carla'
pybullet build time: Apr 30 2024 12:01:25
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [4]:
# data download
# if you downloaded, don't re-start.

# !python ./data/download_d4rl_datasets.py


In [5]:
# parameter setting

env_name = 'halfcheetah'
dataset = 'medium'

if env_name == 'hopper':
    env = gym.make('Hopper-v3')
    max_ep_len = 1000
    # env_targets = [3600, 1800]  # evaluation conditioning targets
    # scale = 1000.  # normalization for rewards/returns
elif env_name == 'halfcheetah':
    env = gym.make('HalfCheetah-v3')
    max_ep_len = 1000
    # env_targets = [12000, 6000]
    # scale = 1000.
elif env_name == 'walker2d':
    env = gym.make('Walker2d-v3')
    max_ep_len = 1000
    # env_targets = [5000, 2500]
    # scale = 1000.

DATA_PATH = f'data/train/{env_name}-{dataset}-v2.pkl'
VAL_DATA_PATH = f'data/val/val_{env_name}-{dataset}-v2.pkl'
TEMP_DATA_PATH = f'data/temp/{env_name}-{dataset}-v2.pkl'
LOG_PATH = "./log/"
DEVICE = 'cpu'

In [6]:
# env dataset check
env = gym.make('halfcheetah-medium-v2')
dataset = env.get_dataset()

# print(dataset['observations'][1]) # trajectory 단위로 뽑힘.


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
load datafile: 100%|██████████| 21/21 [00:02<00:00,  7.50it/s]


In [7]:
print("state shape: ", dataset['observations'].shape)
print("action shape: ", dataset['actions'].shape)
print("reward shape: ", dataset['rewards'].shape)
print("N: ", dataset['rewards'].shape[0])
print("train_size: ", int(0.8 * dataset['rewards'].shape[0]))


state shape:  (1000000, 17)
action shape:  (1000000, 6)
reward shape:  (1000000,)
N:  1000000
train_size:  800000


In [8]:
# data check
# check three trajectories

with open(DATA_PATH, 'rb') as f:
    trajectories = pickle.load(f)
n=0
max_rewards_list = []
for traj in trajectories:
    # print(f"{n+1}번째 trajectory")
    # print("traj: ", traj)
    print("state: ", traj['observations'], "\n")
    # print("action: ", traj['actions'], "\n")
    # print("next_state: ", traj['next_observations'], "\n")
    # print("reward: ", traj['rewards'], "\n")
    # print("max_rewards: ", max(traj['rewards']))
    # max_rewards_list.append(max(traj['rewards']))
    # print("")
    n+=1
    
# print(max(max_rewards_list))

    if n==3:
        break

state:  [[ 1.9831914e-02 -8.9501314e-02 -3.1969063e-03 ...  1.1365079e-01
   6.8424918e-02 -1.3811582e-01]
 [-3.8486063e-03 -5.2394319e-02  8.3050327e-03 ...  4.5068407e+00
  -9.2885571e+00  4.7328596e+00]
 [-5.5298433e-02 -7.7850236e-05 -2.3952831e-01 ... -7.0811687e+00
  -1.4037068e+00  7.5524049e+00]
 ...
 [-3.1975684e-01  5.3305399e-01 -4.8704177e-01 ...  1.5455554e+00
   2.6812897e+00  8.7905388e+00]
 [-3.2200974e-01  3.5745117e-01  1.0463273e-02 ... -6.3428599e-01
   1.6292539e+00  9.7356015e-01]
 [-3.0673215e-01  1.9843711e-01  6.9996923e-01 ...  5.0098950e-01
   1.5680059e+00  9.4733723e-02]] 

state:  [[ 4.7026437e-02 -2.1588113e-02  4.9151547e-02 ...  5.5219561e-02
  -1.5351681e-01 -4.6239123e-02]
 [ 4.1392505e-02  5.3802542e-02 -1.5022255e-01 ...  6.1133021e-01
  -7.4645710e+00  7.9509692e+00]
 [ 9.8547200e-04  8.8533267e-02 -4.3876743e-01 ...  8.5824745e-04
   5.9796906e+00  4.9521341e+00]
 ...
 [-1.4081973e-01 -7.7957302e-02 -2.6429656e-01 ...  1.0316861e+00
  -7.5645506e-

In [9]:
# check original data shape
with open(TEMP_DATA_PATH, 'rb') as f:
    temp_trajectories = pickle.load(f)
    
print("length: ", len(temp_trajectories)*len(temp_trajectories[0]['observations']))
print("n of epi: ", len(temp_trajectories))
print("n of traj in one epi: ", len(temp_trajectories[0]['observations']))

length:  1000000
n of epi:  1000
n of traj in one epi:  1000


In [10]:
# check train data shape
with open(DATA_PATH, 'rb') as f:
    train_trajectories = pickle.load(f)

print("length: ", len(train_trajectories)*len(train_trajectories[0]['observations']))
print("n of epi: ", len(train_trajectories))
print("n of traj in one epi: ", len(train_trajectories[0]['observations']))
# print("train state shape: ", train_trajectories['observations'].shape)
# print("train action shape: ", train_trajectories['actions'].shape)
# print("train reward shape: ", train_trajectories['rewards'].shape)


length:  950000
n of epi:  950
n of traj in one epi:  1000


In [11]:
# check valid data shape
with open(VAL_DATA_PATH, 'rb') as f:
    val_trajectories = pickle.load(f)

print("length: ", len(val_trajectories)*len(val_trajectories[0]['observations']))
print("n of epi: ", len(val_trajectories))
print("n of traj in one epi: ", len(val_trajectories[0]['observations']))
# print("val state shape: ", val_trajectories['observations'].shape)
# print("val action shape: ", val_trajectories['actions'].shape)
# print("val reward shape: ", val_trajectories['rewards'].shape)

length:  50000
n of epi:  50
n of traj in one epi:  1000


In [20]:
# train parameter
batch_size = 32
embed_dim = 128
activation = 'relu'
drop_out = 0.1
k = 20
n_blocks = 3    
n_heads = 1 # transformer head

# total updates = max_train_iters x num_updates_per_iter
max_train_iters = 200
num_updates_per_iter = 100
num_val_iter = 100
total_updates = 0
min_total_loss = 1e10

wt_decay = 1e-4             # weight decay
lr = 1e-4                   # learning rate
warmup_steps = 10000        # warmup steps for lr scheduler

# weight of mse loss
state_weight = 1
reward_weight = 1

# evaluation parameter
# max_eval_ep_len = 1000      # max len of one evaluation episode
# num_eval_ep = 10            # num of evaluation episodes per iteration

In [21]:
# check dim

state_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]

print("state dim: ", state_dim)
print("action dim: ", act_dim)

state dim:  17
action dim:  6


In [22]:
# test data
temp_dataset = D4RLTrajectoryDataset(DATA_PATH, 2)
temp_data_loader = DataLoader(temp_dataset,
						batch_size=32,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        
temp_data_iter = iter(temp_data_loader)

timesteps, states, next_states, actions, rewards, traj_mask = next(temp_data_iter)

timesteps = timesteps.to(DEVICE)	# B x T
states = states.to(DEVICE)			# B x T x state_dim
next_states = next_states.to(DEVICE) # B X T X state_dim
actions = actions.to(DEVICE)		# B x T x act_dim
rewards = rewards.to(DEVICE).unsqueeze(dim=-1) # B x T x 1

print("timesteps shape: ", timesteps.shape)
print("rewards shape: ", rewards.shape)
print("states shape: ", states.shape)
print("actions shape: ", actions.shape)

# print("state: ", states)
# print("action: ", actions)
# print("rewards: ", rewards)



timesteps shape:  torch.Size([32, 2])
rewards shape:  torch.Size([32, 2, 1])
states shape:  torch.Size([32, 2, 17])
actions shape:  torch.Size([32, 2, 6])


In [23]:
# test model

temp_model = DecisionTransformer(
			state_dim=state_dim,
			act_dim=act_dim,
			# reward 포함 + r0 제외
			n_blocks=n_blocks,
			h_dim=16,
			context_len=2,
			n_heads=n_heads,
			drop_p=drop_out,
		).to(DEVICE)
		
next_state_preds, rewards_preds = temp_model.forward(
												rewards=rewards,
												timesteps=timesteps,
												states=states,
												actions=actions,
											)

In [24]:
# load train preprocessing(normalization, fit padding) data

traj_dataset = D4RLTrajectoryDataset(DATA_PATH, k)
traj_data_loader = DataLoader(traj_dataset,
						batch_size=batch_size,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        
data_iter = iter(traj_data_loader)

## get state stats from dataset
state_mean, state_std = traj_dataset.get_state_stats()

In [25]:
# load validate preprocessing(normalization, fit padding) data

val_traj_dataset = D4RLTrajectoryDataset(DATA_PATH, k, val=True, val_dataset_path=VAL_DATA_PATH)
val_traj_data_loader = DataLoader(val_traj_dataset,
						batch_size=batch_size,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        
val_data_iter = iter(val_traj_data_loader)


In [26]:
# define model

model = DecisionTransformer(
			state_dim=state_dim,
			act_dim=act_dim,
			n_blocks=n_blocks,
			h_dim=embed_dim,
			context_len=k,
			n_heads=n_heads,
			drop_p=drop_out,
		).to(DEVICE)
  
optimizer = torch.optim.AdamW(
					model.parameters(), 
					lr=lr, 
					weight_decay=wt_decay
				)

scheduler = torch.optim.lr_scheduler.LambdaLR(
		optimizer,
		lambda steps: min((steps+1)/warmup_steps, 1)
	)
	


In [27]:
start_time = datetime.now().replace(microsecond=0)

start_time_str = start_time.strftime("%y-%m-%d-%H-%M-%S")

prefix = "dt_" + env_name

save_model_name =  prefix + "_model_" + start_time_str + ".pt"
save_model_path = os.path.join(LOG_PATH, save_model_name)
save_best_model_path = save_model_path[:-3] + "_best.pt"

log_csv_name = prefix + "_log_" + start_time_str + ".csv"
log_csv_path = os.path.join(LOG_PATH, log_csv_name)


csv_writer = csv.writer(open(log_csv_path, 'a', 1))
csv_header = (["duration", "num_updates", "total_loss", "state_loss", "reward_loss", "val_total_loss", "val_state_loss", "val_reward_loss"])

csv_writer.writerow(csv_header)


print("=" * 60)
print("start time: " + start_time_str)
print("=" * 60)

print("device set to: " + str(DEVICE))
print("dataset path: " + DATA_PATH)
print("model save path: " + save_model_path)
print("log csv save path: " + log_csv_path)

# train
for i_train_iter in tqdm(range(max_train_iters)):


	log_state_losses, log_reward_losses, log_total_losses = [], [], []
	val_log_state_losses, val_log_reward_losses, val_log_total_losses = [], [], []
	model.train()
 
	for _ in range(num_updates_per_iter):
		try:
			timesteps, states, next_states, actions, rewards, traj_mask = next(data_iter)
		except StopIteration:
			data_iter = iter(traj_data_loader)
			timesteps, states, next_states, actions, rewards, traj_mask = next(data_iter)

		timesteps = timesteps.to(DEVICE)	# B x T
		states = states.to(DEVICE)			# B x T x state_dim
		next_states = next_states.to(DEVICE) # B X T X state_dim
		actions = actions.to(DEVICE)		# B x T x act_dim
		rewards = rewards.to(DEVICE).unsqueeze(dim=-1) # B x T x 1
		traj_mask = traj_mask.to(DEVICE)	# B x T

		next_states_target = torch.clone(next_states).detach().to(DEVICE)
		rewards_target = torch.clone(rewards).detach().to(DEVICE)
	
		next_state_preds, rewards_preds = model.forward(
														timesteps=timesteps,
														states=states,
														actions=actions,
														rewards=rewards,
													)

		# only consider non padded elements
		next_state_preds = next_state_preds.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		next_states_target = next_states_target.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		
		rewards_preds = rewards_preds.view(-1, 1)[traj_mask.view(-1,) > 0]
		rewards_target = rewards_target.view(-1, 1)[traj_mask.view(-1,) > 0]

		state_loss = F.mse_loss(next_state_preds, next_states_target, reduction='mean') * state_weight
		reward_loss = F.mse_loss(rewards_preds, rewards_target, reduction='mean') * reward_weight
		
		total_loss = state_loss.add(reward_loss)
		total_loss = torch.mean(total_loss)

		optimizer.zero_grad()
		total_loss.backward()
		torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
		optimizer.step()
		scheduler.step()
		
		
		#save loss
		log_state_losses.append(state_loss.detach().cpu().item())
		log_reward_losses.append(reward_loss.detach().cpu().item())
		
		log_total_losses.append(total_loss.detach().cpu().item())
		
	# validation
	model.eval()
	for _ in range(num_val_iter):
		try:
			val_timesteps, val_states, val_next_states, val_actions, val_rewards, val_traj_mask = next(val_data_iter)
		except StopIteration:
			val_data_iter = iter(val_traj_data_loader)
			val_timesteps, val_states, val_next_states, val_actions, val_rewards, val_traj_mask = next(val_data_iter)
			
		val_timesteps = val_timesteps.to(DEVICE)	# B x T
		val_states = val_states.to(DEVICE)			# B x T x state_dim
		val_next_states = val_next_states.to(DEVICE) # B X T X state_dim
		val_actions = val_actions.to(DEVICE)		# B x T x act_dim
		val_rewards = val_rewards.to(DEVICE).unsqueeze(dim=-1) # B x T x 1
		val_traj_mask = val_traj_mask.to(DEVICE)	# B x T
				
		val_next_states_target = torch.clone(val_next_states).detach().to(DEVICE)
		val_rewards_target = torch.clone(val_rewards).detach().to(DEVICE)
		
		val_next_state_preds, val_rewards_preds = model.forward(
														timesteps=val_timesteps,
														states=val_states,
														actions=val_actions,
														rewards=val_rewards,
													)
													
		# only consider non padded elements
		val_next_state_preds = val_next_state_preds.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		val_next_states_target = val_next_states_target.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		
		val_rewards_preds = val_rewards_preds.view(-1, 1)[traj_mask.view(-1,) > 0]
		val_rewards_target = val_rewards_target.view(-1, 1)[traj_mask.view(-1,) > 0]

		val_state_loss = F.mse_loss(val_next_state_preds, val_next_states_target, reduction='mean') * state_weight
		val_reward_loss = F.mse_loss(val_rewards_preds, val_rewards_target, reduction='mean') * reward_weight
		
		val_total_loss = val_state_loss.add(val_reward_loss)
		val_total_loss = torch.mean(val_total_loss)
		
		# save val loss
		val_log_state_losses.append(val_state_loss.detach().cpu().item())
		val_log_reward_losses.append(val_reward_loss.detach().cpu().item())
		
		val_log_total_losses.append(val_total_loss.detach().cpu().item())
	
	mean_total_log_loss = np.mean(log_total_losses)
	mean_state_log_loss = np.mean(log_state_losses)
	mean_reward_log_loss = np.mean(log_reward_losses)
	
	mean_val_total_log_loss = np.mean(val_log_total_losses)
	mean_val_state_log_loss = np.mean(val_log_state_losses)
	mean_val_reward_log_loss = np.mean(val_log_reward_losses)

	time_elapsed = str(datetime.now().replace(microsecond=0) - start_time)

	total_updates += num_updates_per_iter

	log_str = ("=" * 60 + '\n' +
			"time elapsed: " + time_elapsed  + '\n' +
			"num of updates: " + str(total_updates) + '\n' +
			"train total loss: " + format(mean_total_log_loss, ".5f") + '\n' +
			"train state loss: " + format(mean_state_log_loss, ".5f") + '\n' +
			"train reward loss: " +  format(mean_reward_log_loss, ".5f") + '\n' +
			"val total loss: " + format(mean_val_total_log_loss, ".5f") + '\n' +
			"val state loss: " + format(mean_val_state_log_loss, ".5f") + '\n' +
			"val reward loss: " +  format(mean_val_reward_log_loss, ".5f")
			)

	print(log_str)

	log_data = [time_elapsed, total_updates, mean_total_log_loss, mean_state_log_loss, mean_reward_log_loss, \
		 mean_val_total_log_loss, mean_val_state_log_loss, mean_val_reward_log_loss]

	csv_writer.writerow(log_data)
	
	# save model
	if mean_val_total_log_loss <= min_total_loss:
		print("saving min loss model at: " + save_best_model_path)
		torch.save(model.state_dict(), save_best_model_path)
		min_total_log_loss = mean_val_total_log_loss

	print("saving current model at: " + save_model_path)
	torch.save(model.state_dict(), save_model_path)


print("=" * 60)
print("finished training!")
print("=" * 60)
end_time = datetime.now().replace(microsecond=0)
time_elapsed = str(end_time - start_time)
end_time_str = end_time.strftime("%y-%m-%d-%H-%M-%S")
print("started training at: " + start_time_str)
print("finished training at: " + end_time_str)
print("total training time: " + time_elapsed)
print("saved min loss model at: " + save_best_model_path)
print("saved last updated model at: " + save_model_path)
print("=" * 60)

csv_writer.close()

start time: 24-05-08-16-16-00
device set to: cpu
dataset path: data/train/halfcheetah-medium-v2.pkl
model save path: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt
log csv save path: ./log/dt_halfcheetah_log_24-05-08-16-16-00.csv


  0%|          | 1/200 [00:17<57:39, 17.38s/it]

time elapsed: 0:00:18
num of updates: 100
train total loss: 2.37590
train state loss: 1.31804
train reward loss: 1.05786
val total loss: 2.50211
val state loss: 1.34479
val reward loss: 1.15732
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  1%|          | 2/200 [00:32<52:01, 15.76s/it]

time elapsed: 0:00:32
num of updates: 200
train total loss: 2.42065
train state loss: 1.31751
train reward loss: 1.10314
val total loss: 2.43323
val state loss: 1.31195
val reward loss: 1.12128
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  2%|▏         | 3/200 [00:46<50:03, 15.25s/it]

time elapsed: 0:00:47
num of updates: 300
train total loss: 2.24365
train state loss: 1.26805
train reward loss: 0.97560
val total loss: 2.16872
val state loss: 1.23280
val reward loss: 0.93592
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  2%|▏         | 4/200 [01:02<50:28, 15.45s/it]

time elapsed: 0:01:03
num of updates: 400
train total loss: 2.10258
train state loss: 1.22623
train reward loss: 0.87635
val total loss: 2.10883
val state loss: 1.21988
val reward loss: 0.88895
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  2%|▎         | 5/200 [01:17<49:35, 15.26s/it]

time elapsed: 0:01:18
num of updates: 500
train total loss: 2.00585
train state loss: 1.20634
train reward loss: 0.79951
val total loss: 1.89001
val state loss: 1.18378
val reward loss: 0.70623
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  3%|▎         | 6/200 [01:32<49:45, 15.39s/it]

time elapsed: 0:01:33
num of updates: 600
train total loss: 1.76777
train state loss: 1.13463
train reward loss: 0.63313
val total loss: 1.65825
val state loss: 1.12216
val reward loss: 0.53609
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  4%|▎         | 7/200 [01:47<48:18, 15.02s/it]

time elapsed: 0:01:48
num of updates: 700
train total loss: 1.60868
train state loss: 1.11479
train reward loss: 0.49389
val total loss: 1.55150
val state loss: 1.10777
val reward loss: 0.44373
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  4%|▍         | 8/200 [02:04<50:41, 15.84s/it]

time elapsed: 0:02:05
num of updates: 800
train total loss: 1.45833
train state loss: 1.04708
train reward loss: 0.41126
val total loss: 1.43258
val state loss: 1.05632
val reward loss: 0.37625
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  4%|▍         | 9/200 [02:19<49:30, 15.55s/it]

time elapsed: 0:02:20
num of updates: 900
train total loss: 1.43920
train state loss: 1.04658
train reward loss: 0.39262
val total loss: 1.33043
val state loss: 1.00733
val reward loss: 0.32310
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  5%|▌         | 10/200 [02:35<49:04, 15.50s/it]

time elapsed: 0:02:35
num of updates: 1000
train total loss: 1.37597
train state loss: 1.01877
train reward loss: 0.35720
val total loss: 1.29461
val state loss: 0.99421
val reward loss: 0.30040
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  6%|▌         | 11/200 [02:50<48:25, 15.38s/it]

time elapsed: 0:02:51
num of updates: 1100
train total loss: 1.32067
train state loss: 0.99246
train reward loss: 0.32821
val total loss: 1.23268
val state loss: 0.96469
val reward loss: 0.26799
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  6%|▌         | 12/200 [03:05<48:14, 15.40s/it]

time elapsed: 0:03:06
num of updates: 1200
train total loss: 1.31199
train state loss: 0.99349
train reward loss: 0.31851
val total loss: 1.20510
val state loss: 0.95332
val reward loss: 0.25178
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  6%|▋         | 13/200 [03:19<46:55, 15.06s/it]

time elapsed: 0:03:20
num of updates: 1300
train total loss: 1.24791
train state loss: 0.95451
train reward loss: 0.29340
val total loss: 1.18397
val state loss: 0.94087
val reward loss: 0.24309
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  7%|▋         | 14/200 [03:34<45:46, 14.77s/it]

time elapsed: 0:03:34
num of updates: 1400
train total loss: 1.24942
train state loss: 0.96284
train reward loss: 0.28657
val total loss: 1.14061
val state loss: 0.92338
val reward loss: 0.21723
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  8%|▊         | 15/200 [03:48<45:06, 14.63s/it]

time elapsed: 0:03:49
num of updates: 1500
train total loss: 1.22657
train state loss: 0.95535
train reward loss: 0.27123
val total loss: 1.13946
val state loss: 0.91936
val reward loss: 0.22010
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  8%|▊         | 16/200 [04:03<45:27, 14.82s/it]

time elapsed: 0:04:04
num of updates: 1600
train total loss: 1.20720
train state loss: 0.94636
train reward loss: 0.26084
val total loss: 1.11925
val state loss: 0.91632
val reward loss: 0.20293
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  8%|▊         | 17/200 [04:17<44:35, 14.62s/it]

time elapsed: 0:04:18
num of updates: 1700
train total loss: 1.19150
train state loss: 0.94076
train reward loss: 0.25074
val total loss: 1.10733
val state loss: 0.91142
val reward loss: 0.19590
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


  9%|▉         | 18/200 [04:32<44:24, 14.64s/it]

time elapsed: 0:04:33
num of updates: 1800
train total loss: 1.17209
train state loss: 0.93316
train reward loss: 0.23893
val total loss: 1.08347
val state loss: 0.90015
val reward loss: 0.18332
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 10%|▉         | 19/200 [04:55<51:20, 17.02s/it]

time elapsed: 0:04:55
num of updates: 1900
train total loss: 1.14555
train state loss: 0.91943
train reward loss: 0.22612
val total loss: 1.07413
val state loss: 0.89329
val reward loss: 0.18084
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 10%|█         | 20/200 [05:10<49:54, 16.64s/it]

time elapsed: 0:05:11
num of updates: 2000
train total loss: 1.14481
train state loss: 0.92275
train reward loss: 0.22207
val total loss: 1.05430
val state loss: 0.88772
val reward loss: 0.16658
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 10%|█         | 21/200 [05:25<47:32, 15.93s/it]

time elapsed: 0:05:25
num of updates: 2100
train total loss: 1.12256
train state loss: 0.91180
train reward loss: 0.21076
val total loss: 1.04273
val state loss: 0.88076
val reward loss: 0.16197
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 11%|█         | 22/200 [05:38<45:19, 15.28s/it]

time elapsed: 0:05:39
num of updates: 2200
train total loss: 1.11023
train state loss: 0.90619
train reward loss: 0.20404
val total loss: 1.02684
val state loss: 0.86985
val reward loss: 0.15699
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 12%|█▏        | 23/200 [05:52<43:51, 14.87s/it]

time elapsed: 0:05:53
num of updates: 2300
train total loss: 1.08360
train state loss: 0.89436
train reward loss: 0.18924
val total loss: 1.00559
val state loss: 0.85603
val reward loss: 0.14955
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 12%|█▏        | 24/200 [06:06<42:37, 14.53s/it]

time elapsed: 0:06:07
num of updates: 2400
train total loss: 1.09062
train state loss: 0.90306
train reward loss: 0.18755
val total loss: 0.99051
val state loss: 0.85023
val reward loss: 0.14028
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 12%|█▎        | 25/200 [06:20<41:47, 14.33s/it]

time elapsed: 0:06:21
num of updates: 2500
train total loss: 1.05443
train state loss: 0.87963
train reward loss: 0.17481
val total loss: 0.96965
val state loss: 0.83658
val reward loss: 0.13307
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 13%|█▎        | 26/200 [06:34<41:27, 14.30s/it]

time elapsed: 0:06:35
num of updates: 2600
train total loss: 1.02619
train state loss: 0.86046
train reward loss: 0.16573
val total loss: 0.94521
val state loss: 0.81624
val reward loss: 0.12896
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 14%|█▎        | 27/200 [06:48<41:16, 14.32s/it]

time elapsed: 0:06:49
num of updates: 2700
train total loss: 0.99460
train state loss: 0.83929
train reward loss: 0.15530
val total loss: 0.91626
val state loss: 0.79481
val reward loss: 0.12145
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 14%|█▍        | 28/200 [07:03<41:27, 14.46s/it]

time elapsed: 0:07:04
num of updates: 2800
train total loss: 0.96787
train state loss: 0.81765
train reward loss: 0.15022
val total loss: 0.87825
val state loss: 0.76617
val reward loss: 0.11208
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 14%|█▍        | 29/200 [07:18<41:15, 14.48s/it]

time elapsed: 0:07:19
num of updates: 2900
train total loss: 0.93510
train state loss: 0.79270
train reward loss: 0.14240
val total loss: 0.85130
val state loss: 0.73560
val reward loss: 0.11570
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 15%|█▌        | 30/200 [07:32<40:51, 14.42s/it]

time elapsed: 0:07:33
num of updates: 3000
train total loss: 0.90431
train state loss: 0.76569
train reward loss: 0.13862
val total loss: 0.80853
val state loss: 0.70253
val reward loss: 0.10600
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 16%|█▌        | 31/200 [07:46<40:25, 14.35s/it]

time elapsed: 0:07:47
num of updates: 3100
train total loss: 0.84656
train state loss: 0.71902
train reward loss: 0.12753
val total loss: 0.76364
val state loss: 0.66406
val reward loss: 0.09958
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 16%|█▌        | 32/200 [08:01<40:11, 14.35s/it]

time elapsed: 0:08:01
num of updates: 3200
train total loss: 0.81057
train state loss: 0.68855
train reward loss: 0.12202
val total loss: 0.72454
val state loss: 0.63026
val reward loss: 0.09429
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 16%|█▋        | 33/200 [08:15<39:38, 14.24s/it]

time elapsed: 0:08:15
num of updates: 3300
train total loss: 0.77317
train state loss: 0.65679
train reward loss: 0.11638
val total loss: 0.68801
val state loss: 0.59538
val reward loss: 0.09263
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 17%|█▋        | 34/200 [08:29<39:33, 14.30s/it]

time elapsed: 0:08:30
num of updates: 3400
train total loss: 0.73359
train state loss: 0.62306
train reward loss: 0.11053
val total loss: 0.64547
val state loss: 0.55950
val reward loss: 0.08597
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 18%|█▊        | 35/200 [08:44<39:34, 14.39s/it]

time elapsed: 0:08:44
num of updates: 3500
train total loss: 0.69175
train state loss: 0.58877
train reward loss: 0.10298
val total loss: 0.60976
val state loss: 0.52906
val reward loss: 0.08070
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 18%|█▊        | 36/200 [08:58<39:45, 14.54s/it]

time elapsed: 0:08:59
num of updates: 3600
train total loss: 0.66206
train state loss: 0.56243
train reward loss: 0.09963
val total loss: 0.57419
val state loss: 0.49700
val reward loss: 0.07719
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 18%|█▊        | 37/200 [09:13<39:08, 14.41s/it]

time elapsed: 0:09:13
num of updates: 3700
train total loss: 0.62625
train state loss: 0.53300
train reward loss: 0.09325
val total loss: 0.54456
val state loss: 0.47183
val reward loss: 0.07273
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 19%|█▉        | 38/200 [09:26<38:24, 14.23s/it]

time elapsed: 0:09:27
num of updates: 3800
train total loss: 0.59479
train state loss: 0.50536
train reward loss: 0.08944
val total loss: 0.51753
val state loss: 0.44950
val reward loss: 0.06803
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 20%|█▉        | 39/200 [09:40<37:52, 14.12s/it]

time elapsed: 0:09:41
num of updates: 3900
train total loss: 0.55860
train state loss: 0.47454
train reward loss: 0.08407
val total loss: 0.47990
val state loss: 0.41707
val reward loss: 0.06283
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 20%|██        | 40/200 [09:54<37:45, 14.16s/it]

time elapsed: 0:09:55
num of updates: 4000
train total loss: 0.53205
train state loss: 0.45277
train reward loss: 0.07928
val total loss: 0.45515
val state loss: 0.39366
val reward loss: 0.06149
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 20%|██        | 41/200 [10:09<37:34, 14.18s/it]

time elapsed: 0:10:10
num of updates: 4100
train total loss: 0.49236
train state loss: 0.41872
train reward loss: 0.07364
val total loss: 0.42257
val state loss: 0.36388
val reward loss: 0.05869
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 21%|██        | 42/200 [10:23<37:32, 14.26s/it]

time elapsed: 0:10:24
num of updates: 4200
train total loss: 0.46259
train state loss: 0.39112
train reward loss: 0.07146
val total loss: 0.39740
val state loss: 0.34335
val reward loss: 0.05405
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 22%|██▏       | 43/200 [10:38<37:45, 14.43s/it]

time elapsed: 0:10:39
num of updates: 4300
train total loss: 0.44004
train state loss: 0.37179
train reward loss: 0.06825
val total loss: 0.36977
val state loss: 0.31915
val reward loss: 0.05061
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 22%|██▏       | 44/200 [10:53<38:06, 14.65s/it]

time elapsed: 0:10:54
num of updates: 4400
train total loss: 0.41248
train state loss: 0.34893
train reward loss: 0.06356
val total loss: 0.34744
val state loss: 0.29887
val reward loss: 0.04857
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 22%|██▎       | 45/200 [11:08<37:41, 14.59s/it]

time elapsed: 0:11:08
num of updates: 4500
train total loss: 0.39871
train state loss: 0.33640
train reward loss: 0.06231
val total loss: 0.33413
val state loss: 0.28771
val reward loss: 0.04641
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 23%|██▎       | 46/200 [11:22<37:06, 14.46s/it]

time elapsed: 0:11:23
num of updates: 4600
train total loss: 0.37158
train state loss: 0.31421
train reward loss: 0.05738
val total loss: 0.31897
val state loss: 0.27215
val reward loss: 0.04683
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 24%|██▎       | 47/200 [11:36<36:29, 14.31s/it]

time elapsed: 0:11:37
num of updates: 4700
train total loss: 0.36176
train state loss: 0.30437
train reward loss: 0.05739
val total loss: 0.30361
val state loss: 0.25855
val reward loss: 0.04506
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 24%|██▍       | 48/200 [11:50<36:33, 14.43s/it]

time elapsed: 0:11:51
num of updates: 4800
train total loss: 0.34259
train state loss: 0.28818
train reward loss: 0.05442
val total loss: 0.28382
val state loss: 0.24059
val reward loss: 0.04323
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 24%|██▍       | 49/200 [12:05<36:32, 14.52s/it]

time elapsed: 0:12:06
num of updates: 4900
train total loss: 0.32402
train state loss: 0.27185
train reward loss: 0.05216
val total loss: 0.26851
val state loss: 0.22591
val reward loss: 0.04261
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 25%|██▌       | 50/200 [12:20<36:21, 14.54s/it]

time elapsed: 0:12:21
num of updates: 5000
train total loss: 0.31108
train state loss: 0.26080
train reward loss: 0.05028
val total loss: 0.25552
val state loss: 0.21276
val reward loss: 0.04276
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 26%|██▌       | 51/200 [12:34<36:15, 14.60s/it]

time elapsed: 0:12:35
num of updates: 5100
train total loss: 0.29580
train state loss: 0.24753
train reward loss: 0.04826
val total loss: 0.23675
val state loss: 0.19823
val reward loss: 0.03852
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 26%|██▌       | 52/200 [12:49<36:00, 14.60s/it]

time elapsed: 0:12:50
num of updates: 5200
train total loss: 0.28495
train state loss: 0.23667
train reward loss: 0.04828
val total loss: 0.22788
val state loss: 0.19148
val reward loss: 0.03640
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 26%|██▋       | 53/200 [13:03<35:23, 14.44s/it]

time elapsed: 0:13:04
num of updates: 5300
train total loss: 0.27389
train state loss: 0.22688
train reward loss: 0.04701
val total loss: 0.22081
val state loss: 0.17981
val reward loss: 0.04100
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 27%|██▋       | 54/200 [13:17<34:38, 14.24s/it]

time elapsed: 0:13:18
num of updates: 5400
train total loss: 0.25783
train state loss: 0.21357
train reward loss: 0.04426
val total loss: 0.21293
val state loss: 0.17379
val reward loss: 0.03914
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 28%|██▊       | 55/200 [13:31<34:10, 14.14s/it]

time elapsed: 0:13:32
num of updates: 5500
train total loss: 0.24605
train state loss: 0.20358
train reward loss: 0.04247
val total loss: 0.19537
val state loss: 0.16039
val reward loss: 0.03498
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 28%|██▊       | 56/200 [13:45<33:51, 14.11s/it]

time elapsed: 0:13:46
num of updates: 5600
train total loss: 0.24517
train state loss: 0.20222
train reward loss: 0.04294
val total loss: 0.19029
val state loss: 0.15747
val reward loss: 0.03281
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 28%|██▊       | 57/200 [13:59<33:58, 14.26s/it]

time elapsed: 0:14:00
num of updates: 5700
train total loss: 0.23444
train state loss: 0.19311
train reward loss: 0.04133
val total loss: 0.18315
val state loss: 0.14998
val reward loss: 0.03318
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 29%|██▉       | 58/200 [14:14<33:45, 14.27s/it]

time elapsed: 0:14:15
num of updates: 5800
train total loss: 0.22867
train state loss: 0.18714
train reward loss: 0.04153
val total loss: 0.17957
val state loss: 0.14444
val reward loss: 0.03512
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 30%|██▉       | 59/200 [14:29<33:55, 14.44s/it]

time elapsed: 0:14:29
num of updates: 5900
train total loss: 0.22299
train state loss: 0.18345
train reward loss: 0.03954
val total loss: 0.17436
val state loss: 0.14069
val reward loss: 0.03367
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 30%|███       | 60/200 [14:44<34:21, 14.72s/it]

time elapsed: 0:14:45
num of updates: 6000
train total loss: 0.21253
train state loss: 0.17406
train reward loss: 0.03846
val total loss: 0.16759
val state loss: 0.13584
val reward loss: 0.03175
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 30%|███       | 61/200 [14:59<34:04, 14.71s/it]

time elapsed: 0:15:00
num of updates: 6100
train total loss: 0.20735
train state loss: 0.16956
train reward loss: 0.03780
val total loss: 0.17333
val state loss: 0.13521
val reward loss: 0.03812
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 31%|███       | 62/200 [15:13<33:24, 14.53s/it]

time elapsed: 0:15:14
num of updates: 6200
train total loss: 0.19819
train state loss: 0.16220
train reward loss: 0.03598
val total loss: 0.15762
val state loss: 0.12774
val reward loss: 0.02988
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 32%|███▏      | 63/200 [15:27<32:38, 14.30s/it]

time elapsed: 0:15:27
num of updates: 6300
train total loss: 0.19541
train state loss: 0.16024
train reward loss: 0.03517
val total loss: 0.15941
val state loss: 0.12675
val reward loss: 0.03266
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 32%|███▏      | 64/200 [15:40<32:04, 14.15s/it]

time elapsed: 0:15:41
num of updates: 6400
train total loss: 0.19159
train state loss: 0.15686
train reward loss: 0.03473
val total loss: 0.15565
val state loss: 0.12457
val reward loss: 0.03108
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 32%|███▎      | 65/200 [15:55<31:53, 14.18s/it]

time elapsed: 0:15:55
num of updates: 6500
train total loss: 0.18479
train state loss: 0.15134
train reward loss: 0.03345
val total loss: 0.15644
val state loss: 0.12243
val reward loss: 0.03401
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 33%|███▎      | 66/200 [16:09<31:41, 14.19s/it]

time elapsed: 0:16:10
num of updates: 6600
train total loss: 0.17712
train state loss: 0.14507
train reward loss: 0.03205
val total loss: 0.14501
val state loss: 0.11857
val reward loss: 0.02644
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 34%|███▎      | 67/200 [16:24<32:08, 14.50s/it]

time elapsed: 0:16:25
num of updates: 6700
train total loss: 0.18116
train state loss: 0.14846
train reward loss: 0.03270
val total loss: 0.14821
val state loss: 0.11746
val reward loss: 0.03075
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 34%|███▍      | 68/200 [16:40<32:44, 14.88s/it]

time elapsed: 0:16:41
num of updates: 6800
train total loss: 0.17492
train state loss: 0.14274
train reward loss: 0.03218
val total loss: 0.14989
val state loss: 0.11576
val reward loss: 0.03413
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 34%|███▍      | 69/200 [16:55<32:47, 15.02s/it]

time elapsed: 0:16:56
num of updates: 6900
train total loss: 0.16509
train state loss: 0.13473
train reward loss: 0.03036
val total loss: 0.14293
val state loss: 0.11358
val reward loss: 0.02935
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 35%|███▌      | 70/200 [17:10<32:29, 14.99s/it]

time elapsed: 0:17:11
num of updates: 7000
train total loss: 0.16623
train state loss: 0.13579
train reward loss: 0.03045
val total loss: 0.13720
val state loss: 0.11138
val reward loss: 0.02582
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 36%|███▌      | 71/200 [17:25<31:53, 14.83s/it]

time elapsed: 0:17:25
num of updates: 7100
train total loss: 0.16318
train state loss: 0.13305
train reward loss: 0.03013
val total loss: 0.13182
val state loss: 0.10512
val reward loss: 0.02670
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 36%|███▌      | 72/200 [17:39<31:08, 14.60s/it]

time elapsed: 0:17:39
num of updates: 7200
train total loss: 0.15978
train state loss: 0.12986
train reward loss: 0.02991
val total loss: 0.14000
val state loss: 0.10957
val reward loss: 0.03043
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 36%|███▋      | 73/200 [17:53<30:42, 14.50s/it]

time elapsed: 0:17:54
num of updates: 7300
train total loss: 0.15410
train state loss: 0.12587
train reward loss: 0.02823
val total loss: 0.13689
val state loss: 0.10546
val reward loss: 0.03142
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 37%|███▋      | 74/200 [18:07<30:00, 14.29s/it]

time elapsed: 0:18:08
num of updates: 7400
train total loss: 0.15340
train state loss: 0.12505
train reward loss: 0.02835
val total loss: 0.13429
val state loss: 0.10512
val reward loss: 0.02917
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 38%|███▊      | 75/200 [18:21<29:37, 14.22s/it]

time elapsed: 0:18:22
num of updates: 7500
train total loss: 0.15363
train state loss: 0.12539
train reward loss: 0.02825
val total loss: 0.13556
val state loss: 0.10299
val reward loss: 0.03256
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 38%|███▊      | 76/200 [18:35<29:27, 14.25s/it]

time elapsed: 0:18:36
num of updates: 7600
train total loss: 0.14585
train state loss: 0.11951
train reward loss: 0.02634
val total loss: 0.12909
val state loss: 0.10015
val reward loss: 0.02894
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 38%|███▊      | 77/200 [18:50<29:22, 14.33s/it]

time elapsed: 0:18:50
num of updates: 7700
train total loss: 0.14734
train state loss: 0.12028
train reward loss: 0.02706
val total loss: 0.11934
val state loss: 0.09524
val reward loss: 0.02410
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 39%|███▉      | 78/200 [19:05<29:34, 14.54s/it]

time elapsed: 0:19:05
num of updates: 7800
train total loss: 0.13969
train state loss: 0.11424
train reward loss: 0.02545
val total loss: 0.12949
val state loss: 0.10017
val reward loss: 0.02932
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 40%|███▉      | 79/200 [19:20<29:34, 14.67s/it]

time elapsed: 0:19:20
num of updates: 7900
train total loss: 0.14196
train state loss: 0.11598
train reward loss: 0.02597
val total loss: 0.12907
val state loss: 0.10005
val reward loss: 0.02902
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 40%|████      | 80/200 [19:34<29:19, 14.66s/it]

time elapsed: 0:19:35
num of updates: 8000
train total loss: 0.13728
train state loss: 0.11191
train reward loss: 0.02537
val total loss: 0.12317
val state loss: 0.09440
val reward loss: 0.02878
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 40%|████      | 81/200 [19:49<29:23, 14.82s/it]

time elapsed: 0:19:50
num of updates: 8100
train total loss: 0.13505
train state loss: 0.11006
train reward loss: 0.02498
val total loss: 0.11848
val state loss: 0.09152
val reward loss: 0.02697
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 41%|████      | 82/200 [20:04<28:46, 14.63s/it]

time elapsed: 0:20:04
num of updates: 8200
train total loss: 0.13084
train state loss: 0.10656
train reward loss: 0.02428
val total loss: 0.11339
val state loss: 0.09136
val reward loss: 0.02202
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 42%|████▏     | 83/200 [20:25<32:37, 16.73s/it]

time elapsed: 0:20:26
num of updates: 8300
train total loss: 0.12882
train state loss: 0.10529
train reward loss: 0.02353
val total loss: 0.11099
val state loss: 0.08926
val reward loss: 0.02173
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 42%|████▏     | 84/200 [20:42<32:18, 16.71s/it]

time elapsed: 0:20:43
num of updates: 8400
train total loss: 0.12939
train state loss: 0.10524
train reward loss: 0.02415
val total loss: 0.11821
val state loss: 0.09269
val reward loss: 0.02552
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 42%|████▎     | 85/200 [20:57<31:03, 16.21s/it]

time elapsed: 0:20:58
num of updates: 8500
train total loss: 0.12681
train state loss: 0.10282
train reward loss: 0.02399
val total loss: 0.11713
val state loss: 0.09020
val reward loss: 0.02693
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 43%|████▎     | 86/200 [21:12<30:06, 15.84s/it]

time elapsed: 0:21:13
num of updates: 8600
train total loss: 0.12163
train state loss: 0.09915
train reward loss: 0.02248
val total loss: 0.11440
val state loss: 0.08841
val reward loss: 0.02599
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 44%|████▎     | 87/200 [21:26<29:01, 15.41s/it]

time elapsed: 0:21:27
num of updates: 8700
train total loss: 0.12394
train state loss: 0.10105
train reward loss: 0.02289
val total loss: 0.11893
val state loss: 0.09210
val reward loss: 0.02683
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 44%|████▍     | 88/200 [21:40<28:01, 15.01s/it]

time elapsed: 0:21:41
num of updates: 8800
train total loss: 0.12173
train state loss: 0.09924
train reward loss: 0.02249
val total loss: 0.11569
val state loss: 0.08764
val reward loss: 0.02806
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 44%|████▍     | 89/200 [21:55<27:45, 15.00s/it]

time elapsed: 0:21:56
num of updates: 8900
train total loss: 0.11707
train state loss: 0.09535
train reward loss: 0.02172
val total loss: 0.11303
val state loss: 0.08813
val reward loss: 0.02491
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 45%|████▌     | 90/200 [22:10<27:20, 14.91s/it]

time elapsed: 0:22:11
num of updates: 9000
train total loss: 0.12029
train state loss: 0.09784
train reward loss: 0.02244
val total loss: 0.11711
val state loss: 0.08416
val reward loss: 0.03296
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 46%|████▌     | 91/200 [22:27<28:12, 15.53s/it]

time elapsed: 0:22:28
num of updates: 9100
train total loss: 0.11512
train state loss: 0.09363
train reward loss: 0.02149
val total loss: 0.11243
val state loss: 0.08491
val reward loss: 0.02752
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 46%|████▌     | 92/200 [22:47<30:35, 17.00s/it]

time elapsed: 0:22:48
num of updates: 9200
train total loss: 0.11262
train state loss: 0.09204
train reward loss: 0.02058
val total loss: 0.10751
val state loss: 0.08410
val reward loss: 0.02342
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 46%|████▋     | 93/200 [23:07<31:36, 17.72s/it]

time elapsed: 0:23:08
num of updates: 9300
train total loss: 0.11343
train state loss: 0.09204
train reward loss: 0.02139
val total loss: 0.11289
val state loss: 0.08875
val reward loss: 0.02414
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 47%|████▋     | 94/200 [23:24<30:58, 17.54s/it]

time elapsed: 0:23:25
num of updates: 9400
train total loss: 0.11034
train state loss: 0.08990
train reward loss: 0.02044
val total loss: 0.10760
val state loss: 0.08001
val reward loss: 0.02759
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 48%|████▊     | 95/200 [23:40<30:06, 17.20s/it]

time elapsed: 0:23:41
num of updates: 9500
train total loss: 0.10975
train state loss: 0.08925
train reward loss: 0.02050
val total loss: 0.10760
val state loss: 0.07979
val reward loss: 0.02781
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 48%|████▊     | 96/200 [23:57<29:17, 16.89s/it]

time elapsed: 0:23:57
num of updates: 9600
train total loss: 0.10973
train state loss: 0.08922
train reward loss: 0.02051
val total loss: 0.10964
val state loss: 0.08162
val reward loss: 0.02801
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 48%|████▊     | 97/200 [24:12<28:16, 16.47s/it]

time elapsed: 0:24:13
num of updates: 9700
train total loss: 0.10555
train state loss: 0.08627
train reward loss: 0.01929
val total loss: 0.09371
val state loss: 0.07174
val reward loss: 0.02197
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 49%|████▉     | 98/200 [24:27<27:26, 16.14s/it]

time elapsed: 0:24:28
num of updates: 9800
train total loss: 0.10552
train state loss: 0.08593
train reward loss: 0.01959
val total loss: 0.10338
val state loss: 0.07603
val reward loss: 0.02735
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 50%|████▉     | 99/200 [24:44<27:11, 16.15s/it]

time elapsed: 0:24:44
num of updates: 9900
train total loss: 0.10584
train state loss: 0.08608
train reward loss: 0.01976
val total loss: 0.10257
val state loss: 0.07943
val reward loss: 0.02314
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 50%|█████     | 100/200 [25:01<27:22, 16.42s/it]

time elapsed: 0:25:02
num of updates: 10000
train total loss: 0.10477
train state loss: 0.08558
train reward loss: 0.01919
val total loss: 0.09831
val state loss: 0.07592
val reward loss: 0.02239
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 50%|█████     | 101/200 [25:18<27:31, 16.68s/it]

time elapsed: 0:25:19
num of updates: 10100
train total loss: 0.10108
train state loss: 0.08244
train reward loss: 0.01864
val total loss: 0.10040
val state loss: 0.07778
val reward loss: 0.02262
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 51%|█████     | 102/200 [25:34<27:02, 16.55s/it]

time elapsed: 0:25:35
num of updates: 10200
train total loss: 0.10167
train state loss: 0.08304
train reward loss: 0.01863
val total loss: 0.09592
val state loss: 0.07370
val reward loss: 0.02221
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 52%|█████▏    | 103/200 [25:49<26:06, 16.15s/it]

time elapsed: 0:25:50
num of updates: 10300
train total loss: 0.10143
train state loss: 0.08285
train reward loss: 0.01858
val total loss: 0.09874
val state loss: 0.07413
val reward loss: 0.02461
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 52%|█████▏    | 104/200 [26:05<25:37, 16.02s/it]

time elapsed: 0:26:06
num of updates: 10400
train total loss: 0.09892
train state loss: 0.08073
train reward loss: 0.01819
val total loss: 0.09601
val state loss: 0.07239
val reward loss: 0.02363
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 52%|█████▎    | 105/200 [26:19<24:34, 15.52s/it]

time elapsed: 0:26:20
num of updates: 10500
train total loss: 0.09676
train state loss: 0.07949
train reward loss: 0.01728
val total loss: 0.09924
val state loss: 0.07773
val reward loss: 0.02151
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 53%|█████▎    | 106/200 [26:34<23:51, 15.23s/it]

time elapsed: 0:26:35
num of updates: 10600
train total loss: 0.09588
train state loss: 0.07849
train reward loss: 0.01739
val total loss: 0.09409
val state loss: 0.07052
val reward loss: 0.02357
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 54%|█████▎    | 107/200 [26:49<23:30, 15.17s/it]

time elapsed: 0:26:50
num of updates: 10700
train total loss: 0.09573
train state loss: 0.07845
train reward loss: 0.01728
val total loss: 0.10028
val state loss: 0.07546
val reward loss: 0.02482
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 54%|█████▍    | 108/200 [27:04<23:16, 15.18s/it]

time elapsed: 0:27:05
num of updates: 10800
train total loss: 0.09486
train state loss: 0.07776
train reward loss: 0.01710
val total loss: 0.09869
val state loss: 0.07586
val reward loss: 0.02283
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 55%|█████▍    | 109/200 [27:19<22:50, 15.06s/it]

time elapsed: 0:27:20
num of updates: 10900
train total loss: 0.09286
train state loss: 0.07615
train reward loss: 0.01672
val total loss: 0.09203
val state loss: 0.07008
val reward loss: 0.02195
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 55%|█████▌    | 110/200 [27:33<22:14, 14.83s/it]

time elapsed: 0:27:34
num of updates: 11000
train total loss: 0.09278
train state loss: 0.07594
train reward loss: 0.01684
val total loss: 0.09166
val state loss: 0.07116
val reward loss: 0.02050
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 56%|█████▌    | 111/200 [27:48<21:48, 14.70s/it]

time elapsed: 0:27:49
num of updates: 11100
train total loss: 0.09088
train state loss: 0.07447
train reward loss: 0.01642
val total loss: 0.10085
val state loss: 0.07630
val reward loss: 0.02455
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 56%|█████▌    | 112/200 [28:03<21:44, 14.82s/it]

time elapsed: 0:28:04
num of updates: 11200
train total loss: 0.09215
train state loss: 0.07521
train reward loss: 0.01694
val total loss: 0.09344
val state loss: 0.07217
val reward loss: 0.02128
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 56%|█████▋    | 113/200 [28:17<21:24, 14.76s/it]

time elapsed: 0:28:18
num of updates: 11300
train total loss: 0.09095
train state loss: 0.07409
train reward loss: 0.01687
val total loss: 0.09155
val state loss: 0.06858
val reward loss: 0.02297
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 57%|█████▋    | 114/200 [28:32<21:00, 14.65s/it]

time elapsed: 0:28:33
num of updates: 11400
train total loss: 0.09039
train state loss: 0.07425
train reward loss: 0.01614
val total loss: 0.08720
val state loss: 0.06731
val reward loss: 0.01990
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 57%|█████▊    | 115/200 [28:47<20:48, 14.69s/it]

time elapsed: 0:28:47
num of updates: 11500
train total loss: 0.08926
train state loss: 0.07329
train reward loss: 0.01597
val total loss: 0.09040
val state loss: 0.06847
val reward loss: 0.02193
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 58%|█████▊    | 116/200 [29:02<20:43, 14.80s/it]

time elapsed: 0:29:03
num of updates: 11600
train total loss: 0.08800
train state loss: 0.07215
train reward loss: 0.01584
val total loss: 0.09151
val state loss: 0.06881
val reward loss: 0.02270
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 58%|█████▊    | 117/200 [29:16<20:22, 14.72s/it]

time elapsed: 0:29:17
num of updates: 11700
train total loss: 0.08663
train state loss: 0.07121
train reward loss: 0.01541
val total loss: 0.09572
val state loss: 0.07124
val reward loss: 0.02448
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 59%|█████▉    | 118/200 [29:31<20:12, 14.79s/it]

time elapsed: 0:29:32
num of updates: 11800
train total loss: 0.08854
train state loss: 0.07228
train reward loss: 0.01626
val total loss: 0.09163
val state loss: 0.06607
val reward loss: 0.02555
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 60%|█████▉    | 119/200 [29:46<19:52, 14.73s/it]

time elapsed: 0:29:47
num of updates: 11900
train total loss: 0.08635
train state loss: 0.07054
train reward loss: 0.01581
val total loss: 0.08844
val state loss: 0.06811
val reward loss: 0.02033
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 60%|██████    | 120/200 [30:01<19:48, 14.85s/it]

time elapsed: 0:30:02
num of updates: 12000
train total loss: 0.08718
train state loss: 0.07125
train reward loss: 0.01593
val total loss: 0.08765
val state loss: 0.06627
val reward loss: 0.02138
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 60%|██████    | 121/200 [30:16<19:29, 14.81s/it]

time elapsed: 0:30:16
num of updates: 12100
train total loss: 0.08310
train state loss: 0.06856
train reward loss: 0.01454
val total loss: 0.08525
val state loss: 0.06461
val reward loss: 0.02064
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 61%|██████    | 122/200 [30:30<18:58, 14.60s/it]

time elapsed: 0:30:31
num of updates: 12200
train total loss: 0.08293
train state loss: 0.06829
train reward loss: 0.01464
val total loss: 0.09026
val state loss: 0.06894
val reward loss: 0.02132
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 62%|██████▏   | 123/200 [30:44<18:43, 14.59s/it]

time elapsed: 0:30:45
num of updates: 12300
train total loss: 0.08531
train state loss: 0.06997
train reward loss: 0.01534
val total loss: 0.09393
val state loss: 0.06957
val reward loss: 0.02436
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 62%|██████▏   | 124/200 [30:59<18:41, 14.76s/it]

time elapsed: 0:31:00
num of updates: 12400
train total loss: 0.08367
train state loss: 0.06843
train reward loss: 0.01524
val total loss: 0.08703
val state loss: 0.06499
val reward loss: 0.02204
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 62%|██████▎   | 125/200 [31:14<18:31, 14.83s/it]

time elapsed: 0:31:15
num of updates: 12500
train total loss: 0.08262
train state loss: 0.06797
train reward loss: 0.01466
val total loss: 0.08391
val state loss: 0.06445
val reward loss: 0.01946
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 63%|██████▎   | 126/200 [31:29<18:04, 14.66s/it]

time elapsed: 0:31:30
num of updates: 12600
train total loss: 0.08193
train state loss: 0.06758
train reward loss: 0.01435
val total loss: 0.08532
val state loss: 0.06639
val reward loss: 0.01893
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 64%|██████▎   | 127/200 [31:43<17:34, 14.45s/it]

time elapsed: 0:31:44
num of updates: 12700
train total loss: 0.08176
train state loss: 0.06725
train reward loss: 0.01451
val total loss: 0.08230
val state loss: 0.06494
val reward loss: 0.01737
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 64%|██████▍   | 128/200 [31:57<17:16, 14.40s/it]

time elapsed: 0:31:58
num of updates: 12800
train total loss: 0.08165
train state loss: 0.06736
train reward loss: 0.01428
val total loss: 0.08415
val state loss: 0.06475
val reward loss: 0.01941
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 64%|██████▍   | 129/200 [32:11<16:59, 14.36s/it]

time elapsed: 0:32:12
num of updates: 12900
train total loss: 0.08155
train state loss: 0.06713
train reward loss: 0.01442
val total loss: 0.08705
val state loss: 0.06319
val reward loss: 0.02386
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 65%|██████▌   | 130/200 [32:26<16:47, 14.40s/it]

time elapsed: 0:32:27
num of updates: 13000
train total loss: 0.08006
train state loss: 0.06602
train reward loss: 0.01405
val total loss: 0.08444
val state loss: 0.06721
val reward loss: 0.01723
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 66%|██████▌   | 131/200 [32:40<16:40, 14.51s/it]

time elapsed: 0:32:41
num of updates: 13100
train total loss: 0.07987
train state loss: 0.06568
train reward loss: 0.01419
val total loss: 0.08445
val state loss: 0.06289
val reward loss: 0.02156
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 66%|██████▌   | 132/200 [32:56<16:45, 14.79s/it]

time elapsed: 0:32:57
num of updates: 13200
train total loss: 0.07878
train state loss: 0.06499
train reward loss: 0.01380
val total loss: 0.08772
val state loss: 0.06565
val reward loss: 0.02207
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 66%|██████▋   | 133/200 [33:11<16:37, 14.89s/it]

time elapsed: 0:33:12
num of updates: 13300
train total loss: 0.07820
train state loss: 0.06479
train reward loss: 0.01340
val total loss: 0.08402
val state loss: 0.06298
val reward loss: 0.02104
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 67%|██████▋   | 134/200 [33:26<16:22, 14.89s/it]

time elapsed: 0:33:27
num of updates: 13400
train total loss: 0.07826
train state loss: 0.06456
train reward loss: 0.01370
val total loss: 0.08734
val state loss: 0.06327
val reward loss: 0.02407
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 68%|██████▊   | 135/200 [33:40<15:56, 14.71s/it]

time elapsed: 0:33:41
num of updates: 13500
train total loss: 0.07680
train state loss: 0.06371
train reward loss: 0.01309
val total loss: 0.08378
val state loss: 0.06184
val reward loss: 0.02193
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 68%|██████▊   | 136/200 [33:55<15:37, 14.65s/it]

time elapsed: 0:33:56
num of updates: 13600
train total loss: 0.07792
train state loss: 0.06404
train reward loss: 0.01388
val total loss: 0.08419
val state loss: 0.06268
val reward loss: 0.02151
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 68%|██████▊   | 137/200 [34:09<15:25, 14.69s/it]

time elapsed: 0:34:10
num of updates: 13700
train total loss: 0.07519
train state loss: 0.06231
train reward loss: 0.01289
val total loss: 0.08527
val state loss: 0.06608
val reward loss: 0.01919
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 69%|██████▉   | 138/200 [34:24<15:12, 14.72s/it]

time elapsed: 0:34:25
num of updates: 13800
train total loss: 0.07698
train state loss: 0.06376
train reward loss: 0.01322
val total loss: 0.08686
val state loss: 0.06379
val reward loss: 0.02307
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 70%|██████▉   | 139/200 [34:40<15:18, 15.05s/it]

time elapsed: 0:34:41
num of updates: 13900
train total loss: 0.07597
train state loss: 0.06275
train reward loss: 0.01322
val total loss: 0.08317
val state loss: 0.06182
val reward loss: 0.02135
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 70%|███████   | 140/200 [34:56<15:10, 15.17s/it]

time elapsed: 0:34:56
num of updates: 14000
train total loss: 0.07588
train state loss: 0.06202
train reward loss: 0.01386
val total loss: 0.08547
val state loss: 0.06540
val reward loss: 0.02007
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 70%|███████   | 141/200 [35:10<14:50, 15.10s/it]

time elapsed: 0:35:11
num of updates: 14100
train total loss: 0.07512
train state loss: 0.06207
train reward loss: 0.01305
val total loss: 0.08448
val state loss: 0.06491
val reward loss: 0.01958
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 71%|███████   | 142/200 [35:25<14:22, 14.87s/it]

time elapsed: 0:35:26
num of updates: 14200
train total loss: 0.07521
train state loss: 0.06211
train reward loss: 0.01310
val total loss: 0.08433
val state loss: 0.06217
val reward loss: 0.02216
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 72%|███████▏  | 143/200 [35:41<14:24, 15.16s/it]

time elapsed: 0:35:42
num of updates: 14300
train total loss: 0.07426
train state loss: 0.06134
train reward loss: 0.01291
val total loss: 0.08292
val state loss: 0.06145
val reward loss: 0.02147
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 72%|███████▏  | 144/200 [36:01<15:40, 16.80s/it]

time elapsed: 0:36:02
num of updates: 14400
train total loss: 0.07481
train state loss: 0.06154
train reward loss: 0.01327
val total loss: 0.08774
val state loss: 0.05987
val reward loss: 0.02787
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 72%|███████▎  | 145/200 [36:16<14:51, 16.21s/it]

time elapsed: 0:36:17
num of updates: 14500
train total loss: 0.07452
train state loss: 0.06137
train reward loss: 0.01315
val total loss: 0.08161
val state loss: 0.06066
val reward loss: 0.02095
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 73%|███████▎  | 146/200 [36:31<14:09, 15.73s/it]

time elapsed: 0:36:32
num of updates: 14600
train total loss: 0.07320
train state loss: 0.06042
train reward loss: 0.01278
val total loss: 0.08554
val state loss: 0.06209
val reward loss: 0.02346
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 74%|███████▎  | 147/200 [36:46<13:39, 15.47s/it]

time elapsed: 0:36:46
num of updates: 14700
train total loss: 0.07324
train state loss: 0.06063
train reward loss: 0.01261
val total loss: 0.07900
val state loss: 0.06049
val reward loss: 0.01851
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 74%|███████▍  | 148/200 [37:01<13:25, 15.49s/it]

time elapsed: 0:37:02
num of updates: 14800
train total loss: 0.07298
train state loss: 0.06002
train reward loss: 0.01296
val total loss: 0.07924
val state loss: 0.06073
val reward loss: 0.01851
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 74%|███████▍  | 149/200 [37:16<12:56, 15.23s/it]

time elapsed: 0:37:17
num of updates: 14900
train total loss: 0.07223
train state loss: 0.05977
train reward loss: 0.01247
val total loss: 0.08211
val state loss: 0.06198
val reward loss: 0.02012
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 75%|███████▌  | 150/200 [37:30<12:29, 14.99s/it]

time elapsed: 0:37:31
num of updates: 15000
train total loss: 0.07260
train state loss: 0.05993
train reward loss: 0.01267
val total loss: 0.08237
val state loss: 0.06205
val reward loss: 0.02032
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 76%|███████▌  | 151/200 [37:44<11:58, 14.66s/it]

time elapsed: 0:37:45
num of updates: 15100
train total loss: 0.07135
train state loss: 0.05921
train reward loss: 0.01214
val total loss: 0.07884
val state loss: 0.05802
val reward loss: 0.02082
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 76%|███████▌  | 152/200 [37:59<11:44, 14.67s/it]

time elapsed: 0:38:00
num of updates: 15200
train total loss: 0.07181
train state loss: 0.05928
train reward loss: 0.01252
val total loss: 0.08389
val state loss: 0.06316
val reward loss: 0.02073
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 76%|███████▋  | 153/200 [38:13<11:25, 14.59s/it]

time elapsed: 0:38:14
num of updates: 15300
train total loss: 0.07038
train state loss: 0.05815
train reward loss: 0.01222
val total loss: 0.08102
val state loss: 0.05916
val reward loss: 0.02186
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 77%|███████▋  | 154/200 [38:28<11:12, 14.63s/it]

time elapsed: 0:38:29
num of updates: 15400
train total loss: 0.07111
train state loss: 0.05889
train reward loss: 0.01222
val total loss: 0.08101
val state loss: 0.06310
val reward loss: 0.01791
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 78%|███████▊  | 155/200 [38:43<10:59, 14.65s/it]

time elapsed: 0:38:43
num of updates: 15500
train total loss: 0.07135
train state loss: 0.05900
train reward loss: 0.01235
val total loss: 0.07797
val state loss: 0.05838
val reward loss: 0.01959
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 78%|███████▊  | 156/200 [38:58<10:55, 14.90s/it]

time elapsed: 0:38:59
num of updates: 15600
train total loss: 0.06906
train state loss: 0.05708
train reward loss: 0.01197
val total loss: 0.08152
val state loss: 0.06160
val reward loss: 0.01991
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 78%|███████▊  | 157/200 [39:13<10:44, 14.98s/it]

time elapsed: 0:39:14
num of updates: 15700
train total loss: 0.07067
train state loss: 0.05818
train reward loss: 0.01249
val total loss: 0.08670
val state loss: 0.06323
val reward loss: 0.02348
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 79%|███████▉  | 158/200 [39:28<10:30, 15.01s/it]

time elapsed: 0:39:29
num of updates: 15800
train total loss: 0.06942
train state loss: 0.05752
train reward loss: 0.01190
val total loss: 0.08343
val state loss: 0.06282
val reward loss: 0.02060
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 80%|███████▉  | 159/200 [39:43<10:05, 14.76s/it]

time elapsed: 0:39:43
num of updates: 15900
train total loss: 0.06937
train state loss: 0.05721
train reward loss: 0.01216
val total loss: 0.07915
val state loss: 0.05834
val reward loss: 0.02081
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 80%|████████  | 160/200 [39:57<09:51, 14.78s/it]

time elapsed: 0:39:58
num of updates: 16000
train total loss: 0.06874
train state loss: 0.05687
train reward loss: 0.01187
val total loss: 0.07954
val state loss: 0.05998
val reward loss: 0.01956
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 80%|████████  | 161/200 [40:12<09:30, 14.62s/it]

time elapsed: 0:40:12
num of updates: 16100
train total loss: 0.06691
train state loss: 0.05547
train reward loss: 0.01144
val total loss: 0.07464
val state loss: 0.05922
val reward loss: 0.01542
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 81%|████████  | 162/200 [40:26<09:14, 14.59s/it]

time elapsed: 0:40:27
num of updates: 16200
train total loss: 0.06746
train state loss: 0.05568
train reward loss: 0.01177
val total loss: 0.08131
val state loss: 0.06015
val reward loss: 0.02116
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 82%|████████▏ | 163/200 [40:41<08:59, 14.58s/it]

time elapsed: 0:40:42
num of updates: 16300
train total loss: 0.06750
train state loss: 0.05578
train reward loss: 0.01172
val total loss: 0.07867
val state loss: 0.05804
val reward loss: 0.02063
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 82%|████████▏ | 164/200 [40:56<08:57, 14.93s/it]

time elapsed: 0:40:57
num of updates: 16400
train total loss: 0.06797
train state loss: 0.05588
train reward loss: 0.01208
val total loss: 0.07960
val state loss: 0.05801
val reward loss: 0.02159
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 82%|████████▎ | 165/200 [41:11<08:43, 14.96s/it]

time elapsed: 0:41:12
num of updates: 16500
train total loss: 0.06779
train state loss: 0.05572
train reward loss: 0.01207
val total loss: 0.07830
val state loss: 0.05883
val reward loss: 0.01947
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 83%|████████▎ | 166/200 [41:26<08:24, 14.84s/it]

time elapsed: 0:41:27
num of updates: 16600
train total loss: 0.06524
train state loss: 0.05405
train reward loss: 0.01118
val total loss: 0.08303
val state loss: 0.06131
val reward loss: 0.02172
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 84%|████████▎ | 167/200 [41:40<08:02, 14.62s/it]

time elapsed: 0:41:41
num of updates: 16700
train total loss: 0.06471
train state loss: 0.05378
train reward loss: 0.01093
val total loss: 0.08106
val state loss: 0.06199
val reward loss: 0.01907
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 84%|████████▍ | 168/200 [41:54<07:45, 14.56s/it]

time elapsed: 0:41:55
num of updates: 16800
train total loss: 0.06585
train state loss: 0.05426
train reward loss: 0.01160
val total loss: 0.07928
val state loss: 0.05869
val reward loss: 0.02059
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 84%|████████▍ | 169/200 [42:09<07:31, 14.55s/it]

time elapsed: 0:42:10
num of updates: 16900
train total loss: 0.06621
train state loss: 0.05475
train reward loss: 0.01146
val total loss: 0.07673
val state loss: 0.05877
val reward loss: 0.01796
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 85%|████████▌ | 170/200 [42:24<07:16, 14.56s/it]

time elapsed: 0:42:24
num of updates: 17000
train total loss: 0.06514
train state loss: 0.05388
train reward loss: 0.01126
val total loss: 0.07662
val state loss: 0.05671
val reward loss: 0.01991
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 86%|████████▌ | 171/200 [42:38<07:04, 14.65s/it]

time elapsed: 0:42:39
num of updates: 17100
train total loss: 0.06571
train state loss: 0.05436
train reward loss: 0.01135
val total loss: 0.08016
val state loss: 0.06128
val reward loss: 0.01888
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 86%|████████▌ | 172/200 [42:54<06:57, 14.90s/it]

time elapsed: 0:42:55
num of updates: 17200
train total loss: 0.06408
train state loss: 0.05312
train reward loss: 0.01096
val total loss: 0.07300
val state loss: 0.05268
val reward loss: 0.02031
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 86%|████████▋ | 173/200 [43:09<06:39, 14.81s/it]

time elapsed: 0:43:09
num of updates: 17300
train total loss: 0.06549
train state loss: 0.05406
train reward loss: 0.01143
val total loss: 0.07776
val state loss: 0.05870
val reward loss: 0.01906
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 87%|████████▋ | 174/200 [43:23<06:21, 14.66s/it]

time elapsed: 0:43:24
num of updates: 17400
train total loss: 0.06489
train state loss: 0.05362
train reward loss: 0.01127
val total loss: 0.07430
val state loss: 0.05668
val reward loss: 0.01762
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 88%|████████▊ | 175/200 [43:37<06:03, 14.53s/it]

time elapsed: 0:43:38
num of updates: 17500
train total loss: 0.06482
train state loss: 0.05352
train reward loss: 0.01130
val total loss: 0.08416
val state loss: 0.05980
val reward loss: 0.02436
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 88%|████████▊ | 176/200 [43:52<05:53, 14.75s/it]

time elapsed: 0:43:53
num of updates: 17600
train total loss: 0.06391
train state loss: 0.05288
train reward loss: 0.01103
val total loss: 0.07598
val state loss: 0.05805
val reward loss: 0.01793
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 88%|████████▊ | 177/200 [44:07<05:37, 14.70s/it]

time elapsed: 0:44:08
num of updates: 17700
train total loss: 0.06425
train state loss: 0.05319
train reward loss: 0.01106
val total loss: 0.07619
val state loss: 0.05739
val reward loss: 0.01881
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 89%|████████▉ | 178/200 [44:22<05:25, 14.78s/it]

time elapsed: 0:44:23
num of updates: 17800
train total loss: 0.06345
train state loss: 0.05244
train reward loss: 0.01101
val total loss: 0.07576
val state loss: 0.05686
val reward loss: 0.01890
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 90%|████████▉ | 179/200 [44:38<05:17, 15.12s/it]

time elapsed: 0:44:39
num of updates: 17900
train total loss: 0.06427
train state loss: 0.05297
train reward loss: 0.01129
val total loss: 0.08244
val state loss: 0.05946
val reward loss: 0.02298
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 90%|█████████ | 180/200 [44:53<05:04, 15.24s/it]

time elapsed: 0:44:54
num of updates: 18000
train total loss: 0.06376
train state loss: 0.05270
train reward loss: 0.01106
val total loss: 0.07628
val state loss: 0.05801
val reward loss: 0.01827
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 90%|█████████ | 181/200 [45:08<04:45, 15.04s/it]

time elapsed: 0:45:09
num of updates: 18100
train total loss: 0.06222
train state loss: 0.05159
train reward loss: 0.01063
val total loss: 0.07610
val state loss: 0.05769
val reward loss: 0.01841
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 91%|█████████ | 182/200 [45:22<04:25, 14.77s/it]

time elapsed: 0:45:23
num of updates: 18200
train total loss: 0.06335
train state loss: 0.05244
train reward loss: 0.01092
val total loss: 0.08142
val state loss: 0.05850
val reward loss: 0.02292
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 92%|█████████▏| 183/200 [45:36<04:07, 14.55s/it]

time elapsed: 0:45:37
num of updates: 18300
train total loss: 0.06079
train state loss: 0.05053
train reward loss: 0.01026
val total loss: 0.08153
val state loss: 0.05849
val reward loss: 0.02303
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 92%|█████████▏| 184/200 [45:50<03:52, 14.50s/it]

time elapsed: 0:45:51
num of updates: 18400
train total loss: 0.06303
train state loss: 0.05170
train reward loss: 0.01133
val total loss: 0.07880
val state loss: 0.05915
val reward loss: 0.01966
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 92%|█████████▎| 185/200 [46:05<03:37, 14.50s/it]

time elapsed: 0:46:06
num of updates: 18500
train total loss: 0.06152
train state loss: 0.05078
train reward loss: 0.01074
val total loss: 0.07605
val state loss: 0.05653
val reward loss: 0.01952
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 93%|█████████▎| 186/200 [46:20<03:23, 14.54s/it]

time elapsed: 0:46:20
num of updates: 18600
train total loss: 0.06140
train state loss: 0.05091
train reward loss: 0.01049
val total loss: 0.06865
val state loss: 0.05363
val reward loss: 0.01502
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 94%|█████████▎| 187/200 [46:34<03:10, 14.62s/it]

time elapsed: 0:46:35
num of updates: 18700
train total loss: 0.06200
train state loss: 0.05111
train reward loss: 0.01089
val total loss: 0.06931
val state loss: 0.05287
val reward loss: 0.01644
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 94%|█████████▍| 188/200 [46:50<02:58, 14.89s/it]

time elapsed: 0:46:51
num of updates: 18800
train total loss: 0.06018
train state loss: 0.04976
train reward loss: 0.01042
val total loss: 0.07034
val state loss: 0.05581
val reward loss: 0.01453
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 94%|█████████▍| 189/200 [47:05<02:43, 14.88s/it]

time elapsed: 0:47:06
num of updates: 18900
train total loss: 0.06083
train state loss: 0.05055
train reward loss: 0.01028
val total loss: 0.07200
val state loss: 0.05553
val reward loss: 0.01648
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 95%|█████████▌| 190/200 [47:19<02:27, 14.74s/it]

time elapsed: 0:47:20
num of updates: 19000
train total loss: 0.06095
train state loss: 0.05023
train reward loss: 0.01073
val total loss: 0.07241
val state loss: 0.05324
val reward loss: 0.01917
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 96%|█████████▌| 191/200 [47:33<02:10, 14.48s/it]

time elapsed: 0:47:34
num of updates: 19100
train total loss: 0.06134
train state loss: 0.05054
train reward loss: 0.01081
val total loss: 0.07957
val state loss: 0.05833
val reward loss: 0.02124
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 96%|█████████▌| 192/200 [47:47<01:54, 14.37s/it]

time elapsed: 0:47:48
num of updates: 19200
train total loss: 0.05983
train state loss: 0.04950
train reward loss: 0.01034
val total loss: 0.07181
val state loss: 0.05393
val reward loss: 0.01788
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 96%|█████████▋| 193/200 [48:02<01:41, 14.46s/it]

time elapsed: 0:48:03
num of updates: 19300
train total loss: 0.06041
train state loss: 0.04986
train reward loss: 0.01056
val total loss: 0.07631
val state loss: 0.05804
val reward loss: 0.01827
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 97%|█████████▋| 194/200 [48:17<01:27, 14.61s/it]

time elapsed: 0:48:18
num of updates: 19400
train total loss: 0.05980
train state loss: 0.04942
train reward loss: 0.01037
val total loss: 0.07019
val state loss: 0.05470
val reward loss: 0.01548
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 98%|█████████▊| 195/200 [48:32<01:13, 14.68s/it]

time elapsed: 0:48:33
num of updates: 19500
train total loss: 0.05908
train state loss: 0.04898
train reward loss: 0.01010
val total loss: 0.07731
val state loss: 0.05715
val reward loss: 0.02016
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 98%|█████████▊| 196/200 [48:47<00:59, 14.88s/it]

time elapsed: 0:48:48
num of updates: 19600
train total loss: 0.05855
train state loss: 0.04861
train reward loss: 0.00994
val total loss: 0.07537
val state loss: 0.05679
val reward loss: 0.01857
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 98%|█████████▊| 197/200 [49:02<00:45, 15.05s/it]

time elapsed: 0:49:03
num of updates: 19700
train total loss: 0.05785
train state loss: 0.04799
train reward loss: 0.00987
val total loss: 0.07570
val state loss: 0.05457
val reward loss: 0.02113
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


 99%|█████████▉| 198/200 [49:17<00:30, 15.03s/it]

time elapsed: 0:49:18
num of updates: 19800
train total loss: 0.05900
train state loss: 0.04889
train reward loss: 0.01012
val total loss: 0.07684
val state loss: 0.05480
val reward loss: 0.02204
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


100%|█████████▉| 199/200 [49:32<00:14, 14.76s/it]

time elapsed: 0:49:32
num of updates: 19900
train total loss: 0.05970
train state loss: 0.04938
train reward loss: 0.01032
val total loss: 0.07327
val state loss: 0.05498
val reward loss: 0.01829
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt


100%|██████████| 200/200 [49:46<00:00, 14.93s/it]

time elapsed: 0:49:46
num of updates: 20000
train total loss: 0.05931
train state loss: 0.04904
train reward loss: 0.01026
val total loss: 0.07241
val state loss: 0.05611
val reward loss: 0.01630
saving min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saving current model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt
finished training!
started training at: 24-05-08-16-16-00
finished training at: 24-05-08-17-05-46
total training time: 0:49:46
saved min loss model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00_best.pt
saved last updated model at: ./log/dt_halfcheetah_model_24-05-08-16-16-00.pt





AttributeError: '_csv.writer' object has no attribute 'close'