In [1]:
# import library

import sys
import os
import d4rl
import gym
import numpy as np
import collections
import pickle
import csv

import torch
import torch.nn as nn
import torch.nn.functional as F

from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from utils import D4RLTrajectoryDataset
from model import DecisionTransformer


No module named 'flow'
No module named 'carla'


In [2]:
# set environment
# sys.path.append(r'C:\Develop\offlineRL-with-diffusion') 

In [3]:
# test mujoco, d4rl

!python ./test/mujoco_test.py

mujoco-py check passed
d4rl check passed


No module named 'flow'
No module named 'carla'
pybullet build time: Apr 30 2024 12:01:25
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [4]:
# data download
# if you downloaded, don't re-start.

# !python ./data/download_d4rl_datasets.py


In [5]:
# parameter setting

env_name = 'halfcheetah'
dataset = 'medium'

if env_name == 'hopper':
    env = gym.make('Hopper-v2')
    max_ep_len = 1000
    # env_targets = [3600, 1800]  # evaluation conditioning targets
    # scale = 1000.  # normalization for rewards/returns
elif env_name == 'halfcheetah':
    env = gym.make('HalfCheetah-v2')
    max_ep_len = 1000
    # env_targets = [12000, 6000]
    # scale = 1000.
elif env_name == 'walker2d':
    env = gym.make('Walker2d-v2')
    max_ep_len = 1000
    # env_targets = [5000, 2500]
    # scale = 1000.

DATA_PATH = f'data/train/{env_name}-{dataset}-v2.pkl'
VAL_DATA_PATH = f'data/val/val_{env_name}-{dataset}-v2.pkl'
TEMP_DATA_PATH = f'data/temp/{env_name}-{dataset}-v2.pkl'
LOG_PATH = "./log/"
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

  logger.warn(


In [6]:
# env dataset check
check_env = gym.make('halfcheetah-medium-v2')
dataset = check_env.get_dataset()

# print(dataset['observations'][1]) # trajectory 단위로 뽑힘.


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
load datafile: 100%|██████████| 21/21 [00:02<00:00,  8.04it/s]


In [7]:
# print("overall len: ", dataset.shape)

In [8]:
print("state shape: ", dataset['observations'].shape)
print("action shape: ", dataset['actions'].shape)
print("reward shape: ", dataset['rewards'].shape)
print("N: ", dataset['rewards'].shape[0])
print("train_size: ", int(0.8 * dataset['rewards'].shape[0]))


state shape:  (1000000, 17)
action shape:  (1000000, 6)
reward shape:  (1000000,)
N:  1000000
train_size:  800000


In [9]:
# data check
# check three trajectories

with open(DATA_PATH, 'rb') as f:
    trajectories = pickle.load(f)
n=0
max_rewards_list = []
for traj in trajectories:
    # print(f"{n+1}번째 trajectory")
    # print("traj: ", traj)
    print("state: ", traj['observations'], "\n")
    # print("action: ", traj['actions'], "\n")
    # print("next_state: ", traj['next_observations'], "\n")
    # print("reward: ", traj['rewards'], "\n")
    # print("max_rewards: ", max(traj['rewards']))
    # max_rewards_list.append(max(traj['rewards']))
    # print("")
    n+=1
    
# print(max(max_rewards_list))

    if n==3:
        break

state:  [[ 1.9831914e-02 -8.9501314e-02 -3.1969063e-03 ...  1.1365079e-01
   6.8424918e-02 -1.3811582e-01]
 [-3.8486063e-03 -5.2394319e-02  8.3050327e-03 ...  4.5068407e+00
  -9.2885571e+00  4.7328596e+00]
 [-5.5298433e-02 -7.7850236e-05 -2.3952831e-01 ... -7.0811687e+00
  -1.4037068e+00  7.5524049e+00]
 ...
 [-3.1975684e-01  5.3305399e-01 -4.8704177e-01 ...  1.5455554e+00
   2.6812897e+00  8.7905388e+00]
 [-3.2200974e-01  3.5745117e-01  1.0463273e-02 ... -6.3428599e-01
   1.6292539e+00  9.7356015e-01]
 [-3.0673215e-01  1.9843711e-01  6.9996923e-01 ...  5.0098950e-01
   1.5680059e+00  9.4733723e-02]] 

state:  [[ 4.7026437e-02 -2.1588113e-02  4.9151547e-02 ...  5.5219561e-02
  -1.5351681e-01 -4.6239123e-02]
 [ 4.1392505e-02  5.3802542e-02 -1.5022255e-01 ...  6.1133021e-01
  -7.4645710e+00  7.9509692e+00]
 [ 9.8547200e-04  8.8533267e-02 -4.3876743e-01 ...  8.5824745e-04
   5.9796906e+00  4.9521341e+00]
 ...
 [-1.4081973e-01 -7.7957302e-02 -2.6429656e-01 ...  1.0316861e+00
  -7.5645506e-

In [10]:
# check original data shape
with open(TEMP_DATA_PATH, 'rb') as f:
    temp_trajectories = pickle.load(f)
    
print("length: ", len(temp_trajectories)*len(temp_trajectories[0]['observations']))
print("n of epi: ", len(temp_trajectories))
print("n of traj in one epi: ", len(temp_trajectories[0]['observations']))

length:  1000000
n of epi:  1000
n of traj in one epi:  1000


In [11]:
# check original dataset
cnt = 0


for ori in temp_trajectories:
    print(ori['observations'].shape)
    
    if cnt >= 5:
        break
    
    cnt+=1

(1000, 17)
(1000, 17)
(1000, 17)
(1000, 17)
(1000, 17)
(1000, 17)


In [12]:
states, next_states, rewards = [], [], []
for traj in temp_trajectories:
    # print(traj)
    traj_len = traj['observations'].shape[0]
    states.append(traj['observations'])
    next_states.append(traj['next_observations'])
    rewards.append(traj['rewards'])
    # # calculate returns to go and rescale them
    # traj['returns_to_go'] = discount_cumsum(traj['rewards'], 1.0) / rtg_scale
    
states = np.concatenate(states, axis=0)
print("state shape: ", states.shape)

state shape:  (1000000, 17)


In [13]:
# check train data shape
with open(DATA_PATH, 'rb') as f:
    train_trajectories = pickle.load(f)

print("length: ", len(train_trajectories)*len(train_trajectories[0]['observations']))
print("n of epi: ", len(train_trajectories))
print("n of traj in one epi: ", len(train_trajectories[0]['observations']))
# print("train state shape: ", train_trajectories['observations'].shape)
# print("train action shape: ", train_trajectories['actions'].shape)
# print("train reward shape: ", train_trajectories['rewards'].shape)


length:  800000
n of epi:  800
n of traj in one epi:  1000


In [14]:
# check valid data shape
with open(VAL_DATA_PATH, 'rb') as f:
    val_trajectories = pickle.load(f)

print("length: ", len(val_trajectories)*len(val_trajectories[0]['observations']))
print("n of epi: ", len(val_trajectories))
print("n of traj in one epi: ", len(val_trajectories[0]['observations']))
# print("val state shape: ", val_trajectories['observations'].shape)
# print("val action shape: ", val_trajectories['actions'].shape)
# print("val reward shape: ", val_trajectories['rewards'].shape)

length:  200000
n of epi:  200
n of traj in one epi:  1000


In [15]:
# train parameter
batch_size = 64
embed_dim = 128
activation = 'relu'
drop_out = 0.1
k = 31 # content len
n_blocks = 3
n_heads = 1 # transformer head

# total updates = max_train_iters x num_updates_per_iter
max_train_iters = 400
num_updates_per_iter = 100
# num_val_iter = 100
total_updates = 0
min_total_log_loss = 1e10

wt_decay = 1e-4             # weight decay
lr = 1e-4                   # learning rate
warmup_steps = 10000        # warmup steps for lr scheduler

# weight of mse loss
state_weight = 1
reward_weight = 1

# evaluation parameter
# max_eval_ep_len = 1000      # max len of one evaluation episode
# num_eval_ep = 10            # num of evaluation episodes per iteration

In [16]:
# check dim

state_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]

print("state dim: ", state_dim)
print("action dim: ", act_dim)

state dim:  17
action dim:  6


In [17]:
# test data
temp_dataset = D4RLTrajectoryDataset(DATA_PATH, 2)
temp_data_loader = DataLoader(temp_dataset,
						batch_size=32,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        
temp_data_iter = iter(temp_data_loader)

timesteps, states, next_states, actions, rewards, traj_mask = next(temp_data_iter)

timesteps = timesteps.to(DEVICE)	# B x T
states = states.to(DEVICE)			# B x T x state_dim
next_states = next_states.to(DEVICE) # B X T X state_dim
actions = actions.to(DEVICE)		# B x T x act_dim
rewards = rewards.to(DEVICE).unsqueeze(dim=-1) # B x T x 1

print("timesteps shape: ", timesteps.shape)
print("rewards shape: ", rewards.shape)
print("states shape: ", states.shape)
print("actions shape: ", actions.shape)

# print("state: ", states)
# print("action: ", actions)
# print("rewards: ", rewards)



timesteps shape:  torch.Size([32, 2])
rewards shape:  torch.Size([32, 2, 1])
states shape:  torch.Size([32, 2, 17])
actions shape:  torch.Size([32, 2, 6])


In [18]:
# test model

temp_model = DecisionTransformer(
			state_dim=state_dim,
			act_dim=act_dim,
			# reward 포함 + r0 제외
			n_blocks=n_blocks,
			h_dim=16,
			context_len=2,
			n_heads=n_heads,
			drop_p=drop_out,
		).to(DEVICE)
		
next_state_preds, rewards_preds = temp_model.forward(
												rewards=rewards,
												timesteps=timesteps,
												states=states,
												actions=actions,
											)

In [19]:
# continue train test
test_traj_dataset = D4RLTrajectoryDataset(DATA_PATH, k)
test_traj_data_loader = DataLoader(test_traj_dataset,
						batch_size=batch_size,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        
test_data_iter = iter(test_traj_data_loader)


for i_train_iter in tqdm(range(max_train_iters)):
	
	for _ in range(num_updates_per_iter):
		try:
			timesteps, states, next_states, actions, rewards, traj_mask = next(test_data_iter)
		except StopIteration:
			test_traj_data_loader = DataLoader(test_traj_dataset,
						batch_size=batch_size,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
						
			test_data_iter = iter(test_traj_data_loader)
			timesteps, states, next_states, actions, rewards, traj_mask = next(test_data_iter)

100%|██████████| 400/400 [01:34<00:00,  4.25it/s]


In [20]:
# load train preprocessing(normalization, fit padding) data

traj_dataset = D4RLTrajectoryDataset(DATA_PATH, k)
traj_data_loader = DataLoader(traj_dataset,
						batch_size=batch_size,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        
data_iter = iter(traj_data_loader)

## get state stats from dataset
state_mean, state_std = traj_dataset.get_state_stats()

In [21]:
# load validate preprocessing(normalization, fit padding) data

val_traj_dataset = D4RLTrajectoryDataset(DATA_PATH, k, val=True, val_dataset_path=VAL_DATA_PATH)
val_traj_data_loader = DataLoader(val_traj_dataset,
						batch_size=batch_size,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
                        


In [22]:
# define model

model = DecisionTransformer(
			state_dim=state_dim,
			act_dim=act_dim,
			n_blocks=n_blocks,
			h_dim=embed_dim,
			context_len=k,
			n_heads=n_heads,
			drop_p=drop_out,
		).to(DEVICE)
  
optimizer = torch.optim.AdamW(
					model.parameters(), 
					lr=lr, 
					weight_decay=wt_decay
				)

scheduler = torch.optim.lr_scheduler.LambdaLR(
		optimizer,
		lambda steps: min((steps+1)/warmup_steps, 1)
	)
	


In [23]:
start_time = datetime.now().replace(microsecond=0)

start_time_str = start_time.strftime("%y-%m-%d-%H-%M-%S")

prefix = "dt_" + env_name

save_model_name =  prefix + "_model_" + str(max_train_iters) + "_" + str(batch_size) + ".pt"
save_model_path = os.path.join(LOG_PATH, save_model_name)
save_best_model_path = save_model_path[:-3] + "_best.pt"

log_csv_name = prefix + "_log_" + start_time_str + ".csv"
log_csv_path = os.path.join(LOG_PATH, log_csv_name)


csv_writer = csv.writer(open(log_csv_path, 'a', 1))
csv_header = (["duration", "num_updates", "total_loss", "state_loss", "reward_loss", "val_total_loss", "val_state_loss", "val_reward_loss"])

csv_writer.writerow(csv_header)


print("=" * 60)
print("start time: " + start_time_str)
print("=" * 60)

print("device set to: " + str(DEVICE))
print("dataset path: " + DATA_PATH)
print("model save path: " + save_model_path)
print("log csv save path: " + log_csv_path)

# train
for i_train_iter in tqdm(range(max_train_iters)):


	log_state_losses, log_reward_losses, log_total_losses = [], [], []
	val_log_state_losses, val_log_reward_losses, val_log_total_losses = [], [], []
	model.train()
	
	for _ in range(num_updates_per_iter):
		try:
			timesteps, states, next_states, actions, rewards, traj_mask = next(data_iter)
		except StopIteration:
			traj_data_loader = DataLoader(traj_dataset,
						batch_size=batch_size,
						shuffle=True,
						pin_memory=True,
						drop_last=True)
			data_iter = iter(traj_data_loader)
			timesteps, states, next_states, actions, rewards, traj_mask = next(data_iter)

		timesteps = timesteps.to(DEVICE)	# B x T
		states = states.to(DEVICE)			# B x T x state_dim
		next_states = next_states.to(DEVICE) # B X T X state_dim
		actions = actions.to(DEVICE)		# B x T x act_dim
		rewards = rewards.to(DEVICE).unsqueeze(dim=-1) # B x T x 1
		traj_mask = traj_mask.to(DEVICE)	# B x T

		next_states_target = torch.clone(next_states).detach().to(DEVICE)
		rewards_target = torch.clone(rewards).detach().to(DEVICE)
	
		next_state_preds, rewards_preds = model.forward(
														timesteps=timesteps,
														states=states,
														actions=actions,
														rewards=rewards,
													)

		# only consider non padded elements
		next_state_preds = next_state_preds.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		next_states_target = next_states_target.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		
		rewards_preds = rewards_preds.view(-1, 1)[traj_mask.view(-1,) > 0]
		rewards_target = rewards_target.view(-1, 1)[traj_mask.view(-1,) > 0]

		state_loss = F.mse_loss(next_state_preds, next_states_target, reduction='mean') * state_weight
		reward_loss = F.mse_loss(rewards_preds, rewards_target, reduction='mean') * reward_weight
		
		total_loss = state_loss.add(reward_loss)
		total_loss = torch.mean(total_loss)

		optimizer.zero_grad()
		total_loss.backward()
		torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
		optimizer.step()
		scheduler.step()
		
		
		#save loss
		log_state_losses.append(state_loss.detach().cpu().item())
		log_reward_losses.append(reward_loss.detach().cpu().item())
		
		log_total_losses.append(total_loss.detach().cpu().item())
		
	# validation
	model.eval()
	for val_timesteps, val_states, val_next_states, val_actions, val_rewards, val_traj_mask in val_traj_data_loader:
		
		val_timesteps = val_timesteps.to(DEVICE)	# B x T
		val_states = val_states.to(DEVICE)			# B x T x state_dim
		val_next_states = val_next_states.to(DEVICE) # B X T X state_dim
		val_actions = val_actions.to(DEVICE)		# B x T x act_dim
		val_rewards = val_rewards.to(DEVICE).unsqueeze(dim=-1) # B x T x 1
		val_traj_mask = val_traj_mask.to(DEVICE)	# B x T
				
		val_next_states_target = torch.clone(val_next_states).detach().to(DEVICE)
		val_rewards_target = torch.clone(val_rewards).detach().to(DEVICE)
		
		val_next_state_preds, val_rewards_preds = model.forward(
														timesteps=val_timesteps,
														states=val_states,
														actions=val_actions,
														rewards=val_rewards,
													)
													
		# only consider non padded elements
		val_next_state_preds = val_next_state_preds.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		val_next_states_target = val_next_states_target.view(-1, state_dim)[traj_mask.view(-1,) > 0]
		
		val_rewards_preds = val_rewards_preds.view(-1, 1)[traj_mask.view(-1,) > 0]
		val_rewards_target = val_rewards_target.view(-1, 1)[traj_mask.view(-1,) > 0]

		val_state_loss = F.mse_loss(val_next_state_preds, val_next_states_target, reduction='mean') * state_weight
		val_reward_loss = F.mse_loss(val_rewards_preds, val_rewards_target, reduction='mean') * reward_weight

		# todo: try to use mae
		
		val_total_loss = val_state_loss.add(val_reward_loss)
		val_total_loss = torch.mean(val_total_loss)
		
		# save val loss
		val_log_state_losses.append(val_state_loss.detach().cpu().item())
		val_log_reward_losses.append(val_reward_loss.detach().cpu().item())
		
		val_log_total_losses.append(val_total_loss.detach().cpu().item())
	
	mean_total_log_loss = np.mean(log_total_losses)
	mean_state_log_loss = np.mean(log_state_losses)
	mean_reward_log_loss = np.mean(log_reward_losses)
	
	mean_val_total_log_loss = np.mean(val_log_total_losses)
	mean_val_state_log_loss = np.mean(val_log_state_losses)
	mean_val_reward_log_loss = np.mean(val_log_reward_losses)

	time_elapsed = str(datetime.now().replace(microsecond=0) - start_time)

	total_updates += num_updates_per_iter

	log_str = ("=" * 60 + '\n' +
			"time elapsed: " + time_elapsed  + '\n' +
			"num of updates: " + str(total_updates) + '\n' +
			"train total loss: " + format(mean_total_log_loss, ".5f") + '\n' +
			"train state loss: " + format(mean_state_log_loss, ".5f") + '\n' +
			"train reward loss: " +  format(mean_reward_log_loss, ".5f") + '\n' +
			"val total loss: " + format(mean_val_total_log_loss, ".5f") + '\n' +
			"val state loss: " + format(mean_val_state_log_loss, ".5f") + '\n' +
			"val reward loss: " +  format(mean_val_reward_log_loss, ".5f")
			)

	print(log_str)

	log_data = [time_elapsed, total_updates, mean_total_log_loss, mean_state_log_loss, mean_reward_log_loss, \
		 mean_val_total_log_loss, mean_val_state_log_loss, mean_val_reward_log_loss]

	csv_writer.writerow(log_data)
	
	# save model
	if mean_val_total_log_loss <= min_total_log_loss:
		print("saving min loss model at: " + save_best_model_path)
		torch.save(model.state_dict(), save_best_model_path)
		min_total_log_loss = mean_val_total_log_loss

	print("saving current model at: " + save_model_path)
	torch.save(model.state_dict(), save_model_path)


print("=" * 60)
print("finished training!")
print("=" * 60)
end_time = datetime.now().replace(microsecond=0)
time_elapsed = str(end_time - start_time)
end_time_str = end_time.strftime("%y-%m-%d-%H-%M-%S")
print("started training at: " + start_time_str)
print("finished training at: " + end_time_str)
print("total training time: " + time_elapsed)
print("saved min loss model at: " + save_best_model_path)
print("saved last updated model at: " + save_model_path)
print("=" * 60)

start time: 24-05-19-18-07-32
device set to: cpu
dataset path: data/train/halfcheetah-medium-v2.pkl
model save path: ./log/dt_halfcheetah_model_400_64.pt
log csv save path: ./log/dt_halfcheetah_log_24-05-19-18-07-32.csv


  0%|          | 1/400 [00:27<3:05:17, 27.86s/it]

time elapsed: 0:00:28
num of updates: 100
train total loss: 2.42268
train state loss: 1.36661
train reward loss: 1.05607
val total loss: 2.56406
val state loss: 1.42128
val reward loss: 1.14278
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  0%|          | 2/400 [00:54<3:00:42, 27.24s/it]

time elapsed: 0:00:54
num of updates: 200
train total loss: 2.32456
train state loss: 1.32182
train reward loss: 1.00274
val total loss: 2.44359
val state loss: 1.34692
val reward loss: 1.09667
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  1%|          | 3/400 [01:21<2:59:26, 27.12s/it]

time elapsed: 0:01:21
num of updates: 300
train total loss: 2.18691
train state loss: 1.27678
train reward loss: 0.91014
val total loss: 2.16324
val state loss: 1.31858
val reward loss: 0.84466
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  1%|          | 4/400 [01:48<2:57:41, 26.92s/it]

time elapsed: 0:01:48
num of updates: 400
train total loss: 1.97501
train state loss: 1.19889
train reward loss: 0.77612
val total loss: 1.99127
val state loss: 1.18875
val reward loss: 0.80252
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  1%|▏         | 5/400 [02:17<3:01:46, 27.61s/it]

time elapsed: 0:02:17
num of updates: 500
train total loss: 1.81728
train state loss: 1.16591
train reward loss: 0.65138
val total loss: 1.71986
val state loss: 1.09280
val reward loss: 0.62707
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  2%|▏         | 6/400 [02:54<3:23:57, 31.06s/it]

time elapsed: 0:02:55
num of updates: 600
train total loss: 1.64821
train state loss: 1.11867
train reward loss: 0.52953
val total loss: 1.54799
val state loss: 1.10220
val reward loss: 0.44579
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  2%|▏         | 7/400 [03:26<3:25:09, 31.32s/it]

time elapsed: 0:03:26
num of updates: 700
train total loss: 1.50106
train state loss: 1.06689
train reward loss: 0.43416
val total loss: 1.48773
val state loss: 1.08354
val reward loss: 0.40419
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  2%|▏         | 8/400 [03:58<3:25:14, 31.41s/it]

time elapsed: 0:03:58
num of updates: 800
train total loss: 1.42158
train state loss: 1.03447
train reward loss: 0.38711
val total loss: 1.35423
val state loss: 1.01275
val reward loss: 0.34147
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  2%|▏         | 9/400 [04:31<3:27:39, 31.87s/it]

time elapsed: 0:04:31
num of updates: 900
train total loss: 1.35314
train state loss: 1.00105
train reward loss: 0.35209
val total loss: 1.29039
val state loss: 0.98157
val reward loss: 0.30882
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  2%|▎         | 10/400 [05:01<3:24:45, 31.50s/it]

time elapsed: 0:05:02
num of updates: 1000
train total loss: 1.30901
train state loss: 0.98494
train reward loss: 0.32407
val total loss: 1.24030
val state loss: 0.94753
val reward loss: 0.29277
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  3%|▎         | 11/400 [05:31<3:20:28, 30.92s/it]

time elapsed: 0:05:31
num of updates: 1100
train total loss: 1.26553
train state loss: 0.96569
train reward loss: 0.29984
val total loss: 1.28595
val state loss: 0.99871
val reward loss: 0.28723
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  3%|▎         | 12/400 [06:04<3:23:23, 31.45s/it]

time elapsed: 0:06:04
num of updates: 1200
train total loss: 1.23147
train state loss: 0.94976
train reward loss: 0.28172
val total loss: 1.24564
val state loss: 0.97224
val reward loss: 0.27340
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  3%|▎         | 13/400 [06:37<3:26:33, 32.03s/it]

time elapsed: 0:06:37
num of updates: 1300
train total loss: 1.21183
train state loss: 0.94674
train reward loss: 0.26510
val total loss: 1.19731
val state loss: 0.95824
val reward loss: 0.23907
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  4%|▎         | 14/400 [07:10<3:28:25, 32.40s/it]

time elapsed: 0:07:10
num of updates: 1400
train total loss: 1.19872
train state loss: 0.94622
train reward loss: 0.25249
val total loss: 1.14651
val state loss: 0.92816
val reward loss: 0.21836
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  4%|▍         | 15/400 [07:40<3:22:48, 31.61s/it]

time elapsed: 0:07:40
num of updates: 1500
train total loss: 1.16633
train state loss: 0.93515
train reward loss: 0.23118
val total loss: 1.06393
val state loss: 0.87455
val reward loss: 0.18938
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  4%|▍         | 16/400 [08:09<3:17:30, 30.86s/it]

time elapsed: 0:08:09
num of updates: 1600
train total loss: 1.15432
train state loss: 0.93363
train reward loss: 0.22069
val total loss: 1.13807
val state loss: 0.93971
val reward loss: 0.19836
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  4%|▍         | 17/400 [08:39<3:14:59, 30.55s/it]

time elapsed: 0:08:39
num of updates: 1700
train total loss: 1.13313
train state loss: 0.92154
train reward loss: 0.21159
val total loss: 1.10677
val state loss: 0.91551
val reward loss: 0.19126
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  4%|▍         | 18/400 [09:10<3:15:10, 30.66s/it]

time elapsed: 0:09:10
num of updates: 1800
train total loss: 1.11729
train state loss: 0.91589
train reward loss: 0.20140
val total loss: 1.06509
val state loss: 0.87906
val reward loss: 0.18603
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  5%|▍         | 19/400 [09:40<3:14:04, 30.56s/it]

time elapsed: 0:09:40
num of updates: 1900
train total loss: 1.10050
train state loss: 0.90812
train reward loss: 0.19238
val total loss: 1.07685
val state loss: 0.90838
val reward loss: 0.16847
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  5%|▌         | 20/400 [10:11<3:13:58, 30.63s/it]

time elapsed: 0:10:11
num of updates: 2000
train total loss: 1.08657
train state loss: 0.90472
train reward loss: 0.18185
val total loss: 1.06323
val state loss: 0.89644
val reward loss: 0.16679
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  5%|▌         | 21/400 [10:41<3:13:11, 30.59s/it]

time elapsed: 0:10:42
num of updates: 2100
train total loss: 1.07684
train state loss: 0.90139
train reward loss: 0.17545
val total loss: 1.01005
val state loss: 0.85258
val reward loss: 0.15747
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  6%|▌         | 22/400 [11:14<3:16:22, 31.17s/it]

time elapsed: 0:11:14
num of updates: 2200
train total loss: 1.06181
train state loss: 0.89062
train reward loss: 0.17119
val total loss: 1.02833
val state loss: 0.87511
val reward loss: 0.15322
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  6%|▌         | 23/400 [11:44<3:13:34, 30.81s/it]

time elapsed: 0:11:44
num of updates: 2300
train total loss: 1.05760
train state loss: 0.89339
train reward loss: 0.16421
val total loss: 0.99049
val state loss: 0.85270
val reward loss: 0.13778
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  6%|▌         | 24/400 [12:13<3:10:34, 30.41s/it]

time elapsed: 0:12:14
num of updates: 2400
train total loss: 1.04049
train state loss: 0.88258
train reward loss: 0.15791
val total loss: 0.99990
val state loss: 0.85603
val reward loss: 0.14387
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  6%|▋         | 25/400 [12:54<3:28:47, 33.41s/it]

time elapsed: 0:12:54
num of updates: 2500
train total loss: 1.02625
train state loss: 0.87337
train reward loss: 0.15287
val total loss: 0.95944
val state loss: 0.82657
val reward loss: 0.13288
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  6%|▋         | 26/400 [13:50<4:11:25, 40.34s/it]

time elapsed: 0:13:51
num of updates: 2600
train total loss: 1.00530
train state loss: 0.85922
train reward loss: 0.14607
val total loss: 0.95409
val state loss: 0.82606
val reward loss: 0.12803
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  7%|▋         | 27/400 [14:34<4:16:29, 41.26s/it]

time elapsed: 0:14:34
num of updates: 2700
train total loss: 0.98579
train state loss: 0.84483
train reward loss: 0.14097
val total loss: 0.93795
val state loss: 0.81411
val reward loss: 0.12383
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  7%|▋         | 28/400 [15:09<4:04:53, 39.50s/it]

time elapsed: 0:15:09
num of updates: 2800
train total loss: 0.96134
train state loss: 0.82565
train reward loss: 0.13569
val total loss: 0.93040
val state loss: 0.80959
val reward loss: 0.12081
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  7%|▋         | 29/400 [15:46<3:58:36, 38.59s/it]

time elapsed: 0:15:46
num of updates: 2900
train total loss: 0.92459
train state loss: 0.79560
train reward loss: 0.12898
val total loss: 0.87415
val state loss: 0.76176
val reward loss: 0.11239
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  8%|▊         | 30/400 [16:25<3:59:50, 38.89s/it]

time elapsed: 0:16:25
num of updates: 3000
train total loss: 0.89285
train state loss: 0.76885
train reward loss: 0.12400
val total loss: 0.82710
val state loss: 0.71853
val reward loss: 0.10857
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  8%|▊         | 31/400 [17:02<3:55:17, 38.26s/it]

time elapsed: 0:17:02
num of updates: 3100
train total loss: 0.85148
train state loss: 0.73220
train reward loss: 0.11928
val total loss: 0.79710
val state loss: 0.69224
val reward loss: 0.10485
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  8%|▊         | 32/400 [17:38<3:51:09, 37.69s/it]

time elapsed: 0:17:39
num of updates: 3200
train total loss: 0.80966
train state loss: 0.69483
train reward loss: 0.11483
val total loss: 0.74456
val state loss: 0.65427
val reward loss: 0.09030
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  8%|▊         | 33/400 [18:34<4:22:56, 42.99s/it]

time elapsed: 0:18:34
num of updates: 3300
train total loss: 0.76167
train state loss: 0.65385
train reward loss: 0.10782
val total loss: 0.69421
val state loss: 0.60486
val reward loss: 0.08935
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  8%|▊         | 34/400 [19:12<4:13:20, 41.53s/it]

time elapsed: 0:19:12
num of updates: 3400
train total loss: 0.72156
train state loss: 0.61882
train reward loss: 0.10274
val total loss: 0.66727
val state loss: 0.58238
val reward loss: 0.08489
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  9%|▉         | 35/400 [19:47<4:01:38, 39.72s/it]

time elapsed: 0:19:48
num of updates: 3500
train total loss: 0.67696
train state loss: 0.58022
train reward loss: 0.09674
val total loss: 0.61531
val state loss: 0.53481
val reward loss: 0.08050
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  9%|▉         | 36/400 [20:28<4:02:30, 39.97s/it]

time elapsed: 0:20:28
num of updates: 3600
train total loss: 0.63169
train state loss: 0.54087
train reward loss: 0.09082
val total loss: 0.54717
val state loss: 0.47571
val reward loss: 0.07146
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


  9%|▉         | 37/400 [21:09<4:03:35, 40.26s/it]

time elapsed: 0:21:09
num of updates: 3700
train total loss: 0.58259
train state loss: 0.49882
train reward loss: 0.08377
val total loss: 0.51112
val state loss: 0.44691
val reward loss: 0.06421
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 10%|▉         | 38/400 [21:49<4:02:17, 40.16s/it]

time elapsed: 0:21:49
num of updates: 3800
train total loss: 0.52745
train state loss: 0.45114
train reward loss: 0.07631
val total loss: 0.45550
val state loss: 0.39888
val reward loss: 0.05662
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 10%|▉         | 39/400 [22:34<4:11:22, 41.78s/it]

time elapsed: 0:22:35
num of updates: 3900
train total loss: 0.48912
train state loss: 0.41738
train reward loss: 0.07174
val total loss: 0.41257
val state loss: 0.36571
val reward loss: 0.04686
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 10%|█         | 40/400 [23:14<4:06:54, 41.15s/it]

time elapsed: 0:23:14
num of updates: 4000
train total loss: 0.44630
train state loss: 0.38041
train reward loss: 0.06589
val total loss: 0.37631
val state loss: 0.32834
val reward loss: 0.04798
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 10%|█         | 41/400 [23:59<4:13:28, 42.36s/it]

time elapsed: 0:23:59
num of updates: 4100
train total loss: 0.41162
train state loss: 0.35081
train reward loss: 0.06081
val total loss: 0.33200
val state loss: 0.29228
val reward loss: 0.03972
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 10%|█         | 42/400 [24:36<4:03:22, 40.79s/it]

time elapsed: 0:24:37
num of updates: 4200
train total loss: 0.38412
train state loss: 0.32739
train reward loss: 0.05673
val total loss: 0.31903
val state loss: 0.27885
val reward loss: 0.04018
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 11%|█         | 43/400 [25:14<3:57:53, 39.98s/it]

time elapsed: 0:25:15
num of updates: 4300
train total loss: 0.36117
train state loss: 0.30773
train reward loss: 0.05344
val total loss: 0.29946
val state loss: 0.26074
val reward loss: 0.03873
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 11%|█         | 44/400 [25:50<3:50:14, 38.81s/it]

time elapsed: 0:25:51
num of updates: 4400
train total loss: 0.33543
train state loss: 0.28546
train reward loss: 0.04997
val total loss: 0.26832
val state loss: 0.23702
val reward loss: 0.03130
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 11%|█▏        | 45/400 [26:29<3:49:43, 38.83s/it]

time elapsed: 0:26:30
num of updates: 4500
train total loss: 0.32149
train state loss: 0.27305
train reward loss: 0.04844
val total loss: 0.25694
val state loss: 0.22482
val reward loss: 0.03211
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 12%|█▏        | 46/400 [27:07<3:47:14, 38.52s/it]

time elapsed: 0:27:07
num of updates: 4600
train total loss: 0.30627
train state loss: 0.25965
train reward loss: 0.04662
val total loss: 0.25611
val state loss: 0.22494
val reward loss: 0.03117
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 12%|█▏        | 47/400 [27:40<3:36:58, 36.88s/it]

time elapsed: 0:27:40
num of updates: 4700
train total loss: 0.29318
train state loss: 0.24915
train reward loss: 0.04403
val total loss: 0.23108
val state loss: 0.20318
val reward loss: 0.02790
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 12%|█▏        | 48/400 [28:16<3:33:49, 36.45s/it]

time elapsed: 0:28:16
num of updates: 4800
train total loss: 0.27541
train state loss: 0.23379
train reward loss: 0.04162
val total loss: 0.22271
val state loss: 0.19734
val reward loss: 0.02538
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 12%|█▏        | 49/400 [28:52<3:33:20, 36.47s/it]

time elapsed: 0:28:52
num of updates: 4900
train total loss: 0.26768
train state loss: 0.22695
train reward loss: 0.04073
val total loss: 0.21785
val state loss: 0.18992
val reward loss: 0.02793
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 12%|█▎        | 50/400 [29:30<3:35:54, 37.01s/it]

time elapsed: 0:29:31
num of updates: 5000
train total loss: 0.25543
train state loss: 0.21517
train reward loss: 0.04025
val total loss: 0.20232
val state loss: 0.17593
val reward loss: 0.02639
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 13%|█▎        | 51/400 [30:05<3:31:23, 36.34s/it]

time elapsed: 0:30:05
num of updates: 5100
train total loss: 0.24426
train state loss: 0.20581
train reward loss: 0.03844
val total loss: 0.18322
val state loss: 0.15835
val reward loss: 0.02487
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 13%|█▎        | 52/400 [30:42<3:31:38, 36.49s/it]

time elapsed: 0:30:42
num of updates: 5200
train total loss: 0.23243
train state loss: 0.19511
train reward loss: 0.03732
val total loss: 0.17166
val state loss: 0.14740
val reward loss: 0.02426
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 13%|█▎        | 53/400 [31:21<3:35:33, 37.27s/it]

time elapsed: 0:31:21
num of updates: 5300
train total loss: 0.22587
train state loss: 0.18803
train reward loss: 0.03784
val total loss: 0.16360
val state loss: 0.14089
val reward loss: 0.02271
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 14%|█▎        | 54/400 [31:57<3:32:31, 36.85s/it]

time elapsed: 0:31:57
num of updates: 5400
train total loss: 0.21445
train state loss: 0.17912
train reward loss: 0.03533
val total loss: 0.16709
val state loss: 0.14145
val reward loss: 0.02564
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 14%|█▍        | 55/400 [32:32<3:28:24, 36.24s/it]

time elapsed: 0:32:32
num of updates: 5500
train total loss: 0.20262
train state loss: 0.16850
train reward loss: 0.03412
val total loss: 0.14192
val state loss: 0.12354
val reward loss: 0.01837
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 14%|█▍        | 56/400 [33:07<3:26:40, 36.05s/it]

time elapsed: 0:33:08
num of updates: 5600
train total loss: 0.19567
train state loss: 0.16214
train reward loss: 0.03353
val total loss: 0.14113
val state loss: 0.12123
val reward loss: 0.01990
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 14%|█▍        | 57/400 [33:56<3:47:28, 39.79s/it]

time elapsed: 0:33:56
num of updates: 5700
train total loss: 0.19085
train state loss: 0.15749
train reward loss: 0.03336
val total loss: 0.13599
val state loss: 0.11641
val reward loss: 0.01958
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 14%|█▍        | 58/400 [34:38<3:51:03, 40.54s/it]

time elapsed: 0:34:38
num of updates: 5800
train total loss: 0.17989
train state loss: 0.14858
train reward loss: 0.03130
val total loss: 0.13096
val state loss: 0.11075
val reward loss: 0.02021
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 15%|█▍        | 59/400 [35:20<3:52:44, 40.95s/it]

time elapsed: 0:35:20
num of updates: 5900
train total loss: 0.17855
train state loss: 0.14696
train reward loss: 0.03159
val total loss: 0.13491
val state loss: 0.11303
val reward loss: 0.02188
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 15%|█▌        | 60/400 [36:01<3:51:21, 40.83s/it]

time elapsed: 0:36:01
num of updates: 6000
train total loss: 0.17150
train state loss: 0.14080
train reward loss: 0.03070
val total loss: 0.11642
val state loss: 0.09926
val reward loss: 0.01716
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 15%|█▌        | 61/400 [36:37<3:43:08, 39.49s/it]

time elapsed: 0:36:37
num of updates: 6100
train total loss: 0.16608
train state loss: 0.13621
train reward loss: 0.02987
val total loss: 0.12200
val state loss: 0.10194
val reward loss: 0.02006
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 16%|█▌        | 62/400 [37:14<3:38:33, 38.80s/it]

time elapsed: 0:37:14
num of updates: 6200
train total loss: 0.15888
train state loss: 0.13035
train reward loss: 0.02853
val total loss: 0.10695
val state loss: 0.09031
val reward loss: 0.01664
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 16%|█▌        | 63/400 [37:55<3:40:24, 39.24s/it]

time elapsed: 0:37:55
num of updates: 6300
train total loss: 0.15521
train state loss: 0.12699
train reward loss: 0.02822
val total loss: 0.10400
val state loss: 0.08801
val reward loss: 0.01599
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 16%|█▌        | 64/400 [38:32<3:35:57, 38.56s/it]

time elapsed: 0:38:32
num of updates: 6400
train total loss: 0.15284
train state loss: 0.12508
train reward loss: 0.02776
val total loss: 0.10974
val state loss: 0.09300
val reward loss: 0.01674
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 16%|█▋        | 65/400 [39:07<3:29:33, 37.53s/it]

time elapsed: 0:39:07
num of updates: 6500
train total loss: 0.14864
train state loss: 0.12166
train reward loss: 0.02698
val total loss: 0.09608
val state loss: 0.08150
val reward loss: 0.01458
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 16%|█▋        | 66/400 [39:45<3:29:57, 37.72s/it]

time elapsed: 0:39:45
num of updates: 6600
train total loss: 0.14554
train state loss: 0.11859
train reward loss: 0.02695
val total loss: 0.10336
val state loss: 0.08635
val reward loss: 0.01701
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 17%|█▋        | 67/400 [40:28<3:38:36, 39.39s/it]

time elapsed: 0:40:28
num of updates: 6700
train total loss: 0.13951
train state loss: 0.11383
train reward loss: 0.02568
val total loss: 0.10323
val state loss: 0.08507
val reward loss: 0.01816
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 17%|█▋        | 68/400 [41:05<3:33:36, 38.60s/it]

time elapsed: 0:41:05
num of updates: 6800
train total loss: 0.13872
train state loss: 0.11315
train reward loss: 0.02557
val total loss: 0.08880
val state loss: 0.07491
val reward loss: 0.01389
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 17%|█▋        | 69/400 [41:43<3:31:38, 38.37s/it]

time elapsed: 0:41:43
num of updates: 6900
train total loss: 0.13371
train state loss: 0.10919
train reward loss: 0.02452
val total loss: 0.09376
val state loss: 0.07847
val reward loss: 0.01529
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 18%|█▊        | 70/400 [42:23<3:34:39, 39.03s/it]

time elapsed: 0:42:23
num of updates: 7000
train total loss: 0.13271
train state loss: 0.10837
train reward loss: 0.02434
val total loss: 0.08844
val state loss: 0.07274
val reward loss: 0.01571
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 18%|█▊        | 71/400 [43:06<3:40:27, 40.20s/it]

time elapsed: 0:43:06
num of updates: 7100
train total loss: 0.12860
train state loss: 0.10492
train reward loss: 0.02368
val total loss: 0.08635
val state loss: 0.07371
val reward loss: 0.01265
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 18%|█▊        | 72/400 [43:46<3:38:25, 39.96s/it]

time elapsed: 0:43:46
num of updates: 7200
train total loss: 0.12719
train state loss: 0.10367
train reward loss: 0.02352
val total loss: 0.08332
val state loss: 0.06989
val reward loss: 0.01342
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 18%|█▊        | 73/400 [44:23<3:33:14, 39.13s/it]

time elapsed: 0:44:23
num of updates: 7300
train total loss: 0.12442
train state loss: 0.10133
train reward loss: 0.02309
val total loss: 0.07436
val state loss: 0.06286
val reward loss: 0.01149
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 18%|█▊        | 74/400 [45:04<3:35:14, 39.62s/it]

time elapsed: 0:45:04
num of updates: 7400
train total loss: 0.12093
train state loss: 0.09871
train reward loss: 0.02222
val total loss: 0.08388
val state loss: 0.06976
val reward loss: 0.01411
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 19%|█▉        | 75/400 [45:42<3:33:20, 39.39s/it]

time elapsed: 0:45:43
num of updates: 7500
train total loss: 0.11887
train state loss: 0.09711
train reward loss: 0.02176
val total loss: 0.07892
val state loss: 0.06654
val reward loss: 0.01239
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 19%|█▉        | 76/400 [46:22<3:33:28, 39.53s/it]

time elapsed: 0:46:22
num of updates: 7600
train total loss: 0.11980
train state loss: 0.09762
train reward loss: 0.02219
val total loss: 0.08164
val state loss: 0.06772
val reward loss: 0.01392
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 19%|█▉        | 77/400 [47:01<3:32:18, 39.44s/it]

time elapsed: 0:47:02
num of updates: 7700
train total loss: 0.11316
train state loss: 0.09257
train reward loss: 0.02059
val total loss: 0.07571
val state loss: 0.06287
val reward loss: 0.01284
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 20%|█▉        | 78/400 [47:39<3:28:33, 38.86s/it]

time elapsed: 0:47:39
num of updates: 7800
train total loss: 0.11272
train state loss: 0.09220
train reward loss: 0.02052
val total loss: 0.08244
val state loss: 0.06762
val reward loss: 0.01482
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 20%|█▉        | 79/400 [48:19<3:29:25, 39.14s/it]

time elapsed: 0:48:19
num of updates: 7900
train total loss: 0.11155
train state loss: 0.09110
train reward loss: 0.02046
val total loss: 0.06989
val state loss: 0.05860
val reward loss: 0.01129
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 20%|██        | 80/400 [49:01<3:34:04, 40.14s/it]

time elapsed: 0:49:01
num of updates: 8000
train total loss: 0.10917
train state loss: 0.08916
train reward loss: 0.02000
val total loss: 0.06595
val state loss: 0.05562
val reward loss: 0.01033
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 20%|██        | 81/400 [49:49<3:45:06, 42.34s/it]

time elapsed: 0:49:49
num of updates: 8100
train total loss: 0.10836
train state loss: 0.08863
train reward loss: 0.01974
val total loss: 0.06770
val state loss: 0.05624
val reward loss: 0.01146
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 20%|██        | 82/400 [50:25<3:34:26, 40.46s/it]

time elapsed: 0:50:25
num of updates: 8200
train total loss: 0.10752
train state loss: 0.08768
train reward loss: 0.01984
val total loss: 0.06636
val state loss: 0.05607
val reward loss: 0.01029
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 21%|██        | 83/400 [50:59<3:24:10, 38.64s/it]

time elapsed: 0:50:59
num of updates: 8300
train total loss: 0.10586
train state loss: 0.08652
train reward loss: 0.01934
val total loss: 0.06421
val state loss: 0.05382
val reward loss: 0.01039
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 21%|██        | 84/400 [51:35<3:19:13, 37.83s/it]

time elapsed: 0:51:35
num of updates: 8400
train total loss: 0.10287
train state loss: 0.08417
train reward loss: 0.01870
val total loss: 0.06553
val state loss: 0.05560
val reward loss: 0.00994
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 21%|██▏       | 85/400 [52:11<3:15:17, 37.20s/it]

time elapsed: 0:52:11
num of updates: 8500
train total loss: 0.10179
train state loss: 0.08333
train reward loss: 0.01846
val total loss: 0.06641
val state loss: 0.05469
val reward loss: 0.01172
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 22%|██▏       | 86/400 [52:47<3:13:39, 37.00s/it]

time elapsed: 0:52:48
num of updates: 8600
train total loss: 0.09989
train state loss: 0.08196
train reward loss: 0.01792
val total loss: 0.06292
val state loss: 0.05328
val reward loss: 0.00964
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 22%|██▏       | 87/400 [53:30<3:21:08, 38.56s/it]

time elapsed: 0:53:30
num of updates: 8700
train total loss: 0.09907
train state loss: 0.08123
train reward loss: 0.01784
val total loss: 0.06256
val state loss: 0.05293
val reward loss: 0.00963
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 22%|██▏       | 88/400 [54:04<3:13:26, 37.20s/it]

time elapsed: 0:54:04
num of updates: 8800
train total loss: 0.09752
train state loss: 0.08007
train reward loss: 0.01744
val total loss: 0.06021
val state loss: 0.04995
val reward loss: 0.01026
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 22%|██▏       | 89/400 [54:39<3:10:19, 36.72s/it]

time elapsed: 0:54:39
num of updates: 8900
train total loss: 0.09600
train state loss: 0.07890
train reward loss: 0.01710
val total loss: 0.06243
val state loss: 0.05313
val reward loss: 0.00929
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 22%|██▎       | 90/400 [55:18<3:13:29, 37.45s/it]

time elapsed: 0:55:19
num of updates: 9000
train total loss: 0.09558
train state loss: 0.07857
train reward loss: 0.01700
val total loss: 0.06237
val state loss: 0.05159
val reward loss: 0.01078
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 23%|██▎       | 91/400 [55:53<3:09:15, 36.75s/it]

time elapsed: 0:55:54
num of updates: 9100
train total loss: 0.09358
train state loss: 0.07690
train reward loss: 0.01668
val total loss: 0.06125
val state loss: 0.05188
val reward loss: 0.00936
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 23%|██▎       | 92/400 [56:30<3:07:30, 36.53s/it]

time elapsed: 0:56:30
num of updates: 9200
train total loss: 0.09315
train state loss: 0.07653
train reward loss: 0.01662
val total loss: 0.06244
val state loss: 0.05243
val reward loss: 0.01001
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 23%|██▎       | 93/400 [57:05<3:06:00, 36.35s/it]

time elapsed: 0:57:06
num of updates: 9300
train total loss: 0.09177
train state loss: 0.07554
train reward loss: 0.01622
val total loss: 0.05453
val state loss: 0.04672
val reward loss: 0.00782
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 24%|██▎       | 94/400 [57:39<3:01:43, 35.63s/it]

time elapsed: 0:57:40
num of updates: 9400
train total loss: 0.09078
train state loss: 0.07482
train reward loss: 0.01596
val total loss: 0.05772
val state loss: 0.04861
val reward loss: 0.00912
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 24%|██▍       | 95/400 [58:14<2:59:47, 35.37s/it]

time elapsed: 0:58:14
num of updates: 9500
train total loss: 0.09090
train state loss: 0.07473
train reward loss: 0.01616
val total loss: 0.05396
val state loss: 0.04629
val reward loss: 0.00767
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 24%|██▍       | 96/400 [58:52<3:03:27, 36.21s/it]

time elapsed: 0:58:53
num of updates: 9600
train total loss: 0.08911
train state loss: 0.07335
train reward loss: 0.01576
val total loss: 0.05303
val state loss: 0.04472
val reward loss: 0.00831
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 24%|██▍       | 97/400 [59:28<3:02:05, 36.06s/it]

time elapsed: 0:59:28
num of updates: 9700
train total loss: 0.08725
train state loss: 0.07208
train reward loss: 0.01517
val total loss: 0.05600
val state loss: 0.04666
val reward loss: 0.00934
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 24%|██▍       | 98/400 [1:00:04<3:01:53, 36.14s/it]

time elapsed: 1:00:05
num of updates: 9800
train total loss: 0.08746
train state loss: 0.07201
train reward loss: 0.01546
val total loss: 0.05396
val state loss: 0.04553
val reward loss: 0.00843
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 25%|██▍       | 99/400 [1:00:42<3:03:04, 36.49s/it]

time elapsed: 1:00:42
num of updates: 9900
train total loss: 0.08662
train state loss: 0.07148
train reward loss: 0.01514
val total loss: 0.05543
val state loss: 0.04698
val reward loss: 0.00844
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 25%|██▌       | 100/400 [1:01:22<3:08:10, 37.64s/it]

time elapsed: 1:01:22
num of updates: 10000
train total loss: 0.08513
train state loss: 0.07017
train reward loss: 0.01496
val total loss: 0.05150
val state loss: 0.04392
val reward loss: 0.00758
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 25%|██▌       | 101/400 [1:02:04<3:14:43, 39.08s/it]

time elapsed: 1:02:05
num of updates: 10100
train total loss: 0.08297
train state loss: 0.06866
train reward loss: 0.01431
val total loss: 0.05375
val state loss: 0.04537
val reward loss: 0.00838
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 26%|██▌       | 102/400 [1:02:40<3:08:10, 37.89s/it]

time elapsed: 1:02:40
num of updates: 10200
train total loss: 0.08323
train state loss: 0.06891
train reward loss: 0.01432
val total loss: 0.05352
val state loss: 0.04467
val reward loss: 0.00884
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 26%|██▌       | 103/400 [1:03:17<3:07:33, 37.89s/it]

time elapsed: 1:03:18
num of updates: 10300
train total loss: 0.08163
train state loss: 0.06748
train reward loss: 0.01415
val total loss: 0.05328
val state loss: 0.04437
val reward loss: 0.00891
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 26%|██▌       | 104/400 [1:03:52<3:02:34, 37.01s/it]

time elapsed: 1:03:53
num of updates: 10400
train total loss: 0.08051
train state loss: 0.06695
train reward loss: 0.01356
val total loss: 0.04984
val state loss: 0.04242
val reward loss: 0.00742
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 26%|██▋       | 105/400 [1:04:25<2:56:08, 35.83s/it]

time elapsed: 1:04:26
num of updates: 10500
train total loss: 0.08061
train state loss: 0.06686
train reward loss: 0.01375
val total loss: 0.04965
val state loss: 0.04262
val reward loss: 0.00703
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 26%|██▋       | 106/400 [1:05:06<3:02:09, 37.18s/it]

time elapsed: 1:05:06
num of updates: 10600
train total loss: 0.08083
train state loss: 0.06705
train reward loss: 0.01378
val total loss: 0.05570
val state loss: 0.04727
val reward loss: 0.00843
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 27%|██▋       | 107/400 [1:05:42<3:00:36, 36.99s/it]

time elapsed: 1:05:43
num of updates: 10700
train total loss: 0.07903
train state loss: 0.06556
train reward loss: 0.01346
val total loss: 0.04525
val state loss: 0.03902
val reward loss: 0.00623
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 27%|██▋       | 108/400 [1:06:16<2:55:27, 36.05s/it]

time elapsed: 1:06:16
num of updates: 10800
train total loss: 0.07887
train state loss: 0.06534
train reward loss: 0.01353
val total loss: 0.04855
val state loss: 0.04037
val reward loss: 0.00818
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 27%|██▋       | 109/400 [1:06:53<2:55:48, 36.25s/it]

time elapsed: 1:06:53
num of updates: 10900
train total loss: 0.07742
train state loss: 0.06424
train reward loss: 0.01318
val total loss: 0.04845
val state loss: 0.04093
val reward loss: 0.00751
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 28%|██▊       | 110/400 [1:07:30<2:56:15, 36.47s/it]

time elapsed: 1:07:30
num of updates: 11000
train total loss: 0.07725
train state loss: 0.06403
train reward loss: 0.01322
val total loss: 0.04671
val state loss: 0.04074
val reward loss: 0.00597
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 28%|██▊       | 111/400 [1:08:03<2:51:03, 35.51s/it]

time elapsed: 1:08:03
num of updates: 11100
train total loss: 0.07648
train state loss: 0.06349
train reward loss: 0.01298
val total loss: 0.04600
val state loss: 0.04027
val reward loss: 0.00572
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 28%|██▊       | 112/400 [1:08:39<2:50:41, 35.56s/it]

time elapsed: 1:08:39
num of updates: 11200
train total loss: 0.07664
train state loss: 0.06343
train reward loss: 0.01321
val total loss: 0.05020
val state loss: 0.04244
val reward loss: 0.00776
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 28%|██▊       | 113/400 [1:09:16<2:51:45, 35.91s/it]

time elapsed: 1:09:16
num of updates: 11300
train total loss: 0.07545
train state loss: 0.06270
train reward loss: 0.01276
val total loss: 0.04538
val state loss: 0.03910
val reward loss: 0.00628
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 28%|██▊       | 114/400 [1:09:49<2:47:48, 35.20s/it]

time elapsed: 1:09:49
num of updates: 11400
train total loss: 0.07432
train state loss: 0.06194
train reward loss: 0.01238
val total loss: 0.04361
val state loss: 0.03721
val reward loss: 0.00640
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 29%|██▉       | 115/400 [1:10:23<2:44:56, 34.73s/it]

time elapsed: 1:10:23
num of updates: 11500
train total loss: 0.07340
train state loss: 0.06115
train reward loss: 0.01225
val total loss: 0.04475
val state loss: 0.03888
val reward loss: 0.00588
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 29%|██▉       | 116/400 [1:10:59<2:46:08, 35.10s/it]

time elapsed: 1:10:59
num of updates: 11600
train total loss: 0.07284
train state loss: 0.06071
train reward loss: 0.01213
val total loss: 0.04496
val state loss: 0.03854
val reward loss: 0.00642
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 29%|██▉       | 117/400 [1:11:35<2:47:47, 35.58s/it]

time elapsed: 1:11:36
num of updates: 11700
train total loss: 0.07399
train state loss: 0.06148
train reward loss: 0.01252
val total loss: 0.04539
val state loss: 0.03847
val reward loss: 0.00692
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 30%|██▉       | 118/400 [1:12:08<2:42:59, 34.68s/it]

time elapsed: 1:12:08
num of updates: 11800
train total loss: 0.07288
train state loss: 0.06064
train reward loss: 0.01224
val total loss: 0.04438
val state loss: 0.03737
val reward loss: 0.00702
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 30%|██▉       | 119/400 [1:12:45<2:45:11, 35.27s/it]

time elapsed: 1:12:45
num of updates: 11900
train total loss: 0.07237
train state loss: 0.06017
train reward loss: 0.01220
val total loss: 0.04519
val state loss: 0.03863
val reward loss: 0.00656
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 30%|███       | 120/400 [1:13:31<2:59:37, 38.49s/it]

time elapsed: 1:13:31
num of updates: 12000
train total loss: 0.07008
train state loss: 0.05841
train reward loss: 0.01167
val total loss: 0.04377
val state loss: 0.03716
val reward loss: 0.00661
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 30%|███       | 121/400 [1:14:09<2:58:20, 38.35s/it]

time elapsed: 1:14:09
num of updates: 12100
train total loss: 0.07045
train state loss: 0.05878
train reward loss: 0.01167
val total loss: 0.04128
val state loss: 0.03536
val reward loss: 0.00592
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 30%|███       | 122/400 [1:14:49<3:00:41, 39.00s/it]

time elapsed: 1:14:49
num of updates: 12200
train total loss: 0.07134
train state loss: 0.05919
train reward loss: 0.01215
val total loss: 0.04309
val state loss: 0.03721
val reward loss: 0.00589
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 31%|███       | 123/400 [1:15:30<3:02:26, 39.52s/it]

time elapsed: 1:15:30
num of updates: 12300
train total loss: 0.06936
train state loss: 0.05781
train reward loss: 0.01155
val total loss: 0.04242
val state loss: 0.03559
val reward loss: 0.00683
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 31%|███       | 124/400 [1:16:10<3:02:25, 39.66s/it]

time elapsed: 1:16:10
num of updates: 12400
train total loss: 0.06971
train state loss: 0.05805
train reward loss: 0.01166
val total loss: 0.04351
val state loss: 0.03691
val reward loss: 0.00660
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 31%|███▏      | 125/400 [1:16:51<3:03:19, 40.00s/it]

time elapsed: 1:16:51
num of updates: 12500
train total loss: 0.06911
train state loss: 0.05749
train reward loss: 0.01162
val total loss: 0.04275
val state loss: 0.03599
val reward loss: 0.00676
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 32%|███▏      | 126/400 [1:17:30<3:01:39, 39.78s/it]

time elapsed: 1:17:30
num of updates: 12600
train total loss: 0.06762
train state loss: 0.05648
train reward loss: 0.01113
val total loss: 0.04140
val state loss: 0.03480
val reward loss: 0.00660
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 32%|███▏      | 127/400 [1:18:11<3:02:54, 40.20s/it]

time elapsed: 1:18:11
num of updates: 12700
train total loss: 0.06869
train state loss: 0.05728
train reward loss: 0.01141
val total loss: 0.04054
val state loss: 0.03432
val reward loss: 0.00622
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 32%|███▏      | 128/400 [1:18:50<3:00:28, 39.81s/it]

time elapsed: 1:18:50
num of updates: 12800
train total loss: 0.06771
train state loss: 0.05645
train reward loss: 0.01125
val total loss: 0.04278
val state loss: 0.03524
val reward loss: 0.00754
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 32%|███▏      | 129/400 [1:19:28<2:57:41, 39.34s/it]

time elapsed: 1:19:28
num of updates: 12900
train total loss: 0.06747
train state loss: 0.05621
train reward loss: 0.01126
val total loss: 0.04337
val state loss: 0.03749
val reward loss: 0.00588
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 32%|███▎      | 130/400 [1:20:42<3:44:06, 49.80s/it]

time elapsed: 1:20:43
num of updates: 13000
train total loss: 0.06676
train state loss: 0.05570
train reward loss: 0.01106
val total loss: 0.04001
val state loss: 0.03324
val reward loss: 0.00677
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 33%|███▎      | 131/400 [1:21:29<3:38:52, 48.82s/it]

time elapsed: 1:21:29
num of updates: 13100
train total loss: 0.06659
train state loss: 0.05555
train reward loss: 0.01104
val total loss: 0.04005
val state loss: 0.03460
val reward loss: 0.00545
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 33%|███▎      | 132/400 [1:22:05<3:21:13, 45.05s/it]

time elapsed: 1:22:05
num of updates: 13200
train total loss: 0.06639
train state loss: 0.05533
train reward loss: 0.01106
val total loss: 0.04126
val state loss: 0.03460
val reward loss: 0.00666
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 33%|███▎      | 133/400 [1:22:47<3:15:42, 43.98s/it]

time elapsed: 1:22:47
num of updates: 13300
train total loss: 0.06582
train state loss: 0.05502
train reward loss: 0.01080
val total loss: 0.04163
val state loss: 0.03507
val reward loss: 0.00656
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 34%|███▎      | 134/400 [1:23:24<3:06:29, 42.06s/it]

time elapsed: 1:23:25
num of updates: 13400
train total loss: 0.06487
train state loss: 0.05423
train reward loss: 0.01064
val total loss: 0.04266
val state loss: 0.03471
val reward loss: 0.00795
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 34%|███▍      | 135/400 [1:23:58<2:55:14, 39.68s/it]

time elapsed: 1:23:59
num of updates: 13500
train total loss: 0.06500
train state loss: 0.05421
train reward loss: 0.01079
val total loss: 0.03904
val state loss: 0.03373
val reward loss: 0.00531
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 34%|███▍      | 136/400 [1:24:33<2:48:19, 38.26s/it]

time elapsed: 1:24:34
num of updates: 13600
train total loss: 0.06409
train state loss: 0.05363
train reward loss: 0.01045
val total loss: 0.03787
val state loss: 0.03186
val reward loss: 0.00601
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 34%|███▍      | 137/400 [1:25:09<2:43:48, 37.37s/it]

time elapsed: 1:25:09
num of updates: 13700
train total loss: 0.06462
train state loss: 0.05394
train reward loss: 0.01069
val total loss: 0.04149
val state loss: 0.03450
val reward loss: 0.00699
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 34%|███▍      | 138/400 [1:25:42<2:37:40, 36.11s/it]

time elapsed: 1:25:42
num of updates: 13800
train total loss: 0.06370
train state loss: 0.05322
train reward loss: 0.01048
val total loss: 0.03694
val state loss: 0.03188
val reward loss: 0.00506
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 35%|███▍      | 139/400 [1:26:18<2:36:36, 36.00s/it]

time elapsed: 1:26:18
num of updates: 13900
train total loss: 0.06284
train state loss: 0.05260
train reward loss: 0.01024
val total loss: 0.04045
val state loss: 0.03417
val reward loss: 0.00628
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 35%|███▌      | 140/400 [1:26:54<2:36:36, 36.14s/it]

time elapsed: 1:26:54
num of updates: 14000
train total loss: 0.06376
train state loss: 0.05320
train reward loss: 0.01056
val total loss: 0.03597
val state loss: 0.03109
val reward loss: 0.00488
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 35%|███▌      | 141/400 [1:27:27<2:32:12, 35.26s/it]

time elapsed: 1:27:27
num of updates: 14100
train total loss: 0.06265
train state loss: 0.05222
train reward loss: 0.01042
val total loss: 0.03843
val state loss: 0.03307
val reward loss: 0.00535
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 36%|███▌      | 142/400 [1:28:02<2:31:01, 35.12s/it]

time elapsed: 1:28:02
num of updates: 14200
train total loss: 0.06247
train state loss: 0.05229
train reward loss: 0.01018
val total loss: 0.03617
val state loss: 0.03124
val reward loss: 0.00493
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 36%|███▌      | 143/400 [1:28:37<2:30:37, 35.17s/it]

time elapsed: 1:28:38
num of updates: 14300
train total loss: 0.06161
train state loss: 0.05154
train reward loss: 0.01007
val total loss: 0.03963
val state loss: 0.03291
val reward loss: 0.00672
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 36%|███▌      | 144/400 [1:29:10<2:26:53, 34.43s/it]

time elapsed: 1:29:10
num of updates: 14400
train total loss: 0.06231
train state loss: 0.05179
train reward loss: 0.01052
val total loss: 0.04145
val state loss: 0.03423
val reward loss: 0.00722
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 36%|███▋      | 145/400 [1:29:44<2:25:58, 34.35s/it]

time elapsed: 1:29:44
num of updates: 14500
train total loss: 0.06134
train state loss: 0.05127
train reward loss: 0.01007
val total loss: 0.03559
val state loss: 0.03042
val reward loss: 0.00517
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 36%|███▋      | 146/400 [1:30:21<2:27:57, 34.95s/it]

time elapsed: 1:30:21
num of updates: 14600
train total loss: 0.06199
train state loss: 0.05180
train reward loss: 0.01018
val total loss: 0.04142
val state loss: 0.03426
val reward loss: 0.00716
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 37%|███▋      | 147/400 [1:30:53<2:24:12, 34.20s/it]

time elapsed: 1:30:53
num of updates: 14700
train total loss: 0.06041
train state loss: 0.05057
train reward loss: 0.00984
val total loss: 0.03772
val state loss: 0.03214
val reward loss: 0.00558
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 37%|███▋      | 148/400 [1:31:29<2:25:32, 34.65s/it]

time elapsed: 1:31:29
num of updates: 14800
train total loss: 0.05992
train state loss: 0.05014
train reward loss: 0.00977
val total loss: 0.03710
val state loss: 0.03165
val reward loss: 0.00544
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 37%|███▋      | 149/400 [1:32:06<2:28:01, 35.38s/it]

time elapsed: 1:32:06
num of updates: 14900
train total loss: 0.06020
train state loss: 0.05026
train reward loss: 0.00994
val total loss: 0.03371
val state loss: 0.02895
val reward loss: 0.00475
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 38%|███▊      | 150/400 [1:32:40<2:26:23, 35.13s/it]

time elapsed: 1:32:41
num of updates: 15000
train total loss: 0.06002
train state loss: 0.05011
train reward loss: 0.00991
val total loss: 0.03816
val state loss: 0.03223
val reward loss: 0.00593
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 38%|███▊      | 151/400 [1:33:14<2:23:53, 34.67s/it]

time elapsed: 1:33:14
num of updates: 15100
train total loss: 0.05872
train state loss: 0.04914
train reward loss: 0.00959
val total loss: 0.03792
val state loss: 0.03207
val reward loss: 0.00585
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 38%|███▊      | 152/400 [1:33:52<2:27:43, 35.74s/it]

time elapsed: 1:33:52
num of updates: 15200
train total loss: 0.05960
train state loss: 0.04990
train reward loss: 0.00970
val total loss: 0.03614
val state loss: 0.03039
val reward loss: 0.00576
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 38%|███▊      | 153/400 [1:34:36<2:37:36, 38.28s/it]

time elapsed: 1:34:37
num of updates: 15300
train total loss: 0.05838
train state loss: 0.04884
train reward loss: 0.00954
val total loss: 0.03492
val state loss: 0.02979
val reward loss: 0.00512
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 38%|███▊      | 154/400 [1:35:12<2:34:08, 37.60s/it]

time elapsed: 1:35:13
num of updates: 15400
train total loss: 0.05861
train state loss: 0.04908
train reward loss: 0.00953
val total loss: 0.03702
val state loss: 0.03030
val reward loss: 0.00672
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 39%|███▉      | 155/400 [1:35:46<2:28:18, 36.32s/it]

time elapsed: 1:35:46
num of updates: 15500
train total loss: 0.05830
train state loss: 0.04878
train reward loss: 0.00952
val total loss: 0.03534
val state loss: 0.02982
val reward loss: 0.00552
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 39%|███▉      | 156/400 [1:36:25<2:31:17, 37.20s/it]

time elapsed: 1:36:25
num of updates: 15600
train total loss: 0.05791
train state loss: 0.04861
train reward loss: 0.00930
val total loss: 0.03504
val state loss: 0.02990
val reward loss: 0.00513
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 39%|███▉      | 157/400 [1:37:02<2:30:25, 37.14s/it]

time elapsed: 1:37:02
num of updates: 15700
train total loss: 0.05758
train state loss: 0.04822
train reward loss: 0.00936
val total loss: 0.03665
val state loss: 0.03101
val reward loss: 0.00564
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 40%|███▉      | 158/400 [1:37:40<2:30:37, 37.34s/it]

time elapsed: 1:37:40
num of updates: 15800
train total loss: 0.05686
train state loss: 0.04769
train reward loss: 0.00917
val total loss: 0.03670
val state loss: 0.03102
val reward loss: 0.00568
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 40%|███▉      | 159/400 [1:38:20<2:33:37, 38.25s/it]

time elapsed: 1:38:20
num of updates: 15900
train total loss: 0.05724
train state loss: 0.04787
train reward loss: 0.00937
val total loss: 0.03506
val state loss: 0.02843
val reward loss: 0.00663
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 40%|████      | 160/400 [1:38:56<2:29:49, 37.45s/it]

time elapsed: 1:38:56
num of updates: 16000
train total loss: 0.05652
train state loss: 0.04740
train reward loss: 0.00912
val total loss: 0.03514
val state loss: 0.03014
val reward loss: 0.00500
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 40%|████      | 161/400 [1:39:47<2:46:01, 41.68s/it]

time elapsed: 1:39:48
num of updates: 16100
train total loss: 0.05666
train state loss: 0.04746
train reward loss: 0.00920
val total loss: 0.03519
val state loss: 0.02959
val reward loss: 0.00560
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 40%|████      | 162/400 [1:40:39<2:56:45, 44.56s/it]

time elapsed: 1:40:39
num of updates: 16200
train total loss: 0.05702
train state loss: 0.04757
train reward loss: 0.00945
val total loss: 0.03200
val state loss: 0.02700
val reward loss: 0.00500
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 41%|████      | 163/400 [1:41:15<2:46:37, 42.18s/it]

time elapsed: 1:41:15
num of updates: 16300
train total loss: 0.05641
train state loss: 0.04722
train reward loss: 0.00919
val total loss: 0.03450
val state loss: 0.02912
val reward loss: 0.00539
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 41%|████      | 164/400 [1:41:53<2:40:55, 40.91s/it]

time elapsed: 1:41:53
num of updates: 16400
train total loss: 0.05579
train state loss: 0.04687
train reward loss: 0.00893
val total loss: 0.03418
val state loss: 0.02924
val reward loss: 0.00494
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 41%|████▏     | 165/400 [1:42:29<2:34:19, 39.40s/it]

time elapsed: 1:42:29
num of updates: 16500
train total loss: 0.05535
train state loss: 0.04636
train reward loss: 0.00899
val total loss: 0.03562
val state loss: 0.03012
val reward loss: 0.00550
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 42%|████▏     | 166/400 [1:43:05<2:29:45, 38.40s/it]

time elapsed: 1:43:05
num of updates: 16600
train total loss: 0.05497
train state loss: 0.04604
train reward loss: 0.00893
val total loss: 0.03449
val state loss: 0.02957
val reward loss: 0.00492
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 42%|████▏     | 167/400 [1:43:39<2:24:20, 37.17s/it]

time elapsed: 1:43:40
num of updates: 16700
train total loss: 0.05494
train state loss: 0.04602
train reward loss: 0.00892
val total loss: 0.03419
val state loss: 0.02890
val reward loss: 0.00529
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 42%|████▏     | 168/400 [1:44:14<2:20:35, 36.36s/it]

time elapsed: 1:44:14
num of updates: 16800
train total loss: 0.05567
train state loss: 0.04659
train reward loss: 0.00908
val total loss: 0.03398
val state loss: 0.02904
val reward loss: 0.00494
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 42%|████▏     | 169/400 [1:44:50<2:19:20, 36.19s/it]

time elapsed: 1:44:50
num of updates: 16900
train total loss: 0.05467
train state loss: 0.04568
train reward loss: 0.00898
val total loss: 0.03282
val state loss: 0.02775
val reward loss: 0.00506
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 42%|████▎     | 170/400 [1:45:26<2:19:18, 36.34s/it]

time elapsed: 1:45:27
num of updates: 17000
train total loss: 0.05497
train state loss: 0.04607
train reward loss: 0.00890
val total loss: 0.03332
val state loss: 0.02853
val reward loss: 0.00480
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 43%|████▎     | 171/400 [1:46:08<2:24:21, 37.82s/it]

time elapsed: 1:46:08
num of updates: 17100
train total loss: 0.05461
train state loss: 0.04575
train reward loss: 0.00886
val total loss: 0.03174
val state loss: 0.02713
val reward loss: 0.00462
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 43%|████▎     | 172/400 [1:46:45<2:22:38, 37.54s/it]

time elapsed: 1:46:45
num of updates: 17200
train total loss: 0.05411
train state loss: 0.04515
train reward loss: 0.00896
val total loss: 0.03166
val state loss: 0.02742
val reward loss: 0.00423
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 43%|████▎     | 173/400 [1:47:23<2:23:33, 37.95s/it]

time elapsed: 1:47:24
num of updates: 17300
train total loss: 0.05369
train state loss: 0.04494
train reward loss: 0.00875
val total loss: 0.03167
val state loss: 0.02737
val reward loss: 0.00430
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 44%|████▎     | 174/400 [1:48:04<2:25:29, 38.63s/it]

time elapsed: 1:48:04
num of updates: 17400
train total loss: 0.05434
train state loss: 0.04548
train reward loss: 0.00886
val total loss: 0.03014
val state loss: 0.02631
val reward loss: 0.00383
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 44%|████▍     | 175/400 [1:48:41<2:23:32, 38.28s/it]

time elapsed: 1:48:41
num of updates: 17500
train total loss: 0.05319
train state loss: 0.04471
train reward loss: 0.00848
val total loss: 0.03483
val state loss: 0.02877
val reward loss: 0.00606
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 44%|████▍     | 176/400 [1:49:20<2:24:01, 38.58s/it]

time elapsed: 1:49:21
num of updates: 17600
train total loss: 0.05321
train state loss: 0.04460
train reward loss: 0.00861
val total loss: 0.03035
val state loss: 0.02638
val reward loss: 0.00397
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 44%|████▍     | 177/400 [1:49:59<2:22:55, 38.46s/it]

time elapsed: 1:49:59
num of updates: 17700
train total loss: 0.05354
train state loss: 0.04482
train reward loss: 0.00871
val total loss: 0.03102
val state loss: 0.02657
val reward loss: 0.00444
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 44%|████▍     | 178/400 [1:50:38<2:22:56, 38.63s/it]

time elapsed: 1:50:38
num of updates: 17800
train total loss: 0.05251
train state loss: 0.04403
train reward loss: 0.00848
val total loss: 0.03277
val state loss: 0.02705
val reward loss: 0.00573
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 45%|████▍     | 179/400 [1:51:16<2:21:41, 38.47s/it]

time elapsed: 1:51:16
num of updates: 17900
train total loss: 0.05312
train state loss: 0.04458
train reward loss: 0.00854
val total loss: 0.03412
val state loss: 0.02774
val reward loss: 0.00638
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 45%|████▌     | 180/400 [1:52:11<2:39:45, 43.57s/it]

time elapsed: 1:52:11
num of updates: 18000
train total loss: 0.05266
train state loss: 0.04393
train reward loss: 0.00873
val total loss: 0.03175
val state loss: 0.02668
val reward loss: 0.00507
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 45%|████▌     | 181/400 [1:52:50<2:34:19, 42.28s/it]

time elapsed: 1:52:51
num of updates: 18100
train total loss: 0.05219
train state loss: 0.04382
train reward loss: 0.00837
val total loss: 0.03266
val state loss: 0.02736
val reward loss: 0.00530
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 46%|████▌     | 182/400 [1:53:35<2:36:12, 42.99s/it]

time elapsed: 1:53:35
num of updates: 18200
train total loss: 0.05217
train state loss: 0.04377
train reward loss: 0.00839
val total loss: 0.02783
val state loss: 0.02435
val reward loss: 0.00348
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 46%|████▌     | 183/400 [1:54:14<2:31:00, 41.75s/it]

time elapsed: 1:54:14
num of updates: 18300
train total loss: 0.05226
train state loss: 0.04377
train reward loss: 0.00848
val total loss: 0.03364
val state loss: 0.02777
val reward loss: 0.00587
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 46%|████▌     | 184/400 [1:54:59<2:34:12, 42.84s/it]

time elapsed: 1:55:00
num of updates: 18400
train total loss: 0.05113
train state loss: 0.04279
train reward loss: 0.00834
val total loss: 0.03047
val state loss: 0.02546
val reward loss: 0.00501
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 46%|████▋     | 185/400 [1:55:38<2:29:14, 41.65s/it]

time elapsed: 1:55:38
num of updates: 18500
train total loss: 0.05120
train state loss: 0.04298
train reward loss: 0.00821
val total loss: 0.03131
val state loss: 0.02670
val reward loss: 0.00460
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 46%|████▋     | 186/400 [1:56:19<2:28:01, 41.50s/it]

time elapsed: 1:56:20
num of updates: 18600
train total loss: 0.05159
train state loss: 0.04321
train reward loss: 0.00838
val total loss: 0.03093
val state loss: 0.02585
val reward loss: 0.00508
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 47%|████▋     | 187/400 [1:57:09<2:35:43, 43.86s/it]

time elapsed: 1:57:09
num of updates: 18700
train total loss: 0.05101
train state loss: 0.04277
train reward loss: 0.00824
val total loss: 0.03277
val state loss: 0.02807
val reward loss: 0.00470
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 47%|████▋     | 188/400 [1:57:51<2:33:41, 43.50s/it]

time elapsed: 1:57:52
num of updates: 18800
train total loss: 0.05104
train state loss: 0.04277
train reward loss: 0.00828
val total loss: 0.03147
val state loss: 0.02628
val reward loss: 0.00520
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 47%|████▋     | 189/400 [1:58:29<2:26:35, 41.69s/it]

time elapsed: 1:58:29
num of updates: 18900
train total loss: 0.05064
train state loss: 0.04245
train reward loss: 0.00819
val total loss: 0.02839
val state loss: 0.02416
val reward loss: 0.00423
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 48%|████▊     | 190/400 [1:59:09<2:24:35, 41.31s/it]

time elapsed: 1:59:09
num of updates: 19000
train total loss: 0.05009
train state loss: 0.04199
train reward loss: 0.00810
val total loss: 0.02880
val state loss: 0.02502
val reward loss: 0.00377
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 48%|████▊     | 191/400 [1:59:51<2:24:35, 41.51s/it]

time elapsed: 1:59:51
num of updates: 19100
train total loss: 0.04967
train state loss: 0.04175
train reward loss: 0.00792
val total loss: 0.02949
val state loss: 0.02538
val reward loss: 0.00411
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 48%|████▊     | 192/400 [2:00:31<2:21:36, 40.85s/it]

time elapsed: 2:00:31
num of updates: 19200
train total loss: 0.04973
train state loss: 0.04186
train reward loss: 0.00787
val total loss: 0.02890
val state loss: 0.02499
val reward loss: 0.00390
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 48%|████▊     | 193/400 [2:01:09<2:18:54, 40.26s/it]

time elapsed: 2:01:10
num of updates: 19300
train total loss: 0.05009
train state loss: 0.04200
train reward loss: 0.00809
val total loss: 0.03209
val state loss: 0.02597
val reward loss: 0.00612
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 48%|████▊     | 194/400 [2:01:50<2:18:15, 40.27s/it]

time elapsed: 2:01:50
num of updates: 19400
train total loss: 0.04959
train state loss: 0.04162
train reward loss: 0.00797
val total loss: 0.03003
val state loss: 0.02571
val reward loss: 0.00431
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 49%|████▉     | 195/400 [2:02:30<2:17:08, 40.14s/it]

time elapsed: 2:02:30
num of updates: 19500
train total loss: 0.04996
train state loss: 0.04181
train reward loss: 0.00815
val total loss: 0.03164
val state loss: 0.02721
val reward loss: 0.00442
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 49%|████▉     | 196/400 [2:03:11<2:17:51, 40.54s/it]

time elapsed: 2:03:11
num of updates: 19600
train total loss: 0.04987
train state loss: 0.04180
train reward loss: 0.00808
val total loss: 0.02791
val state loss: 0.02352
val reward loss: 0.00439
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 49%|████▉     | 197/400 [2:03:50<2:15:27, 40.04s/it]

time elapsed: 2:03:50
num of updates: 19700
train total loss: 0.04898
train state loss: 0.04113
train reward loss: 0.00786
val total loss: 0.02875
val state loss: 0.02472
val reward loss: 0.00403
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 50%|████▉     | 198/400 [2:04:30<2:14:33, 39.97s/it]

time elapsed: 2:04:30
num of updates: 19800
train total loss: 0.04972
train state loss: 0.04170
train reward loss: 0.00802
val total loss: 0.03133
val state loss: 0.02649
val reward loss: 0.00484
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 50%|████▉     | 199/400 [2:05:11<2:15:16, 40.38s/it]

time elapsed: 2:05:11
num of updates: 19900
train total loss: 0.04872
train state loss: 0.04094
train reward loss: 0.00778
val total loss: 0.03048
val state loss: 0.02502
val reward loss: 0.00545
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 50%|█████     | 200/400 [2:05:49<2:12:23, 39.72s/it]

time elapsed: 2:05:49
num of updates: 20000
train total loss: 0.04909
train state loss: 0.04099
train reward loss: 0.00810
val total loss: 0.02839
val state loss: 0.02420
val reward loss: 0.00419
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 50%|█████     | 201/400 [2:06:28<2:11:05, 39.52s/it]

time elapsed: 2:06:29
num of updates: 20100
train total loss: 0.04881
train state loss: 0.04091
train reward loss: 0.00790
val total loss: 0.02925
val state loss: 0.02425
val reward loss: 0.00500
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 50%|█████     | 202/400 [2:07:18<2:20:31, 42.58s/it]

time elapsed: 2:07:18
num of updates: 20200
train total loss: 0.04854
train state loss: 0.04056
train reward loss: 0.00798
val total loss: 0.02882
val state loss: 0.02449
val reward loss: 0.00433
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 51%|█████     | 203/400 [2:08:03<2:22:29, 43.40s/it]

time elapsed: 2:08:04
num of updates: 20300
train total loss: 0.04830
train state loss: 0.04049
train reward loss: 0.00781
val total loss: 0.03032
val state loss: 0.02544
val reward loss: 0.00488
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 51%|█████     | 204/400 [2:08:43<2:18:00, 42.25s/it]

time elapsed: 2:08:43
num of updates: 20400
train total loss: 0.04833
train state loss: 0.04045
train reward loss: 0.00788
val total loss: 0.03055
val state loss: 0.02556
val reward loss: 0.00499
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 51%|█████▏    | 205/400 [2:09:23<2:15:38, 41.73s/it]

time elapsed: 2:09:24
num of updates: 20500
train total loss: 0.04835
train state loss: 0.04057
train reward loss: 0.00778
val total loss: 0.02841
val state loss: 0.02378
val reward loss: 0.00463
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 52%|█████▏    | 206/400 [2:10:01<2:11:23, 40.64s/it]

time elapsed: 2:10:02
num of updates: 20600
train total loss: 0.04753
train state loss: 0.03982
train reward loss: 0.00771
val total loss: 0.02729
val state loss: 0.02376
val reward loss: 0.00353
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 52%|█████▏    | 207/400 [2:10:43<2:11:24, 40.85s/it]

time elapsed: 2:10:43
num of updates: 20700
train total loss: 0.04705
train state loss: 0.03955
train reward loss: 0.00750
val total loss: 0.03094
val state loss: 0.02579
val reward loss: 0.00515
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 52%|█████▏    | 208/400 [2:11:25<2:12:15, 41.33s/it]

time elapsed: 2:11:26
num of updates: 20800
train total loss: 0.04807
train state loss: 0.04023
train reward loss: 0.00784
val total loss: 0.02963
val state loss: 0.02475
val reward loss: 0.00489
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 52%|█████▏    | 209/400 [2:12:03<2:08:29, 40.36s/it]

time elapsed: 2:12:04
num of updates: 20900
train total loss: 0.04696
train state loss: 0.03935
train reward loss: 0.00762
val total loss: 0.03091
val state loss: 0.02556
val reward loss: 0.00534
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 52%|█████▎    | 210/400 [2:12:43<2:07:02, 40.12s/it]

time elapsed: 2:12:43
num of updates: 21000
train total loss: 0.04706
train state loss: 0.03954
train reward loss: 0.00752
val total loss: 0.02866
val state loss: 0.02462
val reward loss: 0.00404
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 53%|█████▎    | 211/400 [2:13:26<2:08:50, 40.90s/it]

time elapsed: 2:13:26
num of updates: 21100
train total loss: 0.04717
train state loss: 0.03955
train reward loss: 0.00762
val total loss: 0.03090
val state loss: 0.02499
val reward loss: 0.00592
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 53%|█████▎    | 212/400 [2:14:08<2:09:52, 41.45s/it]

time elapsed: 2:14:09
num of updates: 21200
train total loss: 0.04638
train state loss: 0.03885
train reward loss: 0.00753
val total loss: 0.02940
val state loss: 0.02411
val reward loss: 0.00530
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 53%|█████▎    | 213/400 [2:14:51<2:09:51, 41.66s/it]

time elapsed: 2:14:51
num of updates: 21300
train total loss: 0.04681
train state loss: 0.03922
train reward loss: 0.00759
val total loss: 0.02992
val state loss: 0.02537
val reward loss: 0.00455
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 54%|█████▎    | 214/400 [2:15:29<2:06:29, 40.80s/it]

time elapsed: 2:15:30
num of updates: 21400
train total loss: 0.04575
train state loss: 0.03835
train reward loss: 0.00740
val total loss: 0.02801
val state loss: 0.02399
val reward loss: 0.00402
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 54%|█████▍    | 215/400 [2:16:08<2:03:59, 40.21s/it]

time elapsed: 2:16:08
num of updates: 21500
train total loss: 0.04582
train state loss: 0.03844
train reward loss: 0.00738
val total loss: 0.02551
val state loss: 0.02129
val reward loss: 0.00423
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 54%|█████▍    | 216/400 [2:16:49<2:03:29, 40.27s/it]

time elapsed: 2:16:49
num of updates: 21600
train total loss: 0.04629
train state loss: 0.03874
train reward loss: 0.00755
val total loss: 0.02999
val state loss: 0.02382
val reward loss: 0.00617
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 54%|█████▍    | 217/400 [2:17:25<1:59:38, 39.22s/it]

time elapsed: 2:17:26
num of updates: 21700
train total loss: 0.04641
train state loss: 0.03890
train reward loss: 0.00751
val total loss: 0.02778
val state loss: 0.02318
val reward loss: 0.00460
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 55%|█████▍    | 218/400 [2:18:04<1:58:12, 38.97s/it]

time elapsed: 2:18:04
num of updates: 21800
train total loss: 0.04549
train state loss: 0.03816
train reward loss: 0.00734
val total loss: 0.02760
val state loss: 0.02338
val reward loss: 0.00422
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 55%|█████▍    | 219/400 [2:18:44<1:59:03, 39.47s/it]

time elapsed: 2:18:45
num of updates: 21900
train total loss: 0.04662
train state loss: 0.03904
train reward loss: 0.00758
val total loss: 0.02712
val state loss: 0.02260
val reward loss: 0.00452
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 55%|█████▌    | 220/400 [2:19:22<1:56:23, 38.80s/it]

time elapsed: 2:19:22
num of updates: 22000
train total loss: 0.04600
train state loss: 0.03845
train reward loss: 0.00754
val total loss: 0.02734
val state loss: 0.02350
val reward loss: 0.00384
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 55%|█████▌    | 221/400 [2:20:00<1:55:43, 38.79s/it]

time elapsed: 2:20:01
num of updates: 22100
train total loss: 0.04605
train state loss: 0.03854
train reward loss: 0.00751
val total loss: 0.02615
val state loss: 0.02247
val reward loss: 0.00368
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 56%|█████▌    | 222/400 [2:20:40<1:56:08, 39.15s/it]

time elapsed: 2:20:41
num of updates: 22200
train total loss: 0.04570
train state loss: 0.03814
train reward loss: 0.00756
val total loss: 0.02609
val state loss: 0.02236
val reward loss: 0.00372
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 56%|█████▌    | 223/400 [2:21:19<1:54:49, 38.92s/it]

time elapsed: 2:21:19
num of updates: 22300
train total loss: 0.04546
train state loss: 0.03812
train reward loss: 0.00734
val total loss: 0.02659
val state loss: 0.02244
val reward loss: 0.00415
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 56%|█████▌    | 224/400 [2:22:00<1:55:57, 39.53s/it]

time elapsed: 2:22:00
num of updates: 22400
train total loss: 0.04524
train state loss: 0.03787
train reward loss: 0.00737
val total loss: 0.02588
val state loss: 0.02253
val reward loss: 0.00335
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 56%|█████▋    | 225/400 [2:22:36<1:52:31, 38.58s/it]

time elapsed: 2:22:36
num of updates: 22500
train total loss: 0.04496
train state loss: 0.03758
train reward loss: 0.00739
val total loss: 0.02565
val state loss: 0.02203
val reward loss: 0.00362
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 56%|█████▋    | 226/400 [2:23:33<2:07:52, 44.09s/it]

time elapsed: 2:23:33
num of updates: 22600
train total loss: 0.04508
train state loss: 0.03772
train reward loss: 0.00736
val total loss: 0.03070
val state loss: 0.02560
val reward loss: 0.00511
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 57%|█████▋    | 227/400 [2:24:14<2:04:37, 43.22s/it]

time elapsed: 2:24:14
num of updates: 22700
train total loss: 0.04508
train state loss: 0.03771
train reward loss: 0.00737
val total loss: 0.02764
val state loss: 0.02356
val reward loss: 0.00408
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 57%|█████▋    | 228/400 [2:24:52<1:59:34, 41.71s/it]

time elapsed: 2:24:53
num of updates: 22800
train total loss: 0.04483
train state loss: 0.03762
train reward loss: 0.00721
val total loss: 0.02637
val state loss: 0.02239
val reward loss: 0.00398
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 57%|█████▋    | 229/400 [2:25:40<2:04:00, 43.51s/it]

time elapsed: 2:25:40
num of updates: 22900
train total loss: 0.04495
train state loss: 0.03758
train reward loss: 0.00737
val total loss: 0.02680
val state loss: 0.02275
val reward loss: 0.00405
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 57%|█████▊    | 230/400 [2:26:18<1:58:53, 41.96s/it]

time elapsed: 2:26:19
num of updates: 23000
train total loss: 0.04418
train state loss: 0.03694
train reward loss: 0.00724
val total loss: 0.02484
val state loss: 0.02105
val reward loss: 0.00379
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 58%|█████▊    | 231/400 [2:26:56<1:54:25, 40.62s/it]

time elapsed: 2:26:56
num of updates: 23100
train total loss: 0.04402
train state loss: 0.03691
train reward loss: 0.00711
val total loss: 0.02751
val state loss: 0.02353
val reward loss: 0.00397
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 58%|█████▊    | 232/400 [2:27:37<1:53:49, 40.65s/it]

time elapsed: 2:27:37
num of updates: 23200
train total loss: 0.04402
train state loss: 0.03682
train reward loss: 0.00721
val total loss: 0.02594
val state loss: 0.02209
val reward loss: 0.00386
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 58%|█████▊    | 233/400 [2:28:13<1:49:34, 39.37s/it]

time elapsed: 2:28:13
num of updates: 23300
train total loss: 0.04449
train state loss: 0.03724
train reward loss: 0.00725
val total loss: 0.02871
val state loss: 0.02362
val reward loss: 0.00509
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 58%|█████▊    | 234/400 [2:28:53<1:49:10, 39.46s/it]

time elapsed: 2:28:53
num of updates: 23400
train total loss: 0.04388
train state loss: 0.03680
train reward loss: 0.00707
val total loss: 0.02643
val state loss: 0.02261
val reward loss: 0.00382
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 59%|█████▉    | 235/400 [2:29:33<1:48:59, 39.63s/it]

time elapsed: 2:29:33
num of updates: 23500
train total loss: 0.04392
train state loss: 0.03678
train reward loss: 0.00714
val total loss: 0.02856
val state loss: 0.02261
val reward loss: 0.00595
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 59%|█████▉    | 236/400 [2:30:10<1:46:30, 38.96s/it]

time elapsed: 2:30:10
num of updates: 23600
train total loss: 0.04405
train state loss: 0.03685
train reward loss: 0.00720
val total loss: 0.02851
val state loss: 0.02415
val reward loss: 0.00436
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 59%|█████▉    | 237/400 [2:30:51<1:47:26, 39.55s/it]

time elapsed: 2:30:51
num of updates: 23700
train total loss: 0.04357
train state loss: 0.03638
train reward loss: 0.00719
val total loss: 0.02462
val state loss: 0.02105
val reward loss: 0.00357
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 60%|█████▉    | 238/400 [2:31:29<1:45:27, 39.06s/it]

time elapsed: 2:31:29
num of updates: 23800
train total loss: 0.04399
train state loss: 0.03681
train reward loss: 0.00718
val total loss: 0.02452
val state loss: 0.02135
val reward loss: 0.00317
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 60%|█████▉    | 239/400 [2:32:09<1:45:45, 39.41s/it]

time elapsed: 2:32:09
num of updates: 23900
train total loss: 0.04311
train state loss: 0.03602
train reward loss: 0.00709
val total loss: 0.02607
val state loss: 0.02232
val reward loss: 0.00376
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 60%|██████    | 240/400 [2:32:48<1:44:28, 39.18s/it]

time elapsed: 2:32:48
num of updates: 24000
train total loss: 0.04347
train state loss: 0.03634
train reward loss: 0.00713
val total loss: 0.03053
val state loss: 0.02512
val reward loss: 0.00541
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 60%|██████    | 241/400 [2:33:28<1:44:10, 39.31s/it]

time elapsed: 2:33:28
num of updates: 24100
train total loss: 0.04256
train state loss: 0.03562
train reward loss: 0.00695
val total loss: 0.02409
val state loss: 0.02056
val reward loss: 0.00353
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 60%|██████    | 242/400 [2:34:12<1:47:17, 40.74s/it]

time elapsed: 2:34:12
num of updates: 24200
train total loss: 0.04311
train state loss: 0.03606
train reward loss: 0.00705
val total loss: 0.02502
val state loss: 0.02155
val reward loss: 0.00347
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 61%|██████    | 243/400 [2:34:54<1:47:32, 41.10s/it]

time elapsed: 2:34:54
num of updates: 24300
train total loss: 0.04302
train state loss: 0.03598
train reward loss: 0.00704
val total loss: 0.02428
val state loss: 0.02099
val reward loss: 0.00328
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 61%|██████    | 244/400 [2:35:38<1:49:50, 42.25s/it]

time elapsed: 2:35:39
num of updates: 24400
train total loss: 0.04289
train state loss: 0.03591
train reward loss: 0.00697
val total loss: 0.02484
val state loss: 0.02148
val reward loss: 0.00336
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 61%|██████▏   | 245/400 [2:36:19<1:47:51, 41.75s/it]

time elapsed: 2:36:19
num of updates: 24500
train total loss: 0.04299
train state loss: 0.03585
train reward loss: 0.00713
val total loss: 0.02447
val state loss: 0.02114
val reward loss: 0.00333
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 62%|██████▏   | 246/400 [2:37:01<1:46:58, 41.68s/it]

time elapsed: 2:37:01
num of updates: 24600
train total loss: 0.04232
train state loss: 0.03543
train reward loss: 0.00690
val total loss: 0.02464
val state loss: 0.02161
val reward loss: 0.00303
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 62%|██████▏   | 247/400 [2:37:39<1:44:05, 40.82s/it]

time elapsed: 2:37:40
num of updates: 24700
train total loss: 0.04217
train state loss: 0.03526
train reward loss: 0.00691
val total loss: 0.02720
val state loss: 0.02245
val reward loss: 0.00474
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 62%|██████▏   | 248/400 [2:38:21<1:43:49, 40.98s/it]

time elapsed: 2:38:21
num of updates: 24800
train total loss: 0.04244
train state loss: 0.03553
train reward loss: 0.00690
val total loss: 0.02915
val state loss: 0.02379
val reward loss: 0.00537
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 62%|██████▏   | 249/400 [2:39:20<1:56:50, 46.42s/it]

time elapsed: 2:39:20
num of updates: 24900
train total loss: 0.04240
train state loss: 0.03547
train reward loss: 0.00693
val total loss: 0.02792
val state loss: 0.02260
val reward loss: 0.00533
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 62%|██████▎   | 250/400 [2:40:00<1:51:28, 44.59s/it]

time elapsed: 2:40:00
num of updates: 25000
train total loss: 0.04240
train state loss: 0.03543
train reward loss: 0.00697
val total loss: 0.02757
val state loss: 0.02345
val reward loss: 0.00412
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 63%|██████▎   | 251/400 [2:40:46<1:51:22, 44.85s/it]

time elapsed: 2:40:46
num of updates: 25100
train total loss: 0.04210
train state loss: 0.03520
train reward loss: 0.00689
val total loss: 0.02810
val state loss: 0.02387
val reward loss: 0.00423
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 63%|██████▎   | 252/400 [2:41:27<1:47:53, 43.74s/it]

time elapsed: 2:41:27
num of updates: 25200
train total loss: 0.04185
train state loss: 0.03497
train reward loss: 0.00687
val total loss: 0.02609
val state loss: 0.02229
val reward loss: 0.00380
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 63%|██████▎   | 253/400 [2:42:08<1:45:24, 43.02s/it]

time elapsed: 2:42:08
num of updates: 25300
train total loss: 0.04142
train state loss: 0.03471
train reward loss: 0.00671
val total loss: 0.02670
val state loss: 0.02250
val reward loss: 0.00419
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 64%|██████▎   | 254/400 [2:42:54<1:47:02, 43.99s/it]

time elapsed: 2:42:55
num of updates: 25400
train total loss: 0.04117
train state loss: 0.03451
train reward loss: 0.00666
val total loss: 0.02688
val state loss: 0.02263
val reward loss: 0.00425
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 64%|██████▍   | 255/400 [2:43:35<1:43:43, 42.92s/it]

time elapsed: 2:43:35
num of updates: 25500
train total loss: 0.04187
train state loss: 0.03501
train reward loss: 0.00686
val total loss: 0.02536
val state loss: 0.02186
val reward loss: 0.00350
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 64%|██████▍   | 256/400 [2:44:13<1:39:42, 41.55s/it]

time elapsed: 2:44:13
num of updates: 25600
train total loss: 0.04129
train state loss: 0.03465
train reward loss: 0.00665
val total loss: 0.02399
val state loss: 0.02055
val reward loss: 0.00344
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 64%|██████▍   | 257/400 [2:44:54<1:38:17, 41.24s/it]

time elapsed: 2:44:54
num of updates: 25700
train total loss: 0.04109
train state loss: 0.03450
train reward loss: 0.00660
val total loss: 0.02370
val state loss: 0.02030
val reward loss: 0.00340
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 64%|██████▍   | 258/400 [2:45:31<1:35:02, 40.16s/it]

time elapsed: 2:45:31
num of updates: 25800
train total loss: 0.04162
train state loss: 0.03488
train reward loss: 0.00674
val total loss: 0.02271
val state loss: 0.01992
val reward loss: 0.00279
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 65%|██████▍   | 259/400 [2:46:09<1:32:41, 39.44s/it]

time elapsed: 2:46:09
num of updates: 25900
train total loss: 0.04030
train state loss: 0.03391
train reward loss: 0.00639
val total loss: 0.02485
val state loss: 0.02105
val reward loss: 0.00379
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 65%|██████▌   | 260/400 [2:46:51<1:33:42, 40.16s/it]

time elapsed: 2:46:51
num of updates: 26000
train total loss: 0.04190
train state loss: 0.03503
train reward loss: 0.00687
val total loss: 0.02456
val state loss: 0.02144
val reward loss: 0.00312
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 65%|██████▌   | 261/400 [2:47:29<1:31:52, 39.66s/it]

time elapsed: 2:47:30
num of updates: 26100
train total loss: 0.04100
train state loss: 0.03429
train reward loss: 0.00670
val total loss: 0.02501
val state loss: 0.02153
val reward loss: 0.00347
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 66%|██████▌   | 262/400 [2:48:10<1:31:32, 39.80s/it]

time elapsed: 2:48:10
num of updates: 26200
train total loss: 0.04107
train state loss: 0.03436
train reward loss: 0.00671
val total loss: 0.02304
val state loss: 0.01944
val reward loss: 0.00361
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 66%|██████▌   | 263/400 [2:48:50<1:31:10, 39.93s/it]

time elapsed: 2:48:50
num of updates: 26300
train total loss: 0.04115
train state loss: 0.03450
train reward loss: 0.00665
val total loss: 0.02391
val state loss: 0.02091
val reward loss: 0.00300
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 66%|██████▌   | 264/400 [2:49:30<1:30:43, 40.03s/it]

time elapsed: 2:49:30
num of updates: 26400
train total loss: 0.04056
train state loss: 0.03394
train reward loss: 0.00662
val total loss: 0.02941
val state loss: 0.02283
val reward loss: 0.00657
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 66%|██████▋   | 265/400 [2:50:11<1:30:27, 40.21s/it]

time elapsed: 2:50:11
num of updates: 26500
train total loss: 0.04117
train state loss: 0.03438
train reward loss: 0.00679
val total loss: 0.02525
val state loss: 0.02086
val reward loss: 0.00439
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 66%|██████▋   | 266/400 [2:50:48<1:27:59, 39.40s/it]

time elapsed: 2:50:48
num of updates: 26600
train total loss: 0.04042
train state loss: 0.03384
train reward loss: 0.00658
val total loss: 0.02546
val state loss: 0.02125
val reward loss: 0.00421
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 67%|██████▋   | 267/400 [2:51:30<1:28:49, 40.07s/it]

time elapsed: 2:51:30
num of updates: 26700
train total loss: 0.04092
train state loss: 0.03426
train reward loss: 0.00666
val total loss: 0.02617
val state loss: 0.02192
val reward loss: 0.00425
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 67%|██████▋   | 268/400 [2:52:18<1:33:48, 42.64s/it]

time elapsed: 2:52:19
num of updates: 26800
train total loss: 0.04003
train state loss: 0.03351
train reward loss: 0.00652
val total loss: 0.02258
val state loss: 0.01936
val reward loss: 0.00323
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 67%|██████▋   | 269/400 [2:53:43<2:00:52, 55.37s/it]

time elapsed: 2:53:44
num of updates: 26900
train total loss: 0.04025
train state loss: 0.03371
train reward loss: 0.00653
val total loss: 0.02633
val state loss: 0.02132
val reward loss: 0.00501
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 68%|██████▊   | 270/400 [2:55:20<2:26:34, 67.65s/it]

time elapsed: 2:55:20
num of updates: 27000
train total loss: 0.03971
train state loss: 0.03337
train reward loss: 0.00635
val total loss: 0.02522
val state loss: 0.02154
val reward loss: 0.00368
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 68%|██████▊   | 271/400 [2:56:08<2:12:53, 61.81s/it]

time elapsed: 2:56:08
num of updates: 27100
train total loss: 0.04001
train state loss: 0.03354
train reward loss: 0.00647
val total loss: 0.02299
val state loss: 0.01928
val reward loss: 0.00371
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 68%|██████▊   | 272/400 [2:57:01<2:06:28, 59.29s/it]

time elapsed: 2:57:02
num of updates: 27200
train total loss: 0.03993
train state loss: 0.03344
train reward loss: 0.00649
val total loss: 0.02355
val state loss: 0.02006
val reward loss: 0.00349
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 68%|██████▊   | 273/400 [2:58:03<2:07:05, 60.04s/it]

time elapsed: 2:58:03
num of updates: 27300
train total loss: 0.04014
train state loss: 0.03369
train reward loss: 0.00646
val total loss: 0.02375
val state loss: 0.02018
val reward loss: 0.00358
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 68%|██████▊   | 274/400 [2:59:05<2:06:57, 60.45s/it]

time elapsed: 2:59:05
num of updates: 27400
train total loss: 0.03992
train state loss: 0.03341
train reward loss: 0.00652
val total loss: 0.02496
val state loss: 0.02107
val reward loss: 0.00389
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 69%|██████▉   | 275/400 [2:59:54<1:59:13, 57.23s/it]

time elapsed: 2:59:55
num of updates: 27500
train total loss: 0.03945
train state loss: 0.03305
train reward loss: 0.00639
val total loss: 0.02407
val state loss: 0.02025
val reward loss: 0.00381
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 69%|██████▉   | 276/400 [3:00:48<1:55:47, 56.03s/it]

time elapsed: 3:00:48
num of updates: 27600
train total loss: 0.03969
train state loss: 0.03324
train reward loss: 0.00645
val total loss: 0.02268
val state loss: 0.01941
val reward loss: 0.00328
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 69%|██████▉   | 277/400 [3:01:40<1:52:32, 54.89s/it]

time elapsed: 3:01:40
num of updates: 27700
train total loss: 0.03928
train state loss: 0.03284
train reward loss: 0.00644
val total loss: 0.02184
val state loss: 0.01899
val reward loss: 0.00285
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 70%|██████▉   | 278/400 [3:02:25<1:45:50, 52.05s/it]

time elapsed: 3:02:25
num of updates: 27800
train total loss: 0.03901
train state loss: 0.03271
train reward loss: 0.00630
val total loss: 0.02501
val state loss: 0.02151
val reward loss: 0.00350
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 70%|██████▉   | 279/400 [3:03:20<1:46:43, 52.92s/it]

time elapsed: 3:03:20
num of updates: 27900
train total loss: 0.03938
train state loss: 0.03294
train reward loss: 0.00644
val total loss: 0.02315
val state loss: 0.01963
val reward loss: 0.00352
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 70%|███████   | 280/400 [3:04:12<1:45:05, 52.54s/it]

time elapsed: 3:04:12
num of updates: 28000
train total loss: 0.03936
train state loss: 0.03292
train reward loss: 0.00644
val total loss: 0.02302
val state loss: 0.01959
val reward loss: 0.00343
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 70%|███████   | 281/400 [3:05:00<1:41:55, 51.39s/it]

time elapsed: 3:05:01
num of updates: 28100
train total loss: 0.03940
train state loss: 0.03299
train reward loss: 0.00642
val total loss: 0.02568
val state loss: 0.02160
val reward loss: 0.00408
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 70%|███████   | 282/400 [3:05:53<1:41:34, 51.65s/it]

time elapsed: 3:05:53
num of updates: 28200
train total loss: 0.03857
train state loss: 0.03232
train reward loss: 0.00624
val total loss: 0.02473
val state loss: 0.02040
val reward loss: 0.00433
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 71%|███████   | 283/400 [3:06:39<1:37:26, 49.97s/it]

time elapsed: 3:06:39
num of updates: 28300
train total loss: 0.03872
train state loss: 0.03249
train reward loss: 0.00623
val total loss: 0.02511
val state loss: 0.02070
val reward loss: 0.00441
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 71%|███████   | 284/400 [3:07:27<1:35:51, 49.58s/it]

time elapsed: 3:07:28
num of updates: 28400
train total loss: 0.03875
train state loss: 0.03230
train reward loss: 0.00645
val total loss: 0.02267
val state loss: 0.01907
val reward loss: 0.00361
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 71%|███████▏  | 285/400 [3:08:21<1:37:05, 50.65s/it]

time elapsed: 3:08:21
num of updates: 28500
train total loss: 0.03834
train state loss: 0.03214
train reward loss: 0.00620
val total loss: 0.02465
val state loss: 0.02122
val reward loss: 0.00343
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 72%|███████▏  | 286/400 [3:09:09<1:34:43, 49.86s/it]

time elapsed: 3:09:09
num of updates: 28600
train total loss: 0.03851
train state loss: 0.03232
train reward loss: 0.00619
val total loss: 0.02287
val state loss: 0.01952
val reward loss: 0.00335
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 72%|███████▏  | 287/400 [3:10:01<1:35:12, 50.56s/it]

time elapsed: 3:10:01
num of updates: 28700
train total loss: 0.03839
train state loss: 0.03215
train reward loss: 0.00624
val total loss: 0.02527
val state loss: 0.02099
val reward loss: 0.00428
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 72%|███████▏  | 288/400 [3:11:12<1:45:48, 56.68s/it]

time elapsed: 3:11:12
num of updates: 28800
train total loss: 0.03887
train state loss: 0.03247
train reward loss: 0.00640
val total loss: 0.02378
val state loss: 0.01996
val reward loss: 0.00382
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 72%|███████▏  | 289/400 [3:12:02<1:41:14, 54.72s/it]

time elapsed: 3:12:02
num of updates: 28900
train total loss: 0.03815
train state loss: 0.03206
train reward loss: 0.00609
val total loss: 0.02445
val state loss: 0.02084
val reward loss: 0.00361
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 72%|███████▎  | 290/400 [3:12:56<1:40:10, 54.64s/it]

time elapsed: 3:12:57
num of updates: 29000
train total loss: 0.03849
train state loss: 0.03208
train reward loss: 0.00640
val total loss: 0.02406
val state loss: 0.02026
val reward loss: 0.00380
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 73%|███████▎  | 291/400 [3:13:59<1:43:33, 57.00s/it]

time elapsed: 3:13:59
num of updates: 29100
train total loss: 0.03807
train state loss: 0.03189
train reward loss: 0.00619
val total loss: 0.02301
val state loss: 0.01919
val reward loss: 0.00382
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 73%|███████▎  | 292/400 [3:14:45<1:36:55, 53.85s/it]

time elapsed: 3:14:46
num of updates: 29200
train total loss: 0.03809
train state loss: 0.03197
train reward loss: 0.00612
val total loss: 0.02228
val state loss: 0.01895
val reward loss: 0.00333
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 73%|███████▎  | 293/400 [3:15:34<1:33:29, 52.42s/it]

time elapsed: 3:15:35
num of updates: 29300
train total loss: 0.03867
train state loss: 0.03216
train reward loss: 0.00650
val total loss: 0.02342
val state loss: 0.01955
val reward loss: 0.00386
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 74%|███████▎  | 294/400 [3:16:13<1:25:16, 48.27s/it]

time elapsed: 3:16:13
num of updates: 29400
train total loss: 0.03828
train state loss: 0.03208
train reward loss: 0.00620
val total loss: 0.02306
val state loss: 0.01984
val reward loss: 0.00322
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 74%|███████▍  | 295/400 [3:17:04<1:26:04, 49.19s/it]

time elapsed: 3:17:05
num of updates: 29500
train total loss: 0.03725
train state loss: 0.03123
train reward loss: 0.00602
val total loss: 0.02291
val state loss: 0.01937
val reward loss: 0.00354
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 74%|███████▍  | 296/400 [3:17:56<1:26:35, 49.95s/it]

time elapsed: 3:17:56
num of updates: 29600
train total loss: 0.03814
train state loss: 0.03181
train reward loss: 0.00633
val total loss: 0.02396
val state loss: 0.02019
val reward loss: 0.00376
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt


 74%|███████▍  | 297/400 [3:18:44<1:24:31, 49.24s/it]

time elapsed: 3:18:44
num of updates: 29700
train total loss: 0.03805
train state loss: 0.03171
train reward loss: 0.00634
val total loss: 0.02340
val state loss: 0.01969
val reward loss: 0.00371
saving min loss model at: ./log/dt_halfcheetah_model_400_64_best.pt
saving current model at: ./log/dt_halfcheetah_model_400_64.pt
