In [28]:
import os
os.environ['MUJOCO_GL'] = 'egl'
os.environ['LAZY_LEGACY_OP'] = '0'
import warnings
warnings.filterwarnings('ignore')
import torch

import hydra
from termcolor import colored

from common.parser import parse_cfg
from common.seed import set_seed
from common.buffer import Buffer
from envs import make_env
from dialectic import DialecticMPC, DialecticImitation
from trainer.offline_trainer import OfflineTrainer
from trainer.online_trainer import OnlineTrainer
from common.logger import Logger

torch.backends.cudnn.benchmark = True

torch.cuda.is_available()

True

In [29]:
import yaml

def read_yaml_to_dict(file_path):
    try:
        with open(file_path, 'r') as file:
            yaml_data = yaml.safe_load(file)
        return yaml_data
    except FileNotFoundError:
        print(f"파일을 찾을 수 없습니다: {file_path}")
        return None
    except yaml.YAMLError as e:
        print(f"YAML 파싱 오류: {e}")
        return None

In [30]:
# 사용 예시
file_path = 'dialectic_config.yaml'
result = read_yaml_to_dict(file_path)

if result:
    print("YAML 내용:")
    print(result)

YAML 내용:
{'defaults': [{'override hydra/launcher': 'submitit_local'}], 'task': 'walker-walk', 'obs': 'state', 'checkpoint': '???', 'eval_episodes': 10, 'eval_freq': 5000, 'device': 'cuda', 'steps': 10000000, 'batch_size': 256, 'reward_coef': 0.1, 'value_coef': 0.1, 'consistency_coef': 20, 'rho': 0.5, 'lr': '3e-4', 'enc_lr_scale': 0.3, 'grad_clip_norm': 20, 'tau': 0.01, 'discount_denom': 5, 'discount_min': 0.95, 'discount_max': 0.995, 'buffer_size': 1000000, 'exp_name': 'default', 'data_dir': '???', 'mpc': True, 'iterations': 6, 'num_samples': 512, 'num_elites': 64, 'num_pi_trajs': 24, 'horizon': 3, 'min_std': 0.05, 'max_std': 2, 'temperature': 0.5, 'log_std_min': -10, 'log_std_max': 2, 'entropy_coef': '1e-4', 'num_bins': 101, 'vmin': -10, 'vmax': 10, 'model_size': '???', 'num_enc_layers': 2, 'enc_dim': 256, 'num_channels': 32, 'mlp_dim': 512, 'latent_dim': 512, 'task_dim': 96, 'num_q': 5, 'dropout': 0.01, 'simnorm_dim': 8, 'wandb_project': 'dialectic-mujoco', 'wandb_entity': 'calmlab',

In [31]:
from omegaconf import OmegaConf
cfg = OmegaConf.load('dialectic_config.yaml')

In [32]:
from pathlib import Path
work_dir = Path('/home/pipaek/project/tdmpc2') / 'logs' / cfg.task / str(cfg.seed) / cfg.exp_name

cfg.work_dir = work_dir
work_dir

PosixPath('/home/pipaek/project/tdmpc2/logs/walker-walk/1/default')

In [33]:
from common import MODEL_SIZE, TASK_SET

def myparse_cfg(cfg: OmegaConf) -> OmegaConf:
	"""
	Parses a Hydra config. Mostly for convenience.
	"""

	# Logic
	for k in cfg.keys():
		try:
			v = cfg[k]
			if v == None:
				v = True
		except:
			pass

	# Algebraic expressions
	for k in cfg.keys():
		try:
			v = cfg[k]
			if isinstance(v, str):
				match = re.match(r"(\d+)([+\-*/])(\d+)", v)
				if match:
					cfg[k] = eval(match.group(1) + match.group(2) + match.group(3))
					if isinstance(cfg[k], float) and cfg[k].is_integer():
						cfg[k] = int(cfg[k])
		except:
			pass

	# Convenience
	cfg.task_title = cfg.task.replace("-", " ").title()
	cfg.bin_size = (cfg.vmax - cfg.vmin) / (cfg.num_bins-1) # Bin size for discrete regression

	# Model size
	if cfg.get('model_size', None) is not None:
		assert cfg.model_size in MODEL_SIZE.keys(), \
			f'Invalid model size {cfg.model_size}. Must be one of {list(MODEL_SIZE.keys())}'
		for k, v in MODEL_SIZE[cfg.model_size].items():
			cfg[k] = v
		if cfg.task == 'mt30' and cfg.model_size == 19:
			cfg.latent_dim = 512 # This checkpoint is slightly smaller

	# Multi-task
	cfg.multitask = cfg.task in TASK_SET.keys()
	if cfg.multitask:
		cfg.task_title = cfg.task.upper()
		# Account for slight inconsistency in task_dim for the mt30 experiments
		cfg.task_dim = 96 if cfg.task == 'mt80' or cfg.model_size in {1, 317} else 64
	else:
		cfg.task_dim = 0
	cfg.tasks = TASK_SET.get(cfg.task, [cfg.task])

	return cfg

In [34]:
cfg = myparse_cfg(cfg)

In [35]:
set_seed(cfg.seed)

In [36]:
from trainer.online_trainer import OnlineTrainer, OnlineDialecticTrainer

trainer = OnlineDialecticTrainer(
		cfg=cfg,
		env=make_env(cfg),
		agent=DialecticImitation(cfg),
		buffer_l=Buffer(cfg),
        buffer_r=Buffer(cfg),
		logger=Logger(cfg),
	)

[1m[34m### Logger Init.[0m
------------------------------------
  [1m[32mTask:          [0m Walker Walk
  [1m[32mSteps:         [0m 10,000,000
  [1m[32mObservations:  [0m [24]
  [1m[32mActions:       [0m 6
  [1m[32mExperiment:    [0m default
------------------------------------
[1m[34mWandb disabled.[0m
Architecture: DualModel(
  (_brain_l): Sequential(
    (0): NormedLinear(in_features=18, out_features=512, bias=True, act=Mish)
    (1): NormedLinear(in_features=512, out_features=512, bias=True, act=Mish)
    (2): Linear(in_features=512, out_features=6, bias=True)
  )
  (_brain_r): Sequential(
    (0): NormedLinear(in_features=18, out_features=512, bias=True, act=Mish)
    (1): NormedLinear(in_features=512, out_features=512, bias=True, act=Mish)
    (2): Linear(in_features=512, out_features=6, bias=True)
  )
)
Learnable parameters: 555,020


In [37]:
train_metrics, done, eval_next = {}, True, True

In [38]:
_step = 0
_ep_idx = 0

In [51]:
self = trainer

In [52]:
obs = self.env.reset()

In [41]:
td = self.to_td(obs)
td

TensorDict(
    fields={
        action: Tensor(shape=torch.Size([1, 6]), device=cpu, dtype=torch.float32, is_shared=False),
        obs: Tensor(shape=torch.Size([1, 24]), device=cpu, dtype=torch.float32, is_shared=False),
        reward: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.float32, is_shared=False)},
    batch_size=torch.Size([1]),
    device=None,
    is_shared=False)

In [50]:
action, is_act_left = self.agent.rand_act(self.env)
action

tensor([-0.7661, -0.2494, -0.3876,  0.2716, -0.5663,  0.6794])

In [45]:
self.agent.device

device(type='cuda')

In [68]:
self._tds_l = []
for _ in range(200):
    obs, reward, done, info = self.env.step(action)
    td = self.to_td(obs, action, reward)
    action, is_act_left = self.agent.rand_act(self.env)
    self._tds_l.append(td)
    # print(action, is_act_left, reward, done, info)

TypeError: unsupported operand type(s) for *=: 'float' and 'NoneType'

In [54]:
len(self._tds_l)

10

In [57]:
self._ep_idx = self.buffer_l.add(torch.cat(self._tds_l))
self._ep_idx

Buffer capacity: 1,000,000
Storage required: 0.13 GB
Using CUDA memory for storage.


1

In [73]:
len(self.buffer_l._buffer)

6410

In [72]:
for _ in range(10):
    self._tds_l = []
    obs = self.env.reset()
    for _ in range(1000):
        obs, reward, done, info = self.env.step(action)
        td = self.to_td(obs, action, reward)
        action, is_act_left = self.agent.rand_act(self.env)
        self._tds_l.append(td)
        if done:
            break
    self._ep_idx = self.buffer_l.add(torch.cat(self._tds_l))
    print(self._ep_idx)

10
11
12
13
14
15
16
17
18
19


In [74]:
obs, action, reward, task = self.buffer_l.sample()

In [60]:
self.buffer_l._buffer.sample()

IndexError: The shape of the mask [0] at index 0 does not match the shape of the indexed tensor [1, 1] at index 0

In [61]:
cfg.batch_size

256