In [1]:
import yaml

def read_yaml_to_dict(file_path):
    """Load a YAML file and return its parsed content.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the document, or
        ``None`` when the file does not exist or cannot be parsed. In both
        failure cases a message is printed instead of raising.
    """
    try:
        # Decode as UTF-8 explicitly rather than relying on the platform's
        # locale-dependent default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            return yaml.safe_load(file)
    except FileNotFoundError:
        print(f"파일을 찾을 수 없습니다: {file_path}")
        return None
    except yaml.YAMLError as e:
        print(f"YAML 파싱 오류: {e}")
        return None

In [2]:
# Usage example: parse the config file and show it only if something was loaded.
file_path = 'config.yaml'
result = read_yaml_to_dict(file_path)

if result:
    print("YAML 내용:", result, sep="\n")

YAML 내용:
{'defaults': [{'override hydra/launcher': 'submitit_local'}], 'task': 'dog-run', 'obs': 'state', 'checkpoint': '???', 'eval_episodes': 10, 'eval_freq': 50000, 'steps': 10000000, 'batch_size': 256, 'reward_coef': 0.1, 'value_coef': 0.1, 'consistency_coef': 20, 'rho': 0.5, 'lr': '3e-4', 'enc_lr_scale': 0.3, 'grad_clip_norm': 20, 'tau': 0.01, 'discount_denom': 5, 'discount_min': 0.95, 'discount_max': 0.995, 'buffer_size': 1000000, 'exp_name': 'default', 'data_dir': '???', 'mpc': True, 'iterations': 6, 'num_samples': 512, 'num_elites': 64, 'num_pi_trajs': 24, 'horizon': 3, 'min_std': 0.05, 'max_std': 2, 'temperature': 0.5, 'log_std_min': -10, 'log_std_max': 2, 'entropy_coef': '1e-4', 'num_bins': 101, 'vmin': -10, 'vmax': 10, 'model_size': '???', 'num_enc_layers': 2, 'enc_dim': 256, 'num_channels': 32, 'mlp_dim': 512, 'latent_dim': 512, 'task_dim': 96, 'num_q': 5, 'dropout': 0.01, 'simnorm_dim': 8, 'wandb_project': 'dialectic-mujoco', 'wandb_entity': 'calmlab', 'wandb_silent': Fals

In [3]:
import torch

# Report whether PyTorch can use a CUDA device in this kernel.
torch.cuda.is_available()

True

In [5]:
# Use the plain dict returned by read_yaml_to_dict as the config and look up one key.
cfg = result
cfg['steps']

10000000

In [3]:
from omegaconf import OmegaConf
# Rebind cfg: load the same config.yaml through OmegaConf; later cells use
# attribute-style access (cfg.task, cfg.steps, ...) on this object.
cfg = OmegaConf.load('config.yaml')

In [4]:
cfg.task

'dog-run'

In [5]:
# Normalise dashes to underscores, then split on the first underscore only:
# the part before it is the domain, everything after it is the task.
domain, task = cfg.task.replace('-', '_').split(sep='_', maxsplit=1)
print(domain, task)

dog run


In [9]:
cfg.steps

10000000

In [9]:
from common.seed import set_seed
# Seed using the value stored in the config.
set_seed(cfg.seed)
# Override the task loaded from config.yaml (it displayed 'dog-run' earlier) for this session.
cfg.task = 'walker-walk'

In [10]:
import os
from pathlib import Path

# Project root. It can be overridden with the TDMPC2_ROOT environment variable
# so the notebook is not tied to one user's home directory; when the variable
# is unset the original location is used.
project_root = Path(os.environ.get('TDMPC2_ROOT', '/home/pipaek/project/tdmpc2'))

# Working directory layout: <root>/logs/<task>/<seed>/<exp_name>.
work_dir = project_root / 'logs' / cfg.task / str(cfg.seed) / cfg.exp_name

cfg.work_dir = work_dir
work_dir

PosixPath('/home/pipaek/project/tdmpc2/logs/walker-walk/1/default')

In [11]:
# Architecture presets, keyed by parameter count in millions (M).
# Only the 1M and 317M presets specify num_q.
MODEL_SIZE = {
    1: dict(enc_dim=256, mlp_dim=384, latent_dim=128, num_enc_layers=2, num_q=2),
    5: dict(enc_dim=256, mlp_dim=512, latent_dim=512, num_enc_layers=2),
    19: dict(enc_dim=1024, mlp_dim=1024, latent_dim=768, num_enc_layers=3),
    48: dict(enc_dim=1792, mlp_dim=1792, latent_dim=768, num_enc_layers=4),
    317: dict(enc_dim=4096, mlp_dim=4096, latent_dim=1376, num_enc_layers=5, num_q=8),
}

In [12]:
# The 30 dmcontrol tasks that appear in both task sets. They are written once
# here so the two sets cannot drift apart.
_DMCONTROL_TASKS = [
    # 19 original dmcontrol tasks
    'walker-stand', 'walker-walk', 'walker-run', 'cheetah-run', 'reacher-easy',
    'reacher-hard', 'acrobot-swingup', 'pendulum-swingup', 'cartpole-balance', 'cartpole-balance-sparse',
    'cartpole-swingup', 'cartpole-swingup-sparse', 'cup-catch', 'finger-spin', 'finger-turn-easy',
    'finger-turn-hard', 'fish-swim', 'hopper-stand', 'hopper-hop',
    # 11 custom dmcontrol tasks
    'walker-walk-backwards', 'walker-run-backwards', 'cheetah-run-backwards', 'cheetah-run-front', 'cheetah-run-back',
    'cheetah-jump', 'hopper-hop-backwards', 'reacher-three-easy', 'reacher-three-hard', 'cup-spin',
    'pendulum-spin',
]

# meta-world mt50
_METAWORLD_MT50_TASKS = [
    'mw-assembly', 'mw-basketball', 'mw-button-press-topdown', 'mw-button-press-topdown-wall', 'mw-button-press',
    'mw-button-press-wall', 'mw-coffee-button', 'mw-coffee-pull', 'mw-coffee-push', 'mw-dial-turn',
    'mw-disassemble', 'mw-door-open', 'mw-door-close', 'mw-drawer-close', 'mw-drawer-open',
    'mw-faucet-open', 'mw-faucet-close', 'mw-hammer', 'mw-handle-press-side', 'mw-handle-press',
    'mw-handle-pull-side', 'mw-handle-pull', 'mw-lever-pull', 'mw-peg-insert-side', 'mw-peg-unplug-side',
    'mw-pick-out-of-hole', 'mw-pick-place', 'mw-pick-place-wall', 'mw-plate-slide', 'mw-plate-slide-side',
    'mw-plate-slide-back', 'mw-plate-slide-back-side', 'mw-push-back', 'mw-push', 'mw-push-wall',
    'mw-reach', 'mw-reach-wall', 'mw-shelf-place', 'mw-soccer', 'mw-stick-push',
    'mw-stick-pull', 'mw-sweep-into', 'mw-sweep', 'mw-window-open', 'mw-window-close',
    'mw-bin-picking', 'mw-box-close', 'mw-door-lock', 'mw-door-unlock', 'mw-hand-insert',
]

# Multi-task sets keyed by name. Each value is its own list object, so
# mutating one set never affects the other or the shared source lists.
TASK_SET = {
    'mt30': list(_DMCONTROL_TASKS),
    'mt80': _DMCONTROL_TASKS + _METAWORLD_MT50_TASKS,
}

In [13]:
# Derive secondary config fields from the loaded values.
cfg.task_title = cfg.task.replace("-", " ").title()
cfg.bin_size = (cfg.vmax - cfg.vmin) / (cfg.num_bins-1) # Bin size for discrete regression

# Model size
# Read with .get() so a value that is still unset yields None instead of raising.
if cfg.get('model_size', None) is not None:
    assert cfg.model_size in MODEL_SIZE.keys(), \
        f'Invalid model size {cfg.model_size}. Must be one of {list(MODEL_SIZE.keys())}'
    # Copy every field of the chosen preset onto the config.
    for k, v in MODEL_SIZE[cfg.model_size].items():
        cfg[k] = v
    if cfg.task == 'mt30' and cfg.model_size == 19:
        cfg.latent_dim = 512 # This checkpoint is slightly smaller

# Multi-task
cfg.multitask = cfg.task in TASK_SET.keys()
if cfg.multitask:
    cfg.task_title = cfg.task.upper()
    # Account for slight inconsistency in task_dim for the mt30 experiments.
    # model_size is read with .get() here as well: plain attribute access would
    # raise when the value is unset, even though the check above tolerates that.
    cfg.task_dim = 96 if cfg.task == 'mt80' or cfg.get('model_size', None) in {1, 317} else 64
else:
    cfg.task_dim = 0
# A multi-task name expands to its task list; a single task becomes a one-element list.
cfg.tasks = TASK_SET.get(cfg.task, [cfg.task])

In [14]:
cfg.task_title

'Walker Walk'

In [15]:
cfg.bin_size

0.2

In [30]:
cfg.model_size   # apparently not set yet (still the mandatory '???' placeholder)

MissingMandatoryValue: Missing mandatory value: model_size
    full_key: model_size
    object_type=dict

In [16]:
cfg.multitask

False

In [17]:
# Manually set task_dim to 0, the same value the non-multitask branch of the config cell assigns.
cfg.task_dim = 0

In [18]:
cfg.tasks

['walker-walk']

In [19]:
from envs import make_env
# Build the environment from the config.
# NOTE(review): later cells read cfg.obs_shape and cfg.action_dim, presumably filled in by make_env — confirm.
env=make_env(cfg)

/home/pipaek/miniconda3/envs/tdmpc2/lib/python3.9/site-packages/glfw/__init__.py:916: GLFWError: (65544) b'X11: The DISPLAY environment variable is missing'


In [20]:
cfg.get('obs', 'state')

'state'

In [21]:
cfg.obs_shape

{'state': [24]}

In [37]:
# from envs.dmcontrol import make_env as make_dm_control_env

In [40]:
# from envs.myosuite import make_env as make_myosuite_env

In [41]:
# env = make_myosuite_env(cfg)

ValueError: ('Unknown task:', 'walker-walk')

In [43]:
from tdmpc2 import TDMPC2
# Construct the agent from the config.
agent=TDMPC2(cfg)

  fn, params, _ = combine_state_for_ensemble(modules)


In [44]:
from common.buffer import Buffer
# Construct the buffer from the config.
buffer=Buffer(cfg)



In [45]:
from common.logger import Logger
# Construct the logger from the config; per the output below it prints a run summary and connects to wandb.
logger=Logger(cfg)

------------------------------------
  [1m[32mTask:          [0m Walker Walk
  [1m[32mSteps:         [0m 10,000,000
  [1m[32mObservations:  [0m [24]
  [1m[32mActions:       [0m 6
  [1m[32mExperiment:    [0m default
------------------------------------


ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33muhmppi1[0m ([33mcalmlab[0m). Use [1m`wandb login --relogin`[0m to force relogin


[1m[34mLogs will be synced with wandb.[0m


In [46]:
from trainer.online_trainer import OnlineTrainer

# Hand the components built in the previous cells to the online trainer.
trainer = OnlineTrainer(cfg=cfg, env=env, agent=agent, buffer=buffer, logger=logger)


Architecture: WorldModel(
  (_encoder): ModuleDict(
    (state): Sequential(
      (0): NormedLinear(in_features=24, out_features=256, bias=True, act=Mish)
      (1): NormedLinear(in_features=256, out_features=512, bias=True, act=SimNorm)
    )
  )
  (_dynamics): Sequential(
    (0): NormedLinear(in_features=518, out_features=512, bias=True, act=Mish)
    (1): NormedLinear(in_features=512, out_features=512, bias=True, act=Mish)
    (2): NormedLinear(in_features=512, out_features=512, bias=True, act=SimNorm)
  )
  (_reward): Sequential(
    (0): NormedLinear(in_features=518, out_features=512, bias=True, act=Mish)
    (1): NormedLinear(in_features=512, out_features=512, bias=True, act=Mish)
    (2): Linear(in_features=512, out_features=101, bias=True)
  )
  (_pi): Sequential(
    (0): NormedLinear(in_features=512, out_features=512, bias=True, act=Mish)
    (1): NormedLinear(in_features=512, out_features=512, bias=True, act=Mish)
    (2): Linear(in_features=512, out_features=12, bias=True

In [47]:
# Point the agent's device attribute at CUDA, then display it.
agent.device = torch.device('cuda')
agent.device

device(type='cuda')

In [48]:
from common.world_model import WorldModel
# Replace the agent's model with a freshly constructed WorldModel moved to the agent's device.
# NOTE(review): the agent and trainer were created in earlier cells; anything they built
# around the previous model (e.g. an optimizer) presumably still refers to it — confirm.
agent.model = WorldModel(cfg).to(agent.device)

  fn, params, _ = combine_state_for_ensemble(modules)


In [49]:
# List each observation key with its shape. `k` keeps its last value after the loop.
for k in cfg.obs_shape:
    print(k, cfg.obs_shape[k])

state [24]


In [None]:
# Record environment-derived sizes on the config.
cfg.action_dim = env.action_space.shape[0]
cfg.episode_length = env.max_episode_steps
# Seed-step count: five episodes' worth of steps, but never fewer than 1000.
cfg.seed_steps = max(1000, 5*cfg.episode_length)

In [50]:
env.action_space.shape[0]

6

In [51]:
env.max_episode_steps

500

In [52]:
cfg.episode_length

500

In [54]:
env.observation_space

Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf
 -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf
 inf inf inf inf inf inf], (24,), float32)

In [55]:
env.observation_space.spaces

AttributeError: 'Box' object has no attribute 'spaces'

In [56]:
env.observation_spec()

AttributeError: 'TimeStepToGymWrapper' object has no attribute 'observation_spec'

In [57]:
# Split the (underscore-normalised) task name on its first underscore into domain and task.
domain, task = cfg.task.replace('-', '_').split(sep='_', maxsplit=1)

In [58]:
domain

'walker'

In [59]:
task

'walk'

In [60]:
# Translate the two short domain aliases; any other domain passes through unchanged.
domain = {'cup': 'ball_in_cup', 'pointmass': 'point_mass'}.get(domain, domain)
domain

'walker'

In [61]:
from dm_control import suite

# Load the task directly from the dm_control suite, seeded from the config.
# This rebinds `env`, which previously held the object returned by make_env(cfg).
env = suite.load(
    domain,
    task,
    task_kwargs={'random': cfg.seed},
    visualize_reward=False,
)


In [62]:
env.observation_spec()

OrderedDict([('orientations',
              Array(shape=(14,), dtype=dtype('float64'), name='orientations')),
             ('height',
              Array(shape=(), dtype=dtype('float64'), name='height')),
             ('velocity',
              Array(shape=(9,), dtype=dtype('float64'), name='velocity'))])

In [63]:
env.action_spec()

BoundedArray(shape=(6,), dtype=dtype('float64'), name=None, minimum=[-1. -1. -1. -1. -1. -1.], maximum=[1. 1. 1. 1. 1. 1.])

In [22]:
# List each observation key with its shape. The following cell reuses the leaked `k`.
for k in cfg.obs_shape:
    print(k, cfg.obs_shape[k])

state [24]


In [23]:
cfg.obs_shape[k][0] + cfg.task_dim

24

In [24]:
cfg.task_dim

0

In [25]:
cfg.num_enc_layers

2

In [26]:
cfg.enc_dim

256

In [27]:
max(cfg.num_enc_layers-1, 1)*[cfg.enc_dim]

[256]

In [28]:
cfg.latent_dim

512

In [29]:
cfg.simnorm_dim

8

In [30]:
cfg.action_dim

6