In [58]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [59]:
import os
from hydra import initialize, initialize_config_dir, initialize_config_module, compose
from omegaconf import OmegaConf
from common.parser import parse_cfg
from common import MODEL_SIZE, TASK_SET
import re
from pathlib import Path
import torch

from common.buffer import ReplayBuffer

with initialize(version_base=None, config_path='.'):
    cfg = compose(config_name='config.yaml')

    # Logic
    for k in cfg.keys():
        try:
            v = cfg[k]
            if v == None:
                v = True
        except:
            pass

    # Algebraic expressions
    for k in cfg.keys():
        try:
            v = cfg[k]
            if isinstance(v, str):
                match = re.match(r"(\d+)([+\-*/])(\d+)", v)
                if match:
                    cfg[k] = eval(match.group(1) + match.group(2) + match.group(3))
                    if isinstance(cfg[k], float) and cfg[k].is_integer():
                        cfg[k] = int(cfg[k])
        except:
            pass

    cfg.buffer_size = 50000
    # Convenience
    cfg.work_dir = '.'
    cfg.task_title = cfg.task.replace("-", " ").title()
    cfg.bin_size = (cfg.vmax - cfg.vmin) / (cfg.num_bins-1) # Bin size for discrete regression

    # Model size
    if cfg.get('model_size', None) is not None:
        assert cfg.model_size in MODEL_SIZE.keys(), \
            f'Invalid model size {cfg.model_size}. Must be one of {list(MODEL_SIZE.keys())}'
        for k, v in MODEL_SIZE[cfg.model_size].items():
            cfg[k] = v
        if cfg.task == 'mt30' and cfg.model_size == 19:
            cfg.latent_dim = 512 # This checkpoint is slightly smaller

    # Multi-task
    cfg.multitask = cfg.task in TASK_SET.keys()
    if cfg.multitask:
        cfg.task_title = cfg.task.upper()
        # Account for slight inconsistency in task_dim for the mt30 experiments
        cfg.task_dim = 96 if cfg.task == 'mt80' or cfg.model_size in {1, 317} else 64
    else:
        cfg.task_dim = 0
    cfg.tasks = TASK_SET.get(cfg.task, [cfg.task])



In [60]:
from envs import make_env
env = make_env(cfg)

Pusht force sparse reward:  False


In [61]:
import cv2
import numpy as np
from pynput import mouse
import time

from tensordict.tensordict import TensorDict
def to_td(obs, env, action=None, reward=None):
    """Creates a TensorDict for a new episode."""
    if isinstance(obs, dict):
        obs = TensorDict(obs, batch_size=(), device='cpu')
    else:
        obs = obs.unsqueeze(0).cpu()
    if action is None:
        action = torch.full_like(env.rand_act(), float('nan'))
    if reward is None:
        reward = torch.tensor(float('nan'))
    td = TensorDict(dict(
        obs=obs,
        action=action.unsqueeze(0),
        reward=reward.unsqueeze(0),
    ), batch_size=(1,))
    return td



In [62]:
from common.buffer import Buffer
buffer = Buffer(cfg)

global next_action
next_action = np.array([0,0])

minx, maxx = 500, 1500
miny, maxy = 300, 700
def on_move(x, y):
    if x < minx or x > maxx or y < miny or y > maxy: return np.array([0, 0])
    
    xnorm = (x - minx) / (maxx - minx)
    ynorm = (y - miny) / (maxy - miny)

    xnorm = max(0, min(1, xnorm))
    ynorm = max(0, min(1, ynorm))

    xnorm = 2 * xnorm - 1
    ynorm = 2 * ynorm - 1

    # print(f'Pointer moved to {(x, y)} -> {xnorm, ynorm}')
    global next_action
    next_action = np.array([xnorm, ynorm])

# Create a listener
listener = mouse.Listener(on_move=on_move)

# Start the listener
listener.start()
cv2.destroyAllWindows()
obs, done, ep_reward, t = env.reset(), False, 0, 0
tds = [to_td(obs, env)]
eps = 0; ts = 0
while eps < 20:
    # action = torch.Tensor(env.action_space.sample())
    action = torch.Tensor(next_action)
    obs, reward, done, info = env.step(action)
    tds.append(to_td(obs, env, action, reward))

    if done or ts >= 300:
        eps += 1; ts = 0
        buffer.add(torch.cat(tds))
        obs, done, ep_reward, t = env.reset(), False, 0, 0
        tds = [to_td(obs, env)]


    img = obs.detach().cpu().numpy()
    # Step 1: Reshape the stack into separate images
    img = img.transpose(1, 2, 0)
    # reshaped = np.hstack([img[:,:,i*3:(i*3)+3] for i in range(3)])
    reshaped = img[:, :, -3:]
    reshaped = cv2.resize(reshaped, (reshaped.shape[1] * 3, reshaped.shape[0] * 3), interpolation=cv2.INTER_NEAREST)

    # Step 3: Display using OpenCV
    cv2.imshow('row', reshaped)
    k = cv2.waitKey(100) & 0xFF
    print(k)
    if k == 27: 
        buffer.add(torch.cat(tds))
        break

    ts += 1

cv2.destroyAllWindows()
listener.stop()
import os

path = os.path.expanduser("~/workspace/tdmpc2/demonstrations/HD_1")
buffer.save(path)

num_eps = buffer._num_eps; cap = buffer.capacity
rb_load = Buffer(cfg); rb_load.load(path)
rb_load._num_eps = num_eps; rb_load._capacity = cap

print(f"{rb_load._num_eps}")

255
255
255
255
255
255
255
255
255
255
255
27
Buffer capacity: 50,000
Storage required: 1.84 GB
Using CUDA memory for storage.


In [57]:
torch.manual_seed(0)
s1 = buffer.sample()
print(s1)

torch.manual_seed(0)
s2 = rb_load.sample()
print(s1)
print(len(rb_load._buffer))

IndexError: The shape of the mask [0] at index 0 does not match the shape of the indexed tensor [1, 1] at index 0

In [None]:
rb_load._buffer[0]["obs"].shape, rb_load._buffer[0]["action"], rb_load._buffer[0]["reward"]

In [None]:
# a = img.reshape(64, 64, 3, 3)
a = img
cv2.imshow('a', a)
cv2.waitKey(0)
cv2.destroyAllWindows()