In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from hydra import initialize, initialize_config_dir, initialize_config_module, compose
from omegaconf import OmegaConf
from common.parser import parse_cfg
from common import MODEL_SIZE, TASK_SET
import re
from pathlib import Path
import torch
from common.buffer import Buffer, ReplayBuffer

with initialize(version_base=None, config_path='.'):
    cfg = compose(config_name='config.yaml')

    # Logic
    for k in cfg.keys():
        try:
            v = cfg[k]
            if v == None:
                v = True
        except:
            pass

    # Algebraic expressions
    for k in cfg.keys():
        try:
            v = cfg[k]
            if isinstance(v, str):
                match = re.match(r"(\d+)([+\-*/])(\d+)", v)
                if match:
                    cfg[k] = eval(match.group(1) + match.group(2) + match.group(3))
                    if isinstance(cfg[k], float) and cfg[k].is_integer():
                        cfg[k] = int(cfg[k])
        except:
            pass

    cfg.buffer_size = cfg.demo_buffer_size
    cfg.force_sparse = True
    demo_id = 2
    demo_eps = cfg.demo_n_eps

    # Convenience
    cfg.work_dir = '.'
    cfg.task_title = cfg.task.replace("-", " ").title()
    cfg.bin_size = (cfg.vmax - cfg.vmin) / (cfg.num_bins-1) # Bin size for discrete regression

    # Model size
    if cfg.get('model_size', None) is not None:
        assert cfg.model_size in MODEL_SIZE.keys(), \
            f'Invalid model size {cfg.model_size}. Must be one of {list(MODEL_SIZE.keys())}'
        for k, v in MODEL_SIZE[cfg.model_size].items():
            cfg[k] = v
        if cfg.task == 'mt30' and cfg.model_size == 19:
            cfg.latent_dim = 512 # This checkpoint is slightly smaller

    # Multi-task
    cfg.multitask = cfg.task in TASK_SET.keys()
    if cfg.multitask:
        cfg.task_title = cfg.task.upper()
        # Account for slight inconsistency in task_dim for the mt30 experiments
        cfg.task_dim = 96 if cfg.task == 'mt80' or cfg.model_size in {1, 317} else 64
    else:
        cfg.task_dim = 0
    cfg.tasks = TASK_SET.get(cfg.task, [cfg.task])





In [3]:
from envs import make_env
env = make_env(cfg)

Pusht force sparse reward:  True  display_cross:  False


In [4]:
import cv2
import numpy as np
from pynput import mouse
import time

from tensordict.tensordict import TensorDict
def to_td(obs, env, action=None, reward=None):
    """Creates a TensorDict for a new episode."""
    if isinstance(obs, dict):
        obs = TensorDict(obs, batch_size=(), device='cpu')
    else:
        obs = obs.unsqueeze(0).cpu()
    if action is None:
        action = torch.full_like(env.rand_act(), float('nan'))
    if reward is None:
        reward = torch.tensor(float('nan'))
    td = TensorDict(dict(
        obs=obs,
        action=action.unsqueeze(0),
        reward=reward.unsqueeze(0),
    ), batch_size=(1,))
    return td



In [5]:
buffer = Buffer(cfg)


In [6]:

global next_action
next_action = np.array([0,0])

minx, maxx = 500, 1500
miny, maxy = 100, 1200
def on_move(x, y):
    if x < minx or x > maxx or y < miny or y > maxy: return np.array([0, 0])
    
    xnorm = (x - minx) / (maxx - minx)
    ynorm = (y - miny) / (maxy - miny)

    xnorm = max(0, min(1, xnorm))
    ynorm = max(0, min(1, ynorm))

    xnorm = 2 * xnorm - 1
    ynorm = 2 * ynorm - 1

    # print(f'Pointer moved to {(x, y)} -> {xnorm, ynorm}')
    global next_action
    next_action = np.array([xnorm, ynorm])

# Create a listener
listener = mouse.Listener(on_move=on_move)

# Start the listener
listener.start()
cv2.destroyAllWindows()
obs, done, ep_reward, t = env.reset(), False, 0, 0
tds = [to_td(obs, env)]
eps = 0; ts = 0; successes = 0; rewards = 0
while eps < demo_eps:
    # action = torch.Tensor(env.action_space.sample())
    action = torch.Tensor(next_action)
    obs, reward, done, info = env.step(action)
    rewards += reward
    # print(obs.shape, reward, action)
    tds.append(to_td(obs, env, action, reward))

    if done or ts >= 300:
        print(f"ep {eps} reward {rewards}"); rewards = 0
        successes += 1 if info['success'] else 0
        eps += 1; ts = 0
        buffer.add(torch.cat(tds))
        obs, done, ep_reward, t = env.reset(), False, 0, 0
        tds = [to_td(obs, env)]


    img = obs.detach().cpu().numpy()
    # Step 1: Reshape the stack into separate images
    img = img.transpose(1, 2, 0)
    # reshaped = np.hstack([img[:,:,i*3:(i*3)+3] for i in range(3)])
    reshaped = img[:, :, -3:]
    reshaped = cv2.resize(reshaped, (reshaped.shape[1] * 3, reshaped.shape[0] * 3), interpolation=cv2.INTER_NEAREST)

    # Step 3: Display using OpenCV
    cv2.imshow('row', reshaped)
    k = cv2.waitKey(100) & 0xFF
    if k == 27: 
        buffer.add(torch.cat(tds))
        break

    ts += 1




Success!
ep 0 reward 1.0
Buffer capacity: 20,000
Storage required: 0.74 GB
Using CUDA memory for storage.
Success!
ep 1 reward 1.0
Success!
ep 2 reward 1.0
Success!
ep 3 reward 1.0
Success!
ep 4 reward 1.0
Success!
ep 5 reward 1.0
Success!
ep 6 reward 1.0
Success!
ep 7 reward 1.0
Success!
ep 8 reward 1.0
Success!
ep 9 reward 1.0
Success!
ep 10 reward 1.0
Success!
ep 11 reward 1.0
Success!
ep 12 reward 1.0
Success!
ep 13 reward 1.0
Success!
ep 14 reward 1.0
Success!
ep 15 reward 1.0
Success!
ep 16 reward 1.0
Success!
ep 17 reward 1.0
Success!
ep 18 reward 1.0
Success!
ep 19 reward 1.0
Success!
ep 20 reward 1.0
Success!
ep 21 reward 1.0
Success!
ep 22 reward 1.0
Success!
ep 23 reward 1.0
Success!
ep 24 reward 1.0
Success!
ep 25 reward 1.0
Success!
ep 26 reward 1.0
Success!
ep 27 reward 1.0
Success!
ep 28 reward 1.0
Success!
ep 29 reward 1.0
Success!
ep 30 reward 1.0
Success!
ep 31 reward 1.0
Success!
ep 32 reward 1.0
Success!
ep 33 reward 1.0
Success!
ep 34 reward 1.0
Success!
ep 35 rewa

KeyboardInterrupt: 

In [7]:
cv2.destroyAllWindows()
listener.stop()
print(f"Rand {eps} eps with {successes} successes.")


Rand 50 eps with 50 successes.


In [8]:

import os
path = os.path.expanduser(f"~/workspace/tdmpc2/demonstrations/HD_{demo_id}" + "_sparse" if cfg.force_sparse else '')
buffer.save(path)
len(buffer._buffer.storage)

Buffer saved to: /home/j/workspace/tdmpc2/demonstrations/HD_2_sparse


5275

In [11]:
from tensordict.tensordict import TensorDict
def to_td(obs, action=None, reward=None):
    """Creates a TensorDict for a new episode."""
    if isinstance(obs, dict):
        obs = TensorDict(obs, batch_size=(), device=device)
    else:
        obs = obs.unsqueeze(0).cpu()
    if action is None or reward is None:
        raise ValueError
    td = TensorDict(dict(
        obs=obs,
        action=action.unsqueeze(0),
        reward=reward.unsqueeze(0),
    ), batch_size=(1,))
    return td

device = 'cpu'
from torchrl.data.replay_buffers import LazyTensorStorage	
buf = Buffer(cfg)
buf._buffer = buf._reserve_buffer(LazyTensorStorage(buf.capacity, device=torch.device(device)))
names = ["HD_0_sparse", "HD_1_sparse", "HD_2_sparse"]
base_path = os.path.expanduser("~/workspace/tdmpc2/demonstrations/")
for demo_name in names:
    demo_path = base_path + "/" + demo_name
    device = 'cuda' if 'cluster' in demo_path else 'cpu'
    
    load_buffer = Buffer(cfg) # HACK
    load_buffer._capacity = cfg.demo_buffer_size
    load_buffer._buffer = load_buffer._reserve_buffer(LazyTensorStorage(cfg.demo_buffer_size, device=torch.device(device)))
    if not (load_buffer.load(os.path.expanduser(demo_path))):
        raise FileNotFoundError(f"Could not load buffer at {demo_path}")

    print(f"Loading from {demo_path}, transitions (3 frames): {len(load_buffer._buffer.storage)}")


    # show the demos:
    tds = []
    n_ep = 0; r = torch.tensor(0.0, device=device)
    for i in range(len(load_buffer._buffer.storage)):
        obs, action, reward, episode = load_buffer._buffer[i]["obs"], load_buffer._buffer[i]["action"], load_buffer._buffer[i]["reward"], load_buffer._buffer[i]["episode"]
        if 'cluster' not in demo_path: # dont show images on the cluster
            toshow = obs.detach().cpu().numpy().transpose(1,2,0); title = str(demo_path.split('/')[-1])
            cv2.imshow(title+'0', toshow[:, :, :3]); cv2.imshow(title+'1', toshow[:, :, 3:6]); cv2.imshow(title+'2', toshow[:, :, 6:])
            cv2.waitKey(1)
        if not tds or episode > n_ep:
            if tds:
                print(f"Adding episode ", episode, f" with r {r.detach().cpu()}"); r = torch.tensor(0.0, device=device)
                buf.add(torch.cat(tds))
            tds = [to_td(obs, action, reward)]
            n_ep = episode
        else:
            tds.append(to_td(obs, action, reward))
        if not torch.isnan(reward): r += reward

Buffer loaded from /home/j/workspace/tdmpc2/demonstrations//HD_0_sparse
Loading from /home/j/workspace/tdmpc2/demonstrations//HD_0_sparse, transitions (3 frames): 8882
Adding episode  tensor(1)  with r 1.0
Buffer capacity: 20,000
Storage required: 0.74 GB
Using CUDA memory for storage.
Adding episode  tensor(2)  with r 1.0
Adding episode  tensor(3)  with r 1.0
Adding episode  tensor(4)  with r 1.0
Adding episode  tensor(5)  with r 1.0
Adding episode  tensor(6)  with r 1.0
Adding episode  tensor(7)  with r 1.0
Adding episode  tensor(8)  with r 1.0
Adding episode  tensor(9)  with r 1.0
Adding episode  tensor(10)  with r 1.0
Adding episode  tensor(11)  with r 1.0
Adding episode  tensor(12)  with r 1.0
Adding episode  tensor(13)  with r 1.0
Adding episode  tensor(14)  with r 1.0
Adding episode  tensor(15)  with r 1.0
Adding episode  tensor(16)  with r 1.0
Adding episode  tensor(17)  with r 1.0
Adding episode  tensor(18)  with r 1.0
Adding episode  tensor(19)  with r 1.0
Adding episode  ten

In [13]:

agg_numbers = [entry.split('_')[1] for entry in names]

import os
path = os.path.expanduser(f"~/workspace/tdmpc2/demonstrations/AGG_HD_{'_'.join(agg_numbers)}" + "_sparse" if cfg.force_sparse else '')
buffer.save(path)
len(buffer._buffer.storage)

Buffer saved to: /home/j/workspace/tdmpc2/demonstrations/AGG_HD_0_1_2_sparse


5275

: 