In [1]:
import sys
import os
# Append the parent directory to the module search path. The entry is a
# relative path, so what it resolves to depends on the working directory the
# kernel was started from.
# NOTE(review): presumably this makes a local `lerobot` checkout importable
# by the cells below -- confirm against the repository layout.
sys.path.append('../')

In [28]:
from pathlib import Path

import gym_pusht  # noqa: F401
import gymnasium as gym
import imageio
import numpy
import torch
from huggingface_hub import snapshot_download

from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy

from lerobot.common.datasets.push_dataset_to_hub.utils import (
    concatenate_episodes,
    get_default_encoding,
    save_images_concurrently,
)
from lerobot.common.datasets.utils import (
    calculate_episode_data_index,
    hf_transform_to_torch,
)

In [3]:
# Folder for evaluation artefacts; it is created up front (including any
# missing parents) and creating it again on a re-run is a no-op.
output_directory = Path("outputs", "eval", "example_pusht_diffusion")
output_directory.mkdir(exist_ok=True, parents=True)

In [4]:
# Download (or reuse a cached copy of) the pretrained checkpoint and load it.
pretrained_policy_path = Path(snapshot_download("lerobot/diffusion_pusht"))

policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)
policy.eval()

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print("GPU is available. Device set to:", device)
else:
    print(f"GPU is not available. Device set to: {device}. Inference will be slower than on GPU.")
    # On CPU, run fewer reverse-diffusion steps: a small quality loss in
    # exchange for roughly 10x faster inference.
    policy.diffusion.num_inference_steps = 10

# Move the policy to the selected device.
policy = policy.to(device)

Fetching 11 files: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 235394.61it/s]


Loading weights from local directory
GPU is available. Device set to: cuda


In [5]:
# PushT environment: observations are requested in the "pixels_agent_pos"
# format and an episode is cut off after at most 300 steps.
env = gym.make("gym_pusht/PushT-v0", obs_type="pixels_agent_pos", max_episode_steps=300)

In [9]:
# Scratch check: stacking five length-2 vectors along a new leading axis
# produces a single (5, 2) tensor.
xs = [torch.randn(2) for _sample in range(5)]
xs2 = torch.stack(xs, dim=0)
xs2.shape

torch.Size([5, 2])

In [25]:
def build_ep_dict(
    ep_idx,
    observation_states: list,
    actions: list,
    rewards: list,
    dones: list,
    successes: list,
    frames: list,
    videos_dir: Path,
    fps=10,
):
    """Pack one recorded episode into a dictionary and write its video.

    All per-step lists must have exactly one entry per rendered frame.

    Args:
        ep_idx: Integer index of the episode; used in the video file name and
            repeated for every frame under ``'episode_index'``.
        observation_states: Per-step state tensors, combined with ``torch.stack``.
        actions: Per-step action tensors, combined with ``torch.stack``.
        rewards: Per-step rewards, converted with ``torch.tensor``.
        dones: Per-step done flags, converted with ``torch.tensor``.
        successes: Per-step success flags, converted with ``torch.tensor``.
        frames: Rendered frames, handed to ``imageio.mimsave`` unchanged.
        videos_dir: Directory that receives the ``.mp4`` file. It is created
            (with parents) if it does not exist yet; a ``str`` is accepted too.
        fps: Frame rate used both for the video and for the timestamps.

    Returns:
        dict with the keys ``'observation.image'`` (one
        ``{'path', 'timestamp'}`` entry per frame), ``'observation.state'``,
        ``'action'``, ``'episode_index'``, ``'frame_index'``, ``'timestamp'``,
        ``'next.reward'``, ``'next.done'`` and ``'next.success'``.

    Raises:
        AssertionError: if any per-step list differs in length from ``frames``.
    """
    num_frames = len(frames)
    per_step_inputs = {
        "observation_states": observation_states,
        "actions": actions,
        "rewards": rewards,
        "dones": dones,
        "successes": successes,
    }
    for arg_name, values in per_step_inputs.items():
        assert len(values) == num_frames, (
            f"{arg_name} has {len(values)} entries but there are {num_frames} frames"
        )

    # Accept plain strings and make sure the target directory exists, so the
    # video writer does not fail on a missing folder.
    videos_dir = Path(videos_dir)
    videos_dir.mkdir(parents=True, exist_ok=True)

    ep_dict = {}

    # observation.image: the frames are stored as one video file; every frame
    # is referenced by the path of that file plus its timestamp within it.
    img_key = 'observation.image'
    video_path = videos_dir / f"{img_key}_episode_{ep_idx:06d}.mp4"
    imageio.mimsave(video_path, frames, fps=fps)
    print(f"Video of the evaluation is available in '{video_path}'.")
    ep_dict[img_key] = [
        {'path': video_path, 'timestamp': i / fps} for i in range(num_frames)
    ]

    # Stack the per-step tensors along a new leading (frame) axis.
    ep_dict['observation.state'] = torch.stack(observation_states)
    ep_dict['action'] = torch.stack(actions)

    # Bookkeeping: which episode each frame belongs to, its position within
    # the episode, and the matching time in seconds.
    ep_dict['episode_index'] = torch.full((num_frames,), ep_idx, dtype=torch.int64)
    ep_dict['frame_index'] = torch.arange(0, num_frames, 1)
    ep_dict['timestamp'] = torch.arange(0, num_frames, 1) / fps

    ep_dict["next.reward"] = torch.tensor(rewards)
    ep_dict["next.done"] = torch.tensor(dones)
    ep_dict["next.success"] = torch.tensor(successes)
    return ep_dict

In [26]:
# Re-point the output folder at the dataset test location (this replaces the
# directory chosen earlier in the notebook) and make sure it exists.
output_directory = Path("outputs", "eval", "dataset_test")
output_directory.mkdir(exist_ok=True, parents=True)

In [27]:
num_episodes = 1
# Seed used for the first episode. Episode i is reset with base_seed + i so
# that several episodes do not all replay the same initial state.
base_seed = 42

# Values that do not change between episodes are computed once up front.
# The environment's render rate is used as the video frame rate.
fps = env.metadata["render_fps"]
videos_dir = output_directory / 'videos'
videos_dir.mkdir(parents=True, exist_ok=True)

ep_dicts = []

for ep_idx in range(num_episodes):
    # Reset the policy and the environment before every rollout.
    policy.reset()
    numpy_observation, info = env.reset(seed=base_seed + ep_idx)

    # Per-step records; each list gets exactly one entry per loop iteration.
    frames = []
    observation_states = []
    actions = []
    rewards = []
    dones = []
    successes = []

    step = 0
    done = False
    while not done:
        # Render the state the policy is about to act on.
        frames.append(env.render())

        # Prepare observation for the policy running in Pytorch
        state = torch.from_numpy(numpy_observation["agent_pos"])
        image = torch.from_numpy(numpy_observation["pixels"])

        # Convert to float32; the image is scaled from [0, 255] to [0, 1]
        # and permuted from channel-last (h x w x c) to channel-first.
        state_t = state.to(torch.float32)
        image = image.to(torch.float32) / 255
        image_t = image.permute(2, 0, 1)  # c x h x w

        # Send data tensors from CPU to the inference device
        state = state_t.to(device, non_blocking=True)
        image = image_t.to(device, non_blocking=True)

        # Add extra (empty) batch dimension, required to forward the policy
        state = state.unsqueeze(0)
        image = image.unsqueeze(0)

        # Create the policy input dictionary
        observation = {
            "observation.state": state,
            "observation.image": image,
        }

        # Predict the next action with respect to the current observation
        with torch.inference_mode():
            action = policy.select_action(observation)

        # Prepare the action for the environment: drop the batch dimension
        # and bring it back to the CPU before converting to numpy.
        action_t = action.squeeze(0).to("cpu")
        numpy_action = action_t.numpy()

        # Step through the environment and receive a new observation
        numpy_observation, reward, terminated, truncated, info = env.step(numpy_action)
        print(f"{step=} {reward=} {terminated=}")

        # The rollout is considered done when the success state is reached (i.e. terminated is True),
        # or the maximum number of iterations is reached (i.e. truncated is True)
        done = bool(terminated or truncated)

        # Record the CPU-side state and action together with the outcome of the step
        observation_states.append(state_t)
        actions.append(action_t)
        rewards.append(reward)
        successes.append(terminated)
        dones.append(done)

        step += 1

    print("Success!" if terminated else "Failure!")

    # Write the episode video and collect the episode's tensors.
    ep_dict = build_ep_dict(
        ep_idx,
        observation_states=observation_states,
        actions=actions,
        rewards=rewards,
        dones=dones,
        successes=successes,
        frames=frames,
        fps=fps,
        videos_dir=videos_dir,
    )
    ep_dicts.append(ep_dict)

step=0 reward=0.0 terminated=False
step=1 reward=0.0 terminated=False
step=2 reward=0.0 terminated=False
step=3 reward=0.0 terminated=False
step=4 reward=0.0 terminated=False
step=5 reward=0.0 terminated=False
step=6 reward=0.0 terminated=False
step=7 reward=0.0 terminated=False
step=8 reward=0.0 terminated=False
step=9 reward=0.0 terminated=False
step=10 reward=0.0 terminated=False
step=11 reward=0.0 terminated=False
step=12 reward=0.0 terminated=False
step=13 reward=0.0 terminated=False
step=14 reward=0.0 terminated=False
step=15 reward=0.0 terminated=False
step=16 reward=0.0 terminated=False
step=17 reward=0.0 terminated=False
step=18 reward=0.0 terminated=False
step=19 reward=0.0 terminated=False
step=20 reward=0.0 terminated=False
step=21 reward=0.0 terminated=False
step=22 reward=0.0 terminated=False
step=23 reward=0.0 terminated=False
step=24 reward=0.0 terminated=False
step=25 reward=0.0 terminated=False
step=26 reward=0.0 terminated=False
step=27 reward=0.0 terminated=False
st



step=152 reward=0.9906987247170604 terminated=False
step=153 reward=0.9906987247170604 terminated=False
step=154 reward=0.9906987247170604 terminated=False
step=155 reward=1.0 terminated=True
Success!




Video of the evaluation is available in 'outputs/eval/dataset_test/videos/observation.image_episode_000000.mp4'.


In [29]:
from IPython.display import Video

# Display the recorded episode video inline, embedded in the notebook output.
Video(
    videos_dir / 'observation.image_episode_000000.mp4',
    width=640,
    height=360,
    embed=True,
)