# VQ-BeT Policy with LeRobot

---

- Conda env : [lerobot](../README.md#setup-a-conda-environment)

----

- Ref: 
    - https://huggingface.co/lerobot/vqbet_pusht

## Device Setup

In [1]:
import torch

# Choose the compute backend in priority order: Apple MPS first, then CUDA,
# and finally the CPU when neither accelerator is reported as available.
device = (
    "mps"
    if torch.backends.mps.is_available()
    else "cuda"
    if torch.cuda.is_available()
    else "cpu"
)

print(f"Available device : {device}")

Available device : cuda


## Train the VQ-BeT policy for the PushT environment

In [None]:
# Launch the LeRobot training module (lerobot.scripts.train) as a shell command:
# policy type `vqbet`, dataset `lerobot/pusht`, env type `pusht`, with
# ./temp/outputs/train/vqbet_pusht passed as the output directory.
# `$device` is expanded by IPython from the Python variable `device` chosen in
# the "Device Setup" cell, so that cell has to run before this one.
# NOTE(review): eval_freq=25 presumably triggers an evaluation every 25 training
# steps, far more often than the 1000 used by the resume cell - confirm intended.
!python -m lerobot.scripts.train --output_dir=./temp/outputs/train/vqbet_pusht \
    --policy.type=vqbet \
    --dataset.repo_id=lerobot/pusht \
    --seed=100000 \
    --env.type=pusht \
    --batch_size=64 \
    --steps=200000 \
    --eval_freq=25 \
    --save_freq=2000 \
    --policy.device=$device \
    --policy.push_to_hub=false \
    --wandb.enable=false

INFO 2025-10-08 12:15:16 ts/train.py:111 {'batch_size': 64,
 'dataset': {'episodes': None,
             'image_transforms': {'enable': False,
                                  'max_num_transforms': 3,
                                  'random_order': False,
                                  'tfs': {'brightness': {'kwargs': {'brightness': [0.8,
                                                                                   1.2]},
                                                         'type': 'ColorJitter',
                                                         'weight': 1.0},
                                          'contrast': {'kwargs': {'contrast': [0.8,
                                                                               1.2]},
                                                       'type': 'ColorJitter',
                                                       'weight': 1.0},
                                          'hue': {'kwargs': {'hue': [-0.05,
                

In [2]:
# Re-run the same training command with --resume=true, pointing --config_path at
# the train_config.json stored under checkpoints/last of the output directory.
# Compared with the initial training command, only eval_freq changes (1000
# instead of 25) and the two resume-related flags are added.
# NOTE(review): presumably requires a checkpoint written by an earlier run to
# exist at that path - confirm before running on a fresh checkout.
!python -m lerobot.scripts.train --output_dir=./temp/outputs/train/vqbet_pusht \
    --policy.type=vqbet \
    --dataset.repo_id=lerobot/pusht \
    --seed=100000 \
    --env.type=pusht \
    --batch_size=64 \
    --steps=200000 \
    --eval_freq=1000 \
    --save_freq=2000 \
    --policy.device=$device \
    --policy.push_to_hub=false \
    --wandb.enable=false \
    --config_path=./temp/outputs/train/vqbet_pusht/checkpoints/last/pretrained_model/train_config.json \
    --resume=true

INFO 2025-10-11 12:21:46 ts/train.py:111 {'batch_size': 64,
 'dataset': {'episodes': None,
             'image_transforms': {'enable': False,
                                  'max_num_transforms': 3,
                                  'random_order': False,
                                  'tfs': {'brightness': {'kwargs': {'brightness': [0.8,
                                                                                   1.2]},
                                                         'type': 'ColorJitter',
                                                         'weight': 1.0},
                                          'contrast': {'kwargs': {'contrast': [0.8,
                                                                               1.2]},
                                                       'type': 'ColorJitter',
                                                       'weight': 1.0},
                                          'hue': {'kwargs': {'hue': [-0.05,
                

## Video evaluation of the training sequence

In [None]:
from IPython.display import HTML, display

def display_video_grid(videos, cols=2, ratio=100):
    """
    Display videos in a grid with titles.

    Every path and title is HTML-escaped before it is placed in the markup, so
    characters such as ``<``, ``&`` or ``"`` are shown literally instead of
    breaking the generated HTML.

    Args:
        videos (list of tuples): [(filepath, title), ...]
        cols (int): number of videos per row
        ratio (int or float): CSS width of each video element, in percent

    Returns:
        None. The grid is rendered through IPython's ``display``.
    """
    # Imported locally so the function carries its own dependency; the name
    # ``escape`` also avoids shadowing the stdlib ``html`` module.
    from html import escape

    # CSS for grid
    style = f"""
    <style>
    .video-grid {{
      display: grid;
      grid-template-columns: repeat({cols}, 1fr);
      gap: 20px;
      margin-top: 20px;
    }}
    .video-item {{
      text-align: center;
    }}
    .video-item video {{
      width: {ratio}%;
      border-radius: 10px;
      box-shadow: 0 4px 10px rgba(0,0,0,0.2);
    }}
    .video-title {{
      margin-top: 8px;
      font-weight: bold;
      font-family: sans-serif;
    }}
    </style>
    """

    # Build video HTML; str() keeps pathlib.Path objects and non-string titles
    # working, as they did with plain f-string interpolation.
    items = "".join(
        f"""
        <div class="video-item">
            <video controls>
                <source src="{escape(str(path), quote=True)}" type="video/mp4">
            </video>
            <div class="video-title">{escape(str(title))}</div>
        </div>
        """
        for path, title in videos
    )

    markup = style + f'<div class="video-grid">{items}</div>'
    display(HTML(markup))



In [None]:
# Checkpoint steps and evaluation episodes whose recorded videos are compared.
steps = [f"step_{count:06d}" for count in (25, 2500, 25000, 50000, 100000, 200000)]
episodes = [f"episode_{index}" for index in range(4)]

# (file path, title) pairs, ordered episode by episode and, within an episode,
# by increasing training step.
video_list = [
    (
        f"./output/train_videos_pusht/videos_{step}/eval_{episode}.mp4",
        f"{step}_{episode}",
    )
    for episode in episodes
    for step in steps
]

print(video_list)

# One column per checkpoint step: video_list is ordered episode by episode, so
# each grid row shows a single episode across the listed training steps.
display_video_grid(video_list, cols=len(steps))