# 🚀 Deploy a Trained Policy

This notebook guides you through deploying a trained policy to a physical robot. 

### Process:
1.  **Configure**: Set the path to your trained model and the robot's server address.
2.  **Load**: The `policy_loader` automatically loads the model and its training configuration.
3.  **Deploy**: The `deploy_policy` function starts the inference loop and sends commands to the robot.

The deployment script automatically handles details like the action space (`tcp`, `joint`, etc.) based on the loaded training configuration.

# 0. Add additional helpers for the dataset evaluation

In [62]:
# limitations under the License.

import argparse
import time
from pathlib import Path

import grpc

# Lerobot Environment Bug
import numpy as np
import torch

from example_policies.robot_deploy.action_translator import ActionMode, ActionTranslator
from example_policies.robot_deploy.debug_helpers.utils import print_info
from example_policies.robot_deploy.policy_loader import load_policy
from example_policies.robot_deploy.robot_io.robot_interface import RobotInterface
from example_policies.robot_deploy.robot_io.robot_service import (
    robot_service_pb2,
    robot_service_pb2_grpc,
)

TIME_TO_GET_INTO_STARTING_POSITION_S = 3


def set_tcp_position(service_stub, cfg, position: list[float]):
    pos = torch.tensor(position).unsqueeze(0)
    
    robot_interface = RobotInterface(service_stub, cfg)
    robot_interface.send_action(position, ActionMode.ABS_TCP)


def inference_loop(
    policy, cfg, hz: float, service_stub: robot_service_pb2_grpc.RobotServiceStub, observation_callback_fn=None, steps=None,
    starting_position=None
):
    robot_interface = RobotInterface(service_stub, cfg)
    model_to_action_trans = ActionTranslator(cfg)

    step = 0
    done = False

    if starting_position is not None:
        print("Starting position provided, moving the robot to the starting position")
        set_tcp_position(service_stub, cfg, starting_position)
        time.sleep(TIME_TO_GET_INTO_STARTING_POSITION_S)
        print("Wait to get into the position finished")
        # TODO (could actually fetch a position and compare).
    
    # Inference Loop
    print("Starting inference loop...")
    period = 1.0 / hz

    while not done and (steps is None or step < steps):
        start_time = time.time()
        # print(policy.config.input_features)
        print("Step:", step)
        observation = robot_interface.get_observation(cfg.device, show=False)

        if observation:
            if observation_callback_fn:
                observation_callback_fn(observation)
            # Predict the next action with respect to the current observation
            with torch.inference_mode():
                action = policy.select_action(observation)
                # print(f"\n=== RAW MODEL PREDICTION ===")
                # print_info(step, observation, action)
                # print()
            action = model_to_action_trans.translate(action, observation)

            # print(f"\n=== ABSOLUTE ROBOT COMMANDS ===")
            # print_info(step, observation, action)
            robot_interface.send_action(action, model_to_action_trans.action_mode)
            # policy._queues["action"].clear()

        # wait for execution to finish
        elapsed_time = time.time() - start_time
        sleep_duration = period - elapsed_time
        # print(sleep_duration)
        # wait for input
        # input("Press Enter to continue...")
        time.sleep(max(0.0, sleep_duration))

        step += 1


def deploy_policy(policy, cfg, hz: float, server: str, observation_callback_fn=None, steps=100, starting_position=None):
    channel = grpc.insecure_channel(server)
    stub = robot_service_pb2_grpc.RobotServiceStub(channel)
    try:
        inference_loop(policy, cfg, hz, stub, observation_callback_fn, steps, starting_position)
    except Exception as e:
        print(f"Error occurred: {e}")
        raise e
    finally:
        channel.close()
        print("Connection closed.")

In [63]:
import imageio
import os
import time

BLUE_BOX_TASK_DESCRIPTION = 'Put the white cube in the blue container'

class Recorder:
    def __init__(self, output_dir, task_description_default=None):
        self.rgb_left_frames = []
        self.rgb_right_frames = []
        self.rgb_static_frames = []
        self.output_dir = output_dir
        self.episode_n = 0
        os.makedirs(output_dir, exist_ok=True)
        self.task_description_default = task_description_default
        
    def process_obs(self, obs):
        if 'task' not in obs and self.task_description_default is not None:
            obs['task'] = [self.task_description_default]
    
        frame_rgb_right = process_frame(obs['observation.images.rgb_right'])
        frame_rgb_left = process_frame(obs['observation.images.rgb_left'])
        frame_rgb_static = process_frame(obs['observation.images.rgb_static'])
        self.rgb_right_frames.append(frame_rgb_right)
        self.rgb_left_frames.append(frame_rgb_left)
        self.rgb_static_frames.append(frame_rgb_static)

    def save_and_start_new(self):
        imageio.mimsave(f"{self.output_dir}/eval_episode_{self.episode_n}_right.mp4", np.stack(self.rgb_right_frames), fps=INFERENCE_FREQUENCY_HZ)
        imageio.mimsave(f"{self.output_dir}/eval_episode_{self.episode_n}_left.mp4", np.stack(self.rgb_left_frames), fps=INFERENCE_FREQUENCY_HZ)
        imageio.mimsave(f"{self.output_dir}/eval_episode_{self.episode_n}_static.mp4", np.stack(self.rgb_static_frames), fps=INFERENCE_FREQUENCY_HZ)

        self.rgb_left_frames = []
        self.rgb_right_frames = []
        self.rgb_static_frames = []
        self.episode_n +=1

        
def process_frame(frame):
    return (frame.cpu().squeeze().transpose(0, 2) * 255).to(torch.uint8)


def eval_policy(policy, cfg, hz, server, output_dir, starting_position=None, num_episodes=10, steps_per_episode=100, time_to_reset_env_s=10, task_description_default=BLUE_BOX_TASK_DESCRIPTION):

    recorder = Recorder(output_dir=output_dir, task_description_default=task_description_default)

    for episode_n in range(num_episodes):
        print(f"Starting episode {episode_n}")
        
        try:
            deploy_policy(policy, cfg, hz=INFERENCE_FREQUENCY_HZ, server=SERVER_ENDPOINT, observation_callback_fn=recorder.process_obs, steps=steps_per_episode, starting_position=starting_position)
        except Exception as e:
            print("oh no", e, e.__traceback__)
            print(e)
            raise(e)
        print(f"Saving videos for episode {episode_n}")
        recorder.save_and_start_new()
        print("Reset environment before the next episode")
        time.sleep(time_to_reset_env_s)
        

## 1. Configuration

First, specify the necessary parameters for deployment. **You must edit these values.**

In [52]:
import pathlib

# TODO: Change to the directory containing your trained policy checkpoint.
# Example: "outputs/2025-09-14/12-00-00"
CHECKPOINT_DIR = pathlib.Path("/home/jovyan/hackathon-example-policies/notebooks/outputs/train/2025-09-18/19-59-18_smolvla/")

# TODO: Change to the robot's IP address.
SERVER_ENDPOINT = "192.168.0.212:50051"

# Inference frequency in Hz. Higher values result in smoother but potentially faster movements.
INFERENCE_FREQUENCY_HZ: float = 10.0

print(f"Attempting to load policy from: {CHECKPOINT_DIR}")
print(f"Robot server endpoint: {SERVER_ENDPOINT}")
print(f"Inference frequency: {INFERENCE_FREQUENCY_HZ} Hz")

Attempting to load policy from: /home/jovyan/hackathon-example-policies/notebooks/outputs/train/2025-09-18/19-59-18_smolvla
Robot server endpoint: 192.168.0.212:50051
Inference frequency: 10.0 Hz


## 1.5 Configuration for the HOME position


In [53]:
HOME = [-0.2108,  0.6804,  0.4403, -0.0931,  0.9910,  0.0301,  0.0914,  0.1795,
         0.6926,  0.4007, -0.0163, -0.9996, -0.0122,  0.0170]


def get_current_tcp_position(service_stub, cfg):
    robot_interface = RobotInterface(service_stub, cfg)
    snapshot_response, robot_names = robot_interface.client.get_snapshot()

    tcp_state = robot_interface.observation_builder._get_tcp_state(snapshot_response, robot_names)
    return tcp_state


def set_in_home():
    set_tcp_position(stub, cfg, HOME)


To configure the new HOME position, set the robot in the desired position and run the following code (after uncommenting the HOME =...)

In [54]:
channel = grpc.insecure_channel(SERVER_ENDPOINT)
stub = robot_service_pb2_grpc.RobotServiceStub(channel)
pos = get_current_tcp_position(stub, cfg)

print(torch.tensor(pos))

# Uncomment the function to set the new home position.
# HOME = pos

tensor([-0.1969,  0.6755,  0.4361, -0.0694,  0.9929,  0.0246,  0.0938,  0.1647,
         0.6868,  0.3982, -0.0248, -0.9995, -0.0072,  0.0171])


## 2. Load the Policy

Now, we load the policy from the specified checkpoint directory. The loader will find the latest checkpoint and its corresponding configuration file.

In [55]:
from example_policies.robot_deploy import policy_loader

policy, cfg = policy_loader.load_policy(CHECKPOINT_DIR)

print("✅ Policy loaded successfully!")


Checkpoint path /home/jovyan/hackathon-example-policies/notebooks/outputs/train/2025-09-18/19-59-18_smolvla does not contain config.json, extending path.
Reducing the number of VLM layers to 16 ...
Loading weights from local directory
✅ Policy loaded successfully!


## 3. (Optional) Modify Policy Attributes

Before deployment, you can override policy attributes for experimentation. For example, you might want to adjust the action chunking (`n_action_steps`) to see how it affects robot behavior.

In [56]:
# Uncomment and modify the lines below to change policy attributes.
# For available options, refer to the lerobot policy's config documentation.

# policy.device = "cuda"  # or "cpu"
# policy.n_action_steps = 15  # Number of actions to predict in each forward pass

# print(f"Policy will run on device: {policy.device}")
# print(f"Action steps set to: {policy.n_action_steps}")

## 4. Eval policy on the robot

Finally, execute the cell below to start sending commands to the robot.

⚠️ **Warning**: This will move the physical robot. Ensure the robot has a clear and safe workspace.

In [64]:
STEPS_PER_EPISODE = 10
OUTPUT_DIR = "eval/eval1/"
STARTING_POSITION = torch.tensor(HOME)
NUM_EPISODES = 10
eval_policy(policy, cfg, hz=INFERENCE_FREQUENCY_HZ, server=SERVER_ENDPOINT, steps_per_episode=STEPS_PER_EPISODE, output_dir=OUTPUT_DIR, 
            starting_position=STARTING_POSITION, num_episodes=NUM_EPISODES)


Starting episode 0
Starting position provided, moving the robot to the starting position


  pos = torch.tensor(position).unsqueeze(0)


Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 1
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 1


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 2
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 2


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 3
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 3


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 4
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 4


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 5
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 5


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 6
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 6


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 7
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 7


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 8
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 8


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode
Starting episode 9
Starting position provided, moving the robot to the starting position
Wait to get into the position finished
Starting inference loop...
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Connection closed.
Saving videos for episode 9


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Reset environment before the next episode


## 5. Videos from eval

In [67]:
from IPython.display import Video


for episode in range(NUM_EPISODES):
    print(f"Evaluation videos for the episode: {episode}")
    display(Video(f"{OUTPUT_DIR}/eval_episode_{episode}_right.mp4"))
    display(Video(f"{OUTPUT_DIR}/eval_episode_{episode}_left.mp4"))
    display(Video(f"{OUTPUT_DIR}/eval_episode_{episode}_static.mp4"))

Evaluation videos for the episode: 0


Evaluation videos for the episode: 1


Evaluation videos for the episode: 2


Evaluation videos for the episode: 3


Evaluation videos for the episode: 4


Evaluation videos for the episode: 5


Evaluation videos for the episode: 6


Evaluation videos for the episode: 7


Evaluation videos for the episode: 8


Evaluation videos for the episode: 9
