# Training with BC-RNN

In [1]:
import os 
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [2]:
import hydra
from hydra import compose, initialize
from omegaconf import OmegaConf
import yaml
from easydict import EasyDict
from libero.libero import get_libero_path
import pprint

# load default hydra config
hydra.core.global_hydra.GlobalHydra.instance().clear()
initialize(config_path='../libero/configs')
hydra_cfg = compose(config_name="easy_config")
yaml_config = OmegaConf.to_yaml(hydra_cfg)
cfg = EasyDict(yaml.safe_load(yaml_config))

# prepare lifelong learning
cfg.folder = get_libero_path("datasets")
cfg.bddl_folder = get_libero_path("bddl_files")
cfg.init_states_folder = get_libero_path("init_states")
cfg.eval.num_procs = 5
cfg.eval.n_eval = 5
cfg.train.n_epochs = 50
cfg.benchmark_name = "libero_object" # can be from {"libero_spatial", "libero_object", "libero_goal", "libero_10"}

pp = pprint.PrettyPrinter(indent=2)
pp.pprint(cfg.policy)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path='../libero/configs')


{ 'color_aug': { 'network': 'BatchWiseImgColorJitterAug',
                 'network_kwargs': { 'brightness': 0.3,
                                     'contrast': 0.3,
                                     'epsilon': 0.1,
                                     'hue': 0.3,
                                     'input_shape': None,
                                     'saturation': 0.3}},
  'image_embed_size': 64,
  'image_encoder': { 'network': 'ResnetEncoder',
                     'network_kwargs': { 'freeze': False,
                                         'language_fusion': 'film',
                                         'no_stride': False,
                                         'pretrained': False,
                                         'remove_layer_num': 4}},
  'language_encoder': { 'network': 'MLPEncoder',
                        'network_kwargs': { 'hidden_size': 128,
                                            'input_size': 768,
                                            'num

In [3]:
from libero.libero import benchmark
from libero.libero.benchmark import get_benchmark

task_order = cfg.data.task_order_index # can be from {0 .. 21}, default to 0, which is [task 0, 1, 2 ...]
benchmark = get_benchmark(cfg.benchmark_name)(task_order)

[info] using task orders [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [4]:
from libero.lifelong.datasets import SequenceVLDataset, get_dataset
from libero.lifelong.utils import get_task_embs
import os
# prepare datasets from the benchmark
datasets = []
descriptions = []
n_tasks = benchmark.n_tasks #hopefully unused
task_id = 0
assert 0 <= task_id < n_tasks, f"task_id {task_id} out of range [0, {n_tasks})"
N_DEMOS = None

# currently we assume tasks from same benchmark have the same shape_meta
task_id_dataset, shape_meta = get_dataset(
        dataset_path=os.path.join(cfg.folder, benchmark.get_task_demonstration(task_id)),
        obs_modality=cfg.data.obs.modality,
        initialize_obs_utils=True,
        seq_len=cfg.data.seq_len,
        n_demos=N_DEMOS
)
# add language to the vision dataset, hence we call vl_dataset
description = benchmark.get_task(task_id).language

task_embs = get_task_embs(cfg, [description])
benchmark.set_task_embs(task_embs)
dataset = SequenceVLDataset(task_id_dataset, task_embs[0])



using obs modality: rgb with keys: ['eye_in_hand_rgb', 'agentview_rgb']
using obs modality: depth with keys: []
using obs modality: low_dim with keys: ['joint_states', 'gripper_states']
SequenceDataset: loading dataset into memory...
100%|██████████| 50/50 [00:00<00:00, 1380.41it/s]


In [5]:
import robomimic.utils.tensor_utils as TensorUtils
import torch
import torch.nn as nn

from einops import rearrange, repeat
from libero.lifelong.algos.single_task import SingleTask
from libero.lifelong.utils import safe_device, create_experiment_dir


cfg.policy.policy_type = 'BCRNNPolicy'
cfg.lifelong.algo = 'SingleTask'
create_experiment_dir(cfg)
cfg.shape_meta = shape_meta

algo = safe_device(SingleTask(n_tasks, cfg), cfg.device)

# unused variable
import numpy as np
result_summary = {
    'L_conf_mat': np.zeros((n_tasks, n_tasks)),   # loss confusion matrix
    'S_conf_mat': np.zeros((n_tasks, n_tasks)),   # success confusion matrix
    'L_fwd'     : np.zeros((n_tasks,)),           # loss AUC, how fast the agent learns
    'S_fwd'     : np.zeros((n_tasks,)),           # success AUC, how fast the agent succeeds
}

# succ is from evaluation / rollout on the task
algo.train()
succ_fwd, loss_fwd = algo.learn_one_task(dataset, 0, benchmark, result_summary)



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


[info] Epoch:   0 | train loss:  5.44 | time: 0.60
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used

Process Process-5:
Process Process-6:
Traceback (most recent call last):
  File "/home/andang/anaconda3/envs/libero/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/andang/anaconda3/envs/libero/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/andang/neurips_2025/LIBERO/libero/libero/envs/venv.py", line 231, in _worker
    env = env_fn_wrapper.data()
  File "/home/andang/neurips_2025/LIBERO/libero/lifelong/metric.py", line 93, in <lambda>
    [lambda: OffScreenRenderEnv(**env_args) for _ in range(env_num)]
  File "/home/andang/neurips_2025/LIBERO/libero/libero/envs/env_wrapper.py", line 161, in __init__
    super().__init__(**kwargs)
  File "/home/andang/neurips_2025/LIBERO/libero/libero/envs/env_wrapper.py", line 56, in __init__
    self.env = TASK_MAPPING[self.problem_name](
  File "/home/andang/neurips_2025/LIBERO/libero/libero/envs/problems/libero_floor_manipulation.

	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Process Process-8:
Traceback (most recent call last):
  File "/home/andang/anaconda3/envs/libero/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/andang/anaconda3/envs/libero/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/andang/neurips_2025/LIBERO/libero/libero/envs/venv.py", line 231, in _worker
    env = env_fn_wrapper.data()
  File "/home/andang/neurips_2025/LIBERO/libero/lifelong/metric.py", line 93, in <lambda>
    [lambda: OffScreenRenderEnv(**env_args) for _ in range(env_num)]
  File "/home/andang/neurips_2025/LIBERO/libero/libero/envs/env_wrapper.py", line 161, in __init__
    super().__init__(**kwargs)
  File "/home/andang/neurips_2025/LIBERO/libero/libero/envs/env_wrapper.py", line 56, in __init__
    self.env = TASK_MAPPING[self.problem_name](
  File "/home/andang/neurips_2025/LIBERO/libero/libero/envs/problems/libero_floor_manipulation.py", line 37, in __

ConnectionResetError: [Errno 104] Connection reset by peer

In [None]:
from IPython.display import HTML
from base64 import b64encode
import imageio
import cv2
from libero.libero.envs import OffScreenRenderEnv, DummyVectorEnv
from libero.lifelong.metric import raw_obs_to_tensor_obs

# You can turn on subprocess
env_num = 1
action_dim = 7

# If it's packnet, the weights need to be processed first
task = benchmark.get_task(task_id)
task_emb = benchmark.get_task_emb(task_id)
if cfg.lifelong.algo == "PackNet":
    algo = algo.get_eval_algo(task_id)

algo.eval()
env_args = {
    "bddl_file_name": os.path.join(
        cfg.bddl_folder, task.problem_folder, task.bddl_file
    ),
    "camera_heights": cfg.data.img_h,
    "camera_widths": cfg.data.img_w,
}

env = DummyVectorEnv(
            [lambda: OffScreenRenderEnv(**env_args) for _ in range(env_num)]
)

init_states_path = os.path.join(
    cfg.init_states_folder, task.problem_folder, task.init_states_file
)
init_states = torch.load(init_states_path)

env.reset()

init_state = init_states[0:1]
dones = [False]

algo.reset()

obs = env.set_init_state(init_state)

# Make sure the gripepr is open to make it consistent with the provided demos.
dummy_actions = np.zeros((env_num, action_dim))
for _ in range(5):
    obs, _, _, _ = env.step(dummy_actions)

steps = 0

obs_tensors = [[]] * env_num
while steps < cfg.eval.max_steps:
    steps += 1
    data = raw_obs_to_tensor_obs(obs, task_emb, cfg)
    action = algo.policy.get_action(data)

    obs, reward, done, info = env.step(action)

    for k in range(env_num):
        dones[k] = dones[k] or done[k]
        obs_tensors[k].append(obs[k]["agentview_image"])
    if all(dones):
        break
    
# visualize video
# obs_tensor: (env_num, T, H, W, C)

images = [img[::-1] for img in obs_tensors[0]]
fps = 30
writer  = imageio.get_writer('tmp_video.mp4', fps=fps)
for image in images:
    # resize image to 640x640
    image = cv2.resize(image, (640, 640))
    writer.append_data(image)
writer.close()

video_data = open("tmp_video.mp4", "rb").read()
video_tag = f'<video controls alt="test" src="data:video/mp4;base64,{b64encode(video_data).decode()}">'
HTML(data=video_tag)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
