# Neural Robot Dynamics Training on Colab

This notebook demonstrates how to set up the environment, generate a dataset, and train the NeRD model.

In [1]:
# 1. Setup Environment
!git clone https://github.com/dhruv0000/neural-robot-dynamics.git
%cd neural-robot-dynamics
!pip install -r requirements.txt
!pip install warp-lang==1.8.0
!pip install rl_games
!pip install wandb


Cloning into 'neural-robot-dynamics'...
remote: Enumerating objects: 636, done.[K
remote: Counting objects: 100% (202/202), done.[K
remote: Compressing objects: 100% (134/134), done.[K
remote: Total 636 (delta 109), reused 134 (delta 66), pack-reused 434 (from 1)[K
Receiving objects: 100% (636/636), 21.46 MiB | 12.37 MiB/s, done.
Resolving deltas: 100% (196/196), done.
Filtering content: 100% (11/11), 202.03 MiB | 56.06 MiB/s, done.
/content/neural-robot-dynamics
Collecting pyglet==2.1.6 (from -r requirements.txt (line 2))
  Downloading pyglet-2.1.6-py3-none-any.whl.metadata (7.7 kB)
Collecting ipdb (from -r requirements.txt (line 3))
  Downloading ipdb-0.13.13-py3-none-any.whl.metadata (14 kB)
Collecting h5py==3.11.0 (from -r requirements.txt (line 4))
  Downloading h5py-3.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)
Collecting pyyaml==6.0.2 (from -r requirements.txt (line 5))
  Downloading PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylin

In [None]:
# Setup WandB
import os
from getpass import getpass

import wandb

# SECURITY: never commit a W&B API key to the notebook. Any key that was ever
# hardcoded in a shared copy of this cell must be treated as leaked and revoked
# in the W&B account settings.
#
# Key lookup order:
#   1. Colab secret named 'wandb_key' (when running on Colab)
#   2. an already-exported WANDB_API_KEY environment variable
#   3. an interactive, non-echoing prompt
wandb_api_key = None

try:
    from google.colab import userdata
except ImportError:
    userdata = None  # not running on Colab

if userdata is not None:
    try:
        wandb_api_key = userdata.get('wandb_key')
    except Exception as exc:
        # Raised when the secret is missing or notebook access was not granted;
        # report it instead of silently swallowing it, then fall back.
        print(f"Could not read Colab secret 'wandb_key' ({type(exc).__name__}); falling back.")

if not wandb_api_key:
    wandb_api_key = os.environ.get('WANDB_API_KEY')
if not wandb_api_key:
    wandb_api_key = getpass('Enter your W&B API key: ')

if wandb_api_key and wandb_api_key.strip():
    os.environ['WANDB_API_KEY'] = wandb_api_key.strip()
else:
    print('WARNING: no W&B API key configured; runs that log to W&B may fail or prompt for login.')

# Do not keep the secret around as a notebook variable.
del wandb_api_key

wandb_project = 'neural-robot-dynamics-big'


In [3]:
# 2. Generate Dataset
# We generate a smaller dataset for demonstration purposes.
import os
import shutil
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

%cd generate

# Define paths
drive_data_dir = '/content/drive/MyDrive/neural-robot-dynamics/data/datasets/Cartpole/'
local_data_dir = '../data/datasets/Cartpole/'
train_filename = 'trajectory_len-100_train.hdf5'
valid_filename = 'trajectory_len-100_valid.hdf5'

os.makedirs(local_data_dir, exist_ok=True)
os.makedirs(drive_data_dir, exist_ok=True)

# Check if data exists in Drive
if os.path.exists(os.path.join(drive_data_dir, train_filename)) and os.path.exists(os.path.join(drive_data_dir, valid_filename)):
    print("Loading datasets from Google Drive...")
    shutil.copy(os.path.join(drive_data_dir, train_filename), local_data_dir)
    shutil.copy(os.path.join(drive_data_dir, valid_filename), local_data_dir)
else:
    print("Generating datasets...")
    # Generate Training Data
    !python generate_dataset_contact_free.py --env-name Cartpole --num-transitions 1000000 --dataset-dir ../data/datasets/ --dataset-name trajectory_len-100_train.hdf5 --trajectory-length 100 --num-envs 2048 --seed 0

    # Generate Validation Data
    !python generate_dataset_contact_free.py --env-name Cartpole --num-transitions 100000 --dataset-dir ../data/datasets/ --dataset-name trajectory_len-100_valid.hdf5 --trajectory-length 100 --num-envs 2048 --seed 10

    print("Saving datasets to Google Drive...")
    shutil.copy(os.path.join(local_data_dir, train_filename), drive_data_dir)
    shutil.copy(os.path.join(local_data_dir, valid_filename), drive_data_dir)

%cd ..

Mounted at /content/drive
/content/neural-robot-dynamics/generate
Loading datasets from Google Drive...
/content/neural-robot-dynamics


In [None]:
# 3. Train Baseline Model (Transformer)
%cd train

import yaml
import os

# Load default config
with open('cfg/Cartpole/transformer.yaml', 'r') as f:
    cfg = yaml.safe_load(f)

# Override dataset paths to point to the generated data
cfg['algorithm']['dataset']['train_dataset_path'] = '../data/datasets/Cartpole/trajectory_len-100_train.hdf5'
cfg['algorithm']['dataset']['valid_datasets']['exp_trajectory'] = '../data/datasets/Cartpole/trajectory_len-100_valid.hdf5'

# Reduce training parameters for quick demonstration
cfg['algorithm']['num_epochs'] = 100
cfg['algorithm']['num_iters_per_epoch'] = 100
cfg['algorithm']['batch_size'] = 1024
cfg['algorithm']['dataset']['num_data_workers'] = 8

# Save the modified config
with open('colab_config.yaml', 'w') as f:
    yaml.dump(cfg, f)

# Run training
!python train.py --cfg colab_config.yaml --logdir ../data/logs/baseline --wandb-project {wandb_project} --wandb-name baseline

/content/neural-robot-dynamics/train
Warp 1.8.0 initialized:
   CUDA Toolkit 12.8, Driver 12.4
   Devices:
     "cpu"      : "x86_64"
     "cuda:0"   : "Tesla T4" (15 GiB, sm_75, mempool enabled)
   Kernel cache:
     /root/.cache/warp/1.8.0
2025-12-07 15:42:17.365836: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765122137.386063    1980 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765122137.392335    1980 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765122137.407755    1980 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:17651221

In [None]:
# 4a. Train Mamba-6 Model (6 layers)
# This is the full 6-layer Mamba model.
# Reuses colab_config.yaml written by the baseline training cell, so that cell must
# have run first; it also leaves the working directory in train/, which the
# relative paths in this command rely on.
!python train.py --cfg colab_config.yaml --novelty mamba-6 --logdir ../data/logs/mamba_6 --wandb-project {wandb_project} --wandb-name mamba-6

In [None]:
# 4b. Train Mamba-3 Model (3 layers)
# Reduced parameter count version for comparison.
# Reuses colab_config.yaml written by the baseline training cell and must be run
# from the train/ directory.
!python train.py --cfg colab_config.yaml --novelty mamba-3 --logdir ../data/logs/mamba_3 --wandb-project {wandb_project} --wandb-name mamba-3

In [None]:
# 5a. Train Unroll Model (currently disabled)
# Same config as the baseline plus the --novelty unroll flag. The command below is
# commented out; uncomment it to train this variant.
# !python train.py --cfg colab_config.yaml --novelty unroll --logdir ../data/logs/unroll --wandb-project {wandb_project} --wandb-name unroll

Warp 1.8.0 initialized:
   CUDA Toolkit 12.8, Driver 12.4
   Devices:
     "cpu"      : "x86_64"
     "cuda:0"   : "Tesla T4" (15 GiB, sm_75, mempool enabled)
   Kernel cache:
     /root/.cache/warp/1.8.0
2025-12-07 15:51:23.530696: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765122683.558173    5709 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765122683.564220    5709 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765122683.579837    5709 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765122683.579865    5709 computation_placer.

In [6]:
# 5b. Train Jamba Model
# We use the same config (colab_config.yaml, written by the baseline training cell)
# but add the --novelty jamba flag. Must be run from the train/ directory.
!python train.py --cfg colab_config.yaml --novelty jamba --logdir ../data/logs/jamba --wandb-project {wandb_project} --wandb-name jamba

Warp 1.8.0 initialized:
   CUDA Toolkit 12.8, Driver 12.4
   Devices:
     "cpu"      : "x86_64"
     "cuda:0"   : "Tesla T4" (15 GiB, sm_75, mempool enabled)
   Kernel cache:
     /root/.cache/warp/1.8.0
2025-12-07 15:48:23.154626: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765122503.173932    4486 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765122503.179931    4486 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765122503.195009    4486 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765122503.195040    4486 computation_placer.

In [8]:
# 6. Save Models to Google Drive
import os
import shutil


def replace_tree(src_dir, dst_dir):
    """Replace ``dst_dir`` with a copy of ``src_dir``, keeping the old copy on failure.

    The copy is first written to a sibling staging directory (``dst_dir`` + '.tmp').
    Only after it completes is the previous ``dst_dir`` removed and the staging
    directory renamed into place, so an interrupted or failed copy never destroys
    the existing backup.

    Args:
        src_dir: Directory to copy from.
        dst_dir: Directory to create or replace.

    Raises:
        OSError: If the copy fails; ``dst_dir`` is left untouched in that case.
    """
    staging_dir = os.path.normpath(dst_dir) + '.tmp'
    # A staging directory left over from an earlier failed run is stale.
    if os.path.exists(staging_dir):
        shutil.rmtree(staging_dir)
    try:
        shutil.copytree(src_dir, staging_dir)
    except Exception:
        shutil.rmtree(staging_dir, ignore_errors=True)
        raise
    if os.path.exists(dst_dir):
        shutil.rmtree(dst_dir)
    os.rename(staging_dir, dst_dir)


models = ['baseline', 'mamba_6', 'mamba_3', 'unroll', 'jamba']
drive_base_dir = '/content/drive/MyDrive/neural-robot-dynamics/data/logs'
# Relative to the train/ directory the notebook is working in.
local_base_dir = '../data/logs'

for model in models:
    local_dir = os.path.join(local_base_dir, model)
    drive_dir = os.path.join(drive_base_dir, model)

    if os.path.exists(local_dir):
        print(f"Saving {model} model to Google Drive...")
        replace_tree(local_dir, drive_dir)
    else:
        print(f"Local log directory for {model} not found. Skipping save.")


Saving baseline model to Google Drive...
Saving mamba model to Google Drive...
Saving unroll model to Google Drive...
Saving jamba model to Google Drive...


In [9]:
# 7. Load Models from Google Drive (Optional)
# Use this cell to restore previously saved log directories from Drive instead of
# training the models again.
import os
import shutil

models = ['baseline', 'mamba_6', 'mamba_3', 'unroll', 'jamba']
drive_base_dir = '/content/drive/MyDrive/neural-robot-dynamics/data/logs'
local_base_dir = '../data/logs'

for model in models:
    local_dir = os.path.join(local_base_dir, model)
    drive_dir = os.path.join(drive_base_dir, model)

    # Nothing saved on Drive for this model: report it and move on.
    if not os.path.exists(drive_dir):
        print(f"Drive log directory for {model} not found. Skipping load.")
        continue

    print(f"Loading {model} model from Google Drive...")
    # copytree needs a fresh destination, so drop any existing local copy first.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)
    shutil.copytree(drive_dir, local_dir)


Loading baseline model from Google Drive...
Loading mamba model from Google Drive...
Loading unroll model from Google Drive...
Loading jamba model from Google Drive...


# 7. Quantitative Analysis

We now perform the quantitative analysis as described in the paper experiments.
We evaluate:
1. **Long-Horizon Passive Motion**: Accuracy of the trained NeRD models over 100, 500, and 1000 steps.
2. **RL Policy Evaluation**: Performance of the pretrained RL policy using the NeRD models.

In [None]:
# 7.1 Long-Horizon Passive Motion Evaluation
# We evaluate every trained model (baseline, Mamba-6, Mamba-3, unroll, Jamba) on Cartpole for 100, 500, and 1000 steps.

import os
import glob

def find_latest_model(model_type):
    """Locate the best-eval checkpoint of the most recent run for ``model_type``.

    Run directories are looked up under ``../data/logs/<model_type>`` (relative to
    the current working directory), and the one whose path sorts last is treated
    as the latest run.

    Args:
        model_type: Name of the log sub-directory, e.g. 'baseline'.

    Returns:
        Path to ``<latest run>/nn/best_eval_model.pt``, or None when the log
        directory is missing, contains no run directories, or the latest run has
        no such checkpoint.
    """
    log_root = f'../data/logs/{model_type}'
    if not os.path.exists(log_root):
        return None

    # Only sub-directories count as runs; stray files are ignored.
    run_dirs = [
        entry for entry in glob.glob(os.path.join(log_root, '*'))
        if os.path.isdir(entry)
    ]
    if not run_dirs:
        return None

    checkpoint = os.path.join(max(run_dirs), 'nn', 'best_eval_model.pt')
    return checkpoint if os.path.exists(checkpoint) else None

# Log sub-directory names of every trained variant, and the rollout lengths to test.
models = ['baseline', 'mamba_6', 'mamba_3', 'unroll', 'jamba']
horizons = [100, 500, 1000]

# For each model with an available checkpoint, run the passive-motion evaluation
# script once per horizon. Models without a checkpoint are reported and skipped.
for model_name in models:
    model_path = find_latest_model(model_name)
    if not model_path:
        print(f"Skipping {model_name} (model not found)")
        continue

    print(f"\n{'='*20} Evaluating {model_name.capitalize()} Model {'='*20}")
    for horizon in horizons:
        print(f"\n--- Horizon: {horizon} ---")
        # We use !python to ensure output is printed to the cell
        # {model_path}, {horizon}, {wandb_project} and {model_name} are interpolated
        # by IPython from the notebook namespace; wandb_project comes from the
        # WandB setup cell.
        !python ../eval/eval_passive/eval_passive_motion.py \
            --env-name Cartpole \
            --model-path {model_path} \
            --env-mode neural \
            --num-envs 2048 \
            --num-rollouts 2048 \
            --rollout-horizon {horizon} \
            --seed 500 \
            --wandb-project {wandb_project} \
            --wandb-name {model_name}_passive_eval_{horizon}




--- Horizon: 100 ---
Warp 1.8.0 initialized:
   CUDA Toolkit 12.8, Driver 12.4
   Devices:
     "cpu"      : "x86_64"
     "cuda:0"   : "Tesla T4" (15 GiB, sm_75, mempool enabled)
   Kernel cache:
     /root/.cache/warp/1.8.0
Number of Model Parameters:  2713668
[96m [NeuralEnvironment] Creating abstract contact environment: Cartpole. [0m
Creating 2048 environments: 100% 2048/2048 [00:06<00:00, 312.57it/s]
Module warp.sim.integrator_featherstone 18b3327 load on device 'cuda:0' took 4.50 ms  (cached)
Module envs.abstract_contact_environment 8e8d790 load on device 'cuda:0' took 0.44 ms  (cached)
Module integrators.integrator_neural ee402cd load on device 'cuda:0' took 0.47 ms  (cached)
[96m [NeuralEnvironment] Created a Neural Integrator. [0m
Sampling state transitions:   0% 0/1 [00:00<?, ?it/s]Module warp.sim.articulation 770a52a load on device 'cuda:0' took 1.36 ms  (cached)
Module envs.warp_sim_envs.env_cartpole 01fd57b load on device 'cuda:0' took 0.33 ms  (cached)
Module utils

In [11]:
# 7.2 RL Policy Evaluation (Quantitative)
# We evaluate the policy using the trained NeRD models and compare with Ground Truth.
# We run for more games (2048) to get a statistically significant result as in the paper.

import pandas as pd
import re
import subprocess
import os

def run_eval(model_path=None, env_mode='neural', label='Model', wandb_name=None):
    """Run the pretrained Cartpole RL policy once and return its average reward.

    Launches ``run_rl.py`` as a subprocess inside ``../eval/eval_rl`` and parses
    the ``av reward: <value>`` line from its standard output. The W&B project is
    read from the notebook-level ``wandb_project`` variable.

    Args:
        model_path: Optional path to a NeRD checkpoint. When given, it is passed
            to the script as ``--nerd-model-path``.
        env_mode: Value forwarded as ``--env-mode`` (the notebook uses 'neural'
            and 'ground-truth').
        label: Human-readable name used in the printed banner and error messages.
        wandb_name: W&B run name. Defaults to a name derived from ``label``
            ('Ground Truth' -> 'ground_truth_rl_eval', 'Mamba_6' ->
            'mamba_6_rl_eval') so that each evaluation is logged under its own
            name instead of whatever ``model_name`` happens to be defined globally.

    Returns:
        The parsed average reward as a float, or None if the subprocess failed,
        could not be started, or its output did not contain a reward.
    """
    print(f"\n{'='*20} RL Evaluation: {label} {'='*20}")

    if wandb_name is None:
        label_slug = re.sub(r'\s+', '_', label.strip().lower())
        wandb_name = f'{label_slug}_rl_eval'

    # Use absolute paths: the subprocess runs with a different working directory,
    # so paths relative to the notebook's cwd would not resolve there.
    abs_playback_path = os.path.abspath('../pretrained_models/RL_policies/Cartpole/0/nn/CartpolePPO.pth')
    abs_rl_cfg_path = os.path.abspath('../eval/eval_rl/cfg/Cartpole/cartpole.yaml')

    cmd = [
        'python', 'run_rl.py',
        '--rl-cfg', abs_rl_cfg_path,
        '--playback', abs_playback_path,
        '--num-envs', '2048',
        '--num-games', '2048',
        '--env-mode', env_mode,
        '--wandb-project', wandb_project,
        '--wandb-name', wandb_name
    ]

    if model_path:
        cmd.extend(['--nerd-model-path', os.path.abspath(model_path)])

    try:
        result = subprocess.run(cmd, cwd='../eval/eval_rl', check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f'Error running RL evaluation for {label}:')
        print('STDOUT:', e.stdout)
        print('STDERR:', e.stderr)
        return None
    except OSError as e:
        # E.g. the working directory or the interpreter does not exist.
        print(f'Error running RL evaluation for {label}: {e}')
        return None

    output = result.stdout
    print(output[-500:])  # Print last 500 chars to see result

    # Look for 'av reward: <value> av steps: <value>'; accept plain decimals as
    # well as scientific notation.
    match = re.search(r'av reward:\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)', output)
    if match is None:
        print("Could not parse reward from output.")
        return None
    return float(match.group(1))

results = []

# Reference run: the policy evaluated against the ground-truth environment.
gt_reward = run_eval(env_mode='ground-truth', label='Ground Truth')
if gt_reward is not None:
    results.append({'Model': 'Ground Truth', 'Reward': gt_reward, 'Error (%)': 0.0})

# One run per NeRD model that has a checkpoint; failed runs are left out of the table.
for model_name in models:
    model_path = find_latest_model(model_name)
    if not model_path:
        continue

    display_name = model_name.capitalize()
    reward = run_eval(model_path=model_path, env_mode='neural', label=display_name)
    if reward is None:
        continue

    # Relative deviation from the ground-truth reward, in percent; NaN when the
    # reference run produced no reward.
    if gt_reward is None:
        error = float('nan')
    else:
        error = (reward - gt_reward) / gt_reward * 100
    results.append({'Model': display_name, 'Reward': reward, 'Error (%)': error})

# Summary table
df = pd.DataFrame(results)
print("\nFinal Evaluation Comparison:")
display(df)


teps: 56.0
reward: 98.16730499267578 steps: 61.0
reward: 57.46388244628906 steps: 62.0
reward: -94.28440856933594 steps: 94.0
reward: -85.1441421508789 steps: 100.0
reward: 1225.2821979114867 steps: 300.0
2461286.3829221725
av reward: 1201.799991661217 av steps: 295.4072265625
visited states range:
State 0: [-4.084733486175537, 4.017707347869873]
State 1: [-3.1415224075317383, 3.1415481567382812]
State 2: [-10.378302574157715, 8.506925582885742]
State 3: [-10.652484893798828, 9.791386604309082]


ps: 293.0
reward: 1224.8743489583333 steps: 296.0
reward: 1259.690673828125 steps: 297.0
reward: 1267.3876953125 steps: 298.0
reward: 1273.3040771484375 steps: 299.0
reward: 1090.1022199277234 steps: 300.0
2155489.165640831
av reward: 1050.9454732524773 av steps: 290.6616284739152
visited states range:
State 0: [-4.109048366546631, 3.390089750289917]
State 1: [-3.141591787338257, 3.141425609588623]
State 2: [-10.443437576293945, 8.636869430541992]
State 3: [-8.79551887512207, 9.13463497161865

Unnamed: 0,Model,Reward,Error (%)
0,Ground Truth,1201.799992,0.0
1,Baseline,1050.945473,-12.552381
2,Mamba,1090.842595,-9.232601
3,Unroll,-191.580636,-115.941141
4,Jamba,1107.342391,-7.859677


# 7.3 Inference Throughput (FPS) Evaluation

We measure the inference throughput (FPS) of the different models. This metric measures the raw speed of the simulation, expressed in Frames Per Second (FPS).
We measure the wall-clock time required to roll out a large batch of parallel environments (2048 robots) for a fixed number of steps.

In [None]:
import pandas as pd
import re
import subprocess
import os

def run_fps_eval(model_path=None, env_mode='neural', label='Model'):
    """Measure simulation throughput by running ``eval_fps.py`` as a subprocess.

    The script is started inside ``../eval/eval_fps`` for Cartpole with 2048
    environments and a rollout horizon of 100, and the ``FPS: <value>`` figure is
    parsed from its standard output.

    Args:
        model_path: Optional checkpoint path; when given it is made absolute and
            passed as ``--model-path``.
        env_mode: Value forwarded as ``--env-mode``.
        label: Human-readable name used in the printed banner and error messages.

    Returns:
        The parsed FPS as a float, or None if the subprocess exited with an
        error or no FPS figure was found in its output.
    """
    banner = '=' * 20
    print(f"\n{banner} FPS Evaluation: {label} {banner}")

    # The subprocess runs in another directory, so the checkpoint path is made absolute.
    model_args = ['--model-path', os.path.abspath(model_path)] if model_path else []
    cmd = [
        'python', 'eval_fps.py',
        '--env-name', 'Cartpole',
        '--num-envs', '2048',
        '--rollout-horizon', '100',
        '--env-mode', env_mode,
        *model_args,
    ]

    try:
        completed = subprocess.run(cmd, cwd='../eval/eval_fps', check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        print(f'Error running FPS evaluation for {label}:')
        print('STDOUT:', err.stdout)
        print('STDERR:', err.stderr)
        return None

    stdout_text = completed.stdout
    print(stdout_text)

    # Extract the number following 'FPS:'.
    fps_match = re.search(r'FPS:\s*([-\d\.]+)', stdout_text)
    if fps_match is None:
        print("Could not parse FPS from output.")
        return None
    return float(fps_match.group(1))

fps_results = []

# Reference measurement: the analytical simulator (ground-truth mode, no checkpoint).
gt_fps = run_fps_eval(env_mode='ground-truth', label='Analytical (Warp)')
if gt_fps is not None:
    fps_results.append({'Model': 'Analytical (Warp)', 'FPS': gt_fps})

# One measurement per NeRD model that has a checkpoint; failed runs are left out.
for model_name in models:
    model_path = find_latest_model(model_name)
    if not model_path:
        continue

    display_name = model_name.capitalize()
    fps = run_fps_eval(model_path=model_path, env_mode='neural', label=display_name)
    if fps is None:
        continue
    fps_results.append({'Model': display_name, 'FPS': fps})

# Summary table
df_fps = pd.DataFrame(fps_results)
print("\nInference Throughput Comparison:")
display(df_fps)


Warp 1.8.0 initialized:
   CUDA Toolkit 12.8, Driver 12.4
   Devices:
     "cpu"      : "x86_64"
     "cuda:0"   : "Tesla T4" (15 GiB, sm_75, mempool enabled)
   Kernel cache:
     /root/.cache/warp/1.8.0
[96m [NeuralEnvironment] Creating abstract contact environment: Cartpole. [0m
Module warp.sim.integrator_featherstone 18b3327 load on device 'cuda:0' took 5.37 ms  (cached)
Module envs.abstract_contact_environment 8e8d790 load on device 'cuda:0' took 0.42 ms  (cached)
Module integrators.integrator_neural ee402cd load on device 'cuda:0' took 0.51 ms  (cached)
[91m [NeuralEnvironment] Created a DUMMY Neural Integrator. [0m
Module warp.sim.articulation 770a52a load on device 'cuda:0' took 1.34 ms  (cached)
Module envs.warp_sim_envs.env_cartpole 01fd57b load on device 'cuda:0' took 0.39 ms  (cached)
Module utils.warp_utils 294c46a load on device 'cuda:0' took 0.34 ms  (cached)
Module envs.warp_sim_envs.utils d93eb17 load on device 'cuda:0' took 0.81 ms  (cached)
time(collision_detect

Unnamed: 0,Model,FPS
0,Analytical (Warp),723222.836166
1,Baseline,32772.515622
2,Mamba,8146.571066
3,Unroll,31465.761774
4,Jamba,10826.766537
