In [None]:
import os
import getpass

# Never store a real NGC key in the notebook. Reuse NGC_API_KEY when the
# environment already provides it (so an exported key is not clobbered),
# otherwise ask for it interactively without echoing the value.
if not os.environ.get("NGC_API_KEY"):
    os.environ["NGC_API_KEY"] = getpass.getpass("NGC API key (nvapi-...): ")

In [13]:
%%bash
# Log in to the nvcr.io registry with the key exported by the previous cell.
# The user name is the literal string `$oauthtoken` (single-quoted so bash
# does not expand it); the key is passed on stdin, not on the command line.

# Fail with a clear message instead of sending an empty password to docker.
: "${NGC_API_KEY:?NGC_API_KEY is not set - run the key setup cell first}"

printf '%s\n' "${NGC_API_KEY}" | docker login nvcr.io -u '$oauthtoken' --password-stdin

Login Succeeded


In [14]:
import os, json, subprocess, time

# -------------------------------
# 1. Setup NeMo/NIM cache
# -------------------------------
os.environ["LOCAL_NIM_CACHE"] = "/ephemeral/cache/nim"
os.makedirs(os.environ["LOCAL_NIM_CACHE"], exist_ok=True)
print(f"LOCAL_NIM_CACHE set to {os.environ['LOCAL_NIM_CACHE']}")

# -------------------------------
# 2. Setup Docker ephemeral storage
# -------------------------------
storage_path = "/ephemeral/cache/docker"
os.makedirs(storage_path, exist_ok=True)

daemon_file = "/etc/docker/daemon.json"

# Load the existing daemon configuration so that every setting other than
# "data-root" survives the rewrite below.
config = {}
if os.path.exists(daemon_file):
    try:
        with open(daemon_file) as fh:
            raw_config = fh.read()
    except PermissionError:
        # Not readable as this user: read it through sudo rather than
        # continuing with an empty dict, which would wipe the file's other
        # settings when it is written back.
        raw_config = subprocess.run(
            ["sudo", "cat", daemon_file],
            capture_output=True, text=True, check=True,
        ).stdout
    # An empty file is treated as "no settings" instead of a JSON error.
    if raw_config.strip():
        config = json.loads(raw_config)

# Update Docker root
already_configured = config.get("data-root") == storage_path
config["data-root"] = storage_path
config_str = json.dumps(config, indent=4)

if already_configured:
    # Re-running the cell must not restart the daemon for no reason.
    print("Docker data-root already configured; skipping daemon restart.")
else:
    # Write daemon.json (requires sudo). The JSON is fed to `tee` on stdin so
    # it is never spliced into a shell command line.
    subprocess.run(
        ["sudo", "tee", daemon_file],
        input=config_str + "\n", text=True,
        stdout=subprocess.DEVNULL, check=True,
    )

    # Restart Docker so the new data-root takes effect
    subprocess.run(["sudo", "systemctl", "restart", "docker"], check=True)
    time.sleep(5)  # give the daemon a moment to come back up

# Verify new Docker root. `--format` returns only the path, so the label is
# printed once instead of twice.
docker_root = subprocess.run(
    ["docker", "info", "--format", "{{.DockerRootDir}}"],
    capture_output=True, text=True
).stdout.strip()
print("Docker Root Dir:", docker_root)

# -------------------------------
# 3. Setup pip cache
# -------------------------------
pip_cache = "/ephemeral/cache/pip"
os.makedirs(pip_cache, exist_ok=True)
os.environ["PIP_CACHE_DIR"] = pip_cache
print(f"PIP_CACHE_DIR set to {pip_cache}")

# -------------------------------
# 4. Setup HuggingFace cache
# -------------------------------
hf_cache = "/ephemeral/cache/huggingface"
os.makedirs(hf_cache, exist_ok=True)
os.environ["HF_HOME"] = hf_cache
print(f"HF_HOME set to {hf_cache}")

# -------------------------------
# 5. Setup tmpdir
# -------------------------------
tmp_dir = "/ephemeral/tmp"
os.makedirs(tmp_dir, exist_ok=True)
os.environ["TMPDIR"] = tmp_dir
print(f"TMPDIR set to {tmp_dir}")

LOCAL_NIM_CACHE set to /ephemeral/cache/nim
Docker Root Dir: Docker Root Dir: /ephemeral/cache/docker
PIP_CACHE_DIR set to /ephemeral/cache/pip
HF_HOME set to /ephemeral/cache/huggingface
TMPDIR set to /ephemeral/tmp


In [17]:
!docker run --gpus all --name nemo-rl -it \
  -p 9000:9000 \
  -v "$(pwd)":/workspace \
  -w /workspace \
  -d nvcr.io/nvidia/nemo-rl:v0.4.0

338c41dd9c790a7454c565af194f9b2e765acbd8f78e6cb2ba114ee824f1556f


In [None]:
container = "nemo-rl"

!docker exec {container} bash -c "git clone https://github.com/NVIDIA-NeMo/RL.git nemo-rl --recursive"
!docker exec {container} bash -c "cd nemo-rl && git submodule update --init --recursive"

# Activate NeMo RL venv
!docker exec {container} bash -c "source /opt/nemo_rl_venv/bin/activate"

# HuggingFace login
!docker exec {container} bash -c "huggingface-cli login --token hf_****"

# WANDB API key
!docker exec {container} bash -c 'export WANDB_API_KEY="****"'

fatal: destination path 'nemo-rl' already exists and is not an empty directory.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `hf`CLI if you want to set the git credential as well.
Token is valid (permission: write).
The token `Anymodel` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `Anymodel`


In [24]:
container = "nemo-rl"

!docker exec -it nemo-rl bash -c "
    source /opt/nemo_rl_venv/bin/activate && \
    uv run python nemo-rl/examples/run_dpo.py \
        cluster.gpus_per_node=1 \
        dpo.max_num_steps=10 \
        policy.model_name=meta-llama/Llama-3.2-1B-Instruct \
        policy.tokenizer.name=meta-llama/Llama-3.2-1B-Instruct
"

2025-12-09 06:55:17,299	INFO worker.py:1879 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
INFO:nemo_rl.distributed.virtual_cluster:Started local cluster with tag 'nrl_tag_ALL': {'object_store_memory': 10000000000.0, 'accelerator_type:H100': 1.0, 'GPU': 1.0, 'memory': 178291661824.0, 'node:__internal_head__': 1.0, 'nrl_tag_ALL': 1.0, 'CPU': 28.0, 'node:172.17.0.2': 1.0}
[36m(_env_builder pid=18844)[0m INFO:nemo_rl.utils.venvs:Using existing venv at /opt/ray_venvs/nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker
Loaded configuration from: /workspace/nemo-rl/examples/configs/dpo.yaml
Overrides: ['cluster.gpus_per_node=1', 'dpo.max_num_steps=10']
Applied CLI overrides
Final config:
{'checkpointing': {'checkpoint_dir': 'results/dpo',
                   'checkpoint_must_save_by': None,
                   'enabled': True,
                   'higher_is_better': False,
                   'keep_top_k': 3,
                   'met

In [26]:
container = "nemo-rl"

!docker exec {container} bash -c "source /opt/nemo_rl_venv/bin/activate && \
    uv run nemo-rl/examples/converters/convert_dcp_to_hf.py \
    --config ./results/dpo/step_10/config.yaml \
    --dcp-ckpt-path ./results/dpo/step_10/policy/weights \
    --hf-ckpt-path ./results/dpo/step_10/hf"

Saved HF checkpoint to: ./results/dpo/step_10/hf


In [27]:
%%writefile inference.py
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

hf_path = "./results/dpo/step_10/hf/"

tokenizer = AutoTokenizer.from_pretrained(hf_path)
model = AutoModelForCausalLM.from_pretrained(hf_path, torch_dtype=torch.bfloat16)
model.eval()

prompt = "Say hello in a friendly way."
inputs = tokenizer(prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(out[0], skip_special_tokens=True))


Writing inference.py


In [28]:
# Run the generated inference.py inside the container, activating the
# NeMo RL venv in the same shell that launches Python.
container = "nemo-rl"
!docker exec {container} bash -c "source /opt/nemo_rl_venv/bin/activate && python inference.py"

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Say hello in a friendly way. I'm excited to meet you!

How's your day going so far? Anything exciting on the horizon?


In [29]:
%%writefile convert.py
from transformers import AutoModelForCausalLM, AutoTokenizer

src = "./results/dpo/step_10/hf"
dst = "./results/dpo/step_10/hf_st"

model = AutoModelForCausalLM.from_pretrained(src)
model.save_pretrained(dst, safe_serialization=True)

tok = AutoTokenizer.from_pretrained(src)
tok.save_pretrained(dst)

print("Saved to:", dst)


Writing convert.py


In [30]:
# Run the generated convert.py inside the container, activating the
# NeMo RL venv in the same shell that launches Python.
container = "nemo-rl"
!docker exec {container} bash -c "source /opt/nemo_rl_venv/bin/activate && python convert.py"

Saved to: ./results/dpo/step_10/hf_st


In [35]:
# ===============================
#   MultiLLM-NIM Container Launcher
#   (Detached mode)
# ===============================

# Choose container name
CONTAINER_NAME = "MultiLLM-NIM"

# NGC Multi-LLM NIM repo
Repository = "nim/nvidia/llm-nim"
TAG = "latest"
IMG_NAME = f"nvcr.io/{Repository}:{TAG}"

# Path to your local HF DPO model
LOCAL_MODEL_DIR = "./results/dpo/step_10/hf_st"

# Name to expose the served model
NIM_SERVED_MODEL_NAME = "dpo-llm"

# Local NIM cache (you chose ephemeral)
LOCAL_NIM_CACHE = "/ephemeral/cache/nim"

# Create cache directory
!mkdir -p "{LOCAL_NIM_CACHE}"
!chmod -R a+w "{LOCAL_NIM_CACHE}"

print("Starting MultiLLM-NIM container in detached mode...")
print("Container:", CONTAINER_NAME)
print("Image:", IMG_NAME)
print("Model Path:", LOCAL_MODEL_DIR)
print("NIM Cache:", LOCAL_NIM_CACHE)

Starting MultiLLM-NIM container in detached mode...
Container: MultiLLM-NIM
Image: nvcr.io/nim/nvidia/llm-nim:latest
Model Path: ./results/dpo/step_10/hf_st
NIM Cache: /ephemeral/cache/nim


In [36]:
# -------------------------------
# Run the container DETACHED
# -------------------------------
!docker run -d --rm --name={CONTAINER_NAME} \
  --runtime=nvidia \
  --gpus all \
  --shm-size=16GB \
  -e NIM_MODEL_PROFILE="e2f00b2cbfb168f907c8d6d4d40406f7261111fbab8b3417a485dcd19d10cc98" \
  -e NIM_MODEL_NAME="/opt/models/local_model" \
  -e NIM_SERVED_MODEL_NAME={NIM_SERVED_MODEL_NAME} \
  -v "{LOCAL_MODEL_DIR}:/opt/models/local_model" \
  -v "{LOCAL_NIM_CACHE}:/opt/nim/.cache" \
  -u $(id -u) \
  -p 8000:8000 \
  {IMG_NAME}

331249d51d0669e450ad8b18642b7f73d190b5abf5164bc669f67cbff49ba96c


In [37]:
import time

import requests

url = 'http://localhost:8000/v1/health/ready' #make sure the LLM NIM port is correct
headers = {'accept': 'application/json'}

# Upper bound on the total wait so a container that failed to start does not
# keep this cell spinning forever.
MAX_WAIT_SECONDS = 30 * 60
POLL_INTERVAL_SECONDS = 30
REQUEST_TIMEOUT_SECONDS = 10

print("Checking MultiLLM NIM readiness...")
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
        if response.status_code == 200:
            try:
                data = response.json()
            except ValueError:
                # A 200 with a non-JSON body is treated as "not ready yet".
                data = {}
            if isinstance(data, dict) and data.get("message") == "Service is ready.":
                print("LLM NIM is ready.")
                break
            print(f"LLM NIM is not ready. Waiting for {POLL_INTERVAL_SECONDS} seconds...")
        else:
            print(f"Unexpected status code {response.status_code}. Waiting for {POLL_INTERVAL_SECONDS} seconds...")
    except requests.RequestException:
        # Covers refused connections as well as request timeouts.
        print(f"LLM NIM is not ready. Waiting for {POLL_INTERVAL_SECONDS} seconds...")
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"LLM NIM did not become ready within {MAX_WAIT_SECONDS} seconds; "
            "check the container logs."
        )
    time.sleep(POLL_INTERVAL_SECONDS)

Checking MultiLLM NIM readiness...
LLM NIM is not ready. Waiting for 30 seconds...
LLM NIM is not ready. Waiting for 30 seconds...
LLM NIM is not ready. Waiting for 30 seconds...
LLM NIM is not ready. Waiting for 30 seconds...
LLM NIM is not ready. Waiting for 30 seconds...
LLM NIM is not ready. Waiting for 30 seconds...
LLM NIM is not ready. Waiting for 30 seconds...
LLM NIM is ready.


In [38]:
!curl -X POST 'http://localhost:8000/v1/completions' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{"model": "dpo-llm", "prompt": "Once upon a time", "max_tokens": 64}'

{"id":"cmpl-82d8eaf975ec4316bef8930c4ab3c276","object":"text_completion","created":1765264365,"model":"dpo-llm","choices":[{"index":0,"text":", in a small village nestled in the rolling hills of rural Spain, there lived a young and adventurous girl named Sofia. Sofia had always been fascinated by the old, mysterious forest that stood at the edge of the village. She loved listening to the whispers of the ancient trees and imagining the secrets they might hold.\n\nAs she","logprobs":null,"finish_reason":"length","stop_reason":null,"token_ids":null,"prompt_logprobs":null,"prompt_token_ids":null}],"service_tier":null,"system_fingerprint":null,"usage":{"prompt_tokens":5,"total_tokens":69,"completion_tokens":64,"prompt_tokens_details":null},"kv_transfer_params":null}

In [None]:
# Stop the NIM container; it was started with --rm, so stopping also removes it.
# CONTAINER_NAME comes from the launcher configuration cell.
!docker stop {CONTAINER_NAME}