33 changes: 21 additions & 12 deletions omni/README.md
@@ -13,7 +13,12 @@

## Getting Started with Omni Docker Image

Build docker image:
Pull the Docker image from Docker Hub:
```bash
docker pull intel/llm-scaler-omni:0.1-b2
```

Or build the Docker image:

```bash
bash build.sh
@@ -22,7 +27,7 @@ bash build.sh
Run the Docker image:

```bash
export DOCKER_IMAGE=intel/llm-scaler-omni:0.1-b1
export DOCKER_IMAGE=intel/llm-scaler-omni:0.1-b2
export CONTAINER_NAME=comfyui
export MODEL_DIR=<your_model_dir>
export COMFYUI_MODEL_DIR=<your_comfyui_model_dir>
@@ -45,24 +50,30 @@ docker exec -it comfyui bash
```bash
cd /llm/ComfyUI

MODEL_PATH=<your_comfyui_models_path>
rm -rf /llm/ComfyUI/models
ln -s $MODEL_PATH /llm/ComfyUI/models
echo "Symbolic link created from $MODEL_PATH to /llm/ComfyUI/models"

export http_proxy=<your_proxy>
export https_proxy=<your_proxy>
export no_proxy=localhost,127.0.0.1

python3 main.py
```

Then you can access the webUI at `http://<your_local_ip>:8188/`. On the left side,
Then you can access the webUI at `http://<your_local_ip>:8188/`.

### (Optional) Preview settings for ComfyUI

Click the button in the top-right corner to launch ComfyUI Manager.
![comfyui_manager_logo](./assets/comfyui_manager_logo.png)

Change the `Preview method` setting to show preview images during sampling iterations.

![comfyui_manager_preview](./assets/comfyui_manager_preview.png)

![workflow image](./assets/confyui_workflow.png)

### ComfyUI workflows

On the left side of the web UI, you can find the workflows icon.
![workflow image](./assets/confyui_workflow.png)

Currently, the following workflows are supported on B60:
- Qwen-Image (refer to https://raw.githubusercontent.com/Comfy-Org/example_workflows/main/image/qwen/image_qwen_image_distill.json)
- Qwen-Image-Edit (refer to https://raw.githubusercontent.com/Comfy-Org/workflow_templates/refs/heads/main/templates/image_qwen_image_edit.json)
@@ -109,7 +120,6 @@ Set the `GPU` and `ulysses_degree` in `Ray Init Actor` node to GPU nums you want
## XInference

```bash
export ZE_AFFINITY_MASK=0 # In multi XPU environment, clearly select GPU index to avoid issues.
xinference-local --host 0.0.0.0 --port 9997
```
Supported models:
@@ -139,10 +149,9 @@ Supported models:
You can select a model and launch the service via the WebUI (refer to [here](#1-access-xinference-web-ui)) or from the command line:

```bash
export ZE_AFFINITY_MASK=0 # In multi XPU environment, clearly select GPU index to avoid issues.
xinference-local --host 0.0.0.0 --port 9997

xinference launch --model-name sd3.5-medium --model-type image --model-path /llm/models/stable-diffusion-3.5-medium/
xinference launch --model-name sd3.5-medium --model-type image --model-path /llm/models/stable-diffusion-3.5-medium/ --gpu-idx 0
```
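
To confirm the model came up, the running models can be listed from the same CLI (a usage sketch, not part of this PR; the endpoint value is an assumption matching the `xinference-local` settings above):

```bash
# List models currently served by the local XInference endpoint started above
# (assumes the default local endpoint on port 9997)
xinference list --endpoint http://localhost:9997
```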

#### 2. Post request in OpenAI API format
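
A minimal sketch of such a request (not taken from this PR; it assumes the service above is reachable at `localhost:9997` and exposes the OpenAI-compatible `/v1/images/generations` route, with the model name and field values chosen for illustration):

```bash
# Hypothetical example: request one image from the sd3.5-medium model launched above
# via the OpenAI-compatible images endpoint (path and fields are assumptions).
curl -s http://localhost:9997/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{
        "model": "sd3.5-medium",
        "prompt": "an astronaut riding a horse on the moon",
        "n": 1,
        "size": "1024x1024"
      }'
```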
Binary file added omni/assets/comfyui_manager_logo.png
Binary file added omni/assets/comfyui_manager_preview.png
2 changes: 1 addition & 1 deletion omni/build.sh
@@ -3,4 +3,4 @@ set -x
export HTTP_PROXY=<your_http_proxy>
export HTTPS_PROXY=<your_https_proxy>

docker build -f ./docker/Dockerfile . -t intel/llm-scaler-omni:0.1-b1 --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY
docker build -f ./docker/Dockerfile . -t intel/llm-scaler-omni:0.1-b2 --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY
22 changes: 20 additions & 2 deletions omni/docker/Dockerfile
@@ -11,6 +11,10 @@ ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib/python3.10/dist-packages/torc
COPY ./patches/yunchang_for_multi_arc.patch /tmp/
COPY ./patches/xdit_for_multi_arc.patch /tmp/
COPY ./patches/raylight_for_multi_arc.patch /tmp/
COPY ./patches/xinference_device_utils.patch /tmp/
COPY ./patches/comfyui_for_multi_arc.patch /tmp/
COPY ./patches/comfyui_voxcpm_for_xpu.patch /tmp/


# Add Intel oneAPI repo and PPA for GPU support
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
@@ -50,7 +54,8 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
cd /llm && \
git clone https://github.com/comfyanonymous/ComfyUI.git && \
cd ComfyUI && \
git checkout 72212fef660bcd7d9702fa52011d089c027a64d8 && \
git checkout 51696e3fdcdfad657cb15854345fbcbbe70eef8d && \
git apply /tmp/comfyui_for_multi_arc.patch && \
pip install -r requirements.txt && \
cd custom_nodes && \
git clone https://github.com/ltdrdata/ComfyUI-Manager.git comfyui-manager && \
@@ -60,19 +65,32 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
cd .. && \
git clone https://github.com/komikndr/raylight.git && \
cd raylight && \
git checkout 290c934cdd498b003fbf083e74e91ffc8edb961a && \
git checkout ff8e90ba1f2c2d23e3ac23746910ddfb523fc8f1 && \
git apply /tmp/raylight_for_multi_arc.patch && \
pip install -r requirements.txt && \
cd .. && \
git clone https://github.com/yolain/ComfyUI-Easy-Use.git comfyui-easy-use && \
cd comfyui-easy-use && \
pip install -r requirements.txt && \
cd .. && \
git clone https://github.com/Fannovel16/comfyui_controlnet_aux.git && \
cd comfyui_controlnet_aux && \
apt install libcairo2-dev pkg-config python3-dev -y && \
pip install -r requirements.txt && \
cd .. && \
git clone https://github.com/wildminder/ComfyUI-VoxCPM.git comfyui-voxcpm && \
cd comfyui-voxcpm && \
git checkout 044dd93c0effc9090fb279117de5db4cd90242a0 && \
git apply /tmp/comfyui_voxcpm_for_xpu.patch && \
pip install -r requirements.txt && \
# Install Xinference
pip install "xinference[transformers]" && \
patch /usr/local/lib/python3.10/dist-packages/xinference/device_utils.py < /tmp/xinference_device_utils.patch && \
pip install kokoro Jinja2==3.1.6 jieba ordered-set pypinyin cn2an pypinyin-dict && \
# Clean
rm -rf /tmp/*

COPY ./workflows/* /llm/ComfyUI/user/default/workflows/
COPY ./example_inputs/* /llm/ComfyUI/input/

WORKDIR /llm/ComfyUI
Binary file added omni/example_inputs/qwen_image_edit_input.png
Binary file added omni/example_inputs/voxcpm_example.wav
112 changes: 112 additions & 0 deletions omni/patches/comfyui_for_multi_arc.patch
@@ -0,0 +1,112 @@
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 709ebc40..c43e8eab 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -148,6 +148,90 @@ def is_intel_xpu():
return True
return False

+import os
+if is_intel_xpu() and os.environ.get("_LLM_SCALER_DISABLE_INTERPOLATE_FIX") != "1":
+ import torch
+ import torch.nn.functional as F
+ import functools # Used to preserve function metadata like docstrings
+
+ # Global variables to store the original function and patch status
+ _original_interpolate_func = None
+ _is_interpolate_patched = False
+
+
+ def patch_xpu_interpolate_to_cpu():
+ """
+ patches torch.nn.functional.interpolate. If an input tensor is on an XPU device,
+ it will be moved to CPU for interpolation, and the result will be moved back
+ to the original XPU device.
+ """
+ global _original_interpolate_func, _is_interpolate_patched
+
+ if _is_interpolate_patched:
+ print("torch.nn.functional.interpolate is already patched for XPU. Skipping.")
+ return
+
+ # Store the original function
+ _original_interpolate_func = F.interpolate
+
+ @functools.wraps(_original_interpolate_func)
+ def _custom_interpolate(input_tensor, *args, **kwargs):
+ """
+ Custom wrapper for interpolate. Moves XPU tensors to CPU for computation.
+ """
+
+ if input_tensor.device.type == "xpu":
+ # print(
+ # f"Intercepted interpolate call for XPU tensor at device {input_tensor.device}. Moving to CPU for computation."
+ # )
+ original_device = input_tensor.device
+
+ # Move input to CPU
+ input_on_cpu = input_tensor.to("cpu")
+
+ # Call the original interpolate function on CPU
+ result_on_cpu = _original_interpolate_func(input_on_cpu, *args, **kwargs)
+
+ # Move the result back to the original XPU device
+ result_on_xpu = result_on_cpu.to(original_device)
+ # print(
+ # f"Interpolation completed on CPU, result moved back to {original_device}."
+ # )
+ return result_on_xpu
+ else:
+ # If not an XPU tensor, just call the original function directly
+ return _original_interpolate_func(input_tensor, *args, **kwargs)
+
+ # Replace the original function with our custom one
+ F.interpolate = _custom_interpolate
+ _is_interpolate_patched = True
+ print(
+ "Successfully patched torch.nn.functional.interpolate to handle XPU tensors on CPU."
+ )
+
+
+ def unpatch_xpu_interpolate_to_cpu():
+ """
+ Restores the original torch.nn.functional.interpolate function if it was patched.
+ """
+ global _original_interpolate_func, _is_interpolate_patched
+
+ if not _is_interpolate_patched:
+ print(
+ "torch.nn.functional.interpolate is not currently patched. Skipping unpatch."
+ )
+ return
+
+ if _original_interpolate_func is not None:
+ F.interpolate = _original_interpolate_func
+ _original_interpolate_func = None
+ _is_interpolate_patched = False
+ print("Successfully unpatched torch.nn.functional.interpolate.")
+ else:
+ print("Error: Could not unpatch. Original function reference missing.")
+
+
+ patch_xpu_interpolate_to_cpu()
def is_ascend_npu():
global npu_available
if npu_available:
@@ -720,7 +804,6 @@ def cleanup_models_gc():
logging.warning("WARNING, memory leak with model {}. Please make sure it is not being referenced from somewhere.".format(cur.real_model().__class__.__name__))


-
def cleanup_models():
to_delete = []
for i in range(len(current_loaded_models)):
@@ -1399,7 +1482,7 @@ def unload_all_models():
free_memory(1e30, get_torch_device())


-#TODO: might be cleaner to put this somewhere else
+# TODO: might be cleaner to put this somewhere else
import threading

class InterruptProcessingException(Exception):
93 changes: 93 additions & 0 deletions omni/patches/comfyui_voxcpm_for_xpu.patch
@@ -0,0 +1,93 @@
diff --git a/src/voxcpm/model/utils.py b/src/voxcpm/model/utils.py
index f92efaa..8663af9 100644
--- a/src/voxcpm/model/utils.py
+++ b/src/voxcpm/model/utils.py
@@ -130,6 +130,15 @@ def _is_hip_available():
return False


+def _is_xpu_available():
+ try:
+ _ = torch.xpu.device_count()
+ xpu_available = torch.xpu.is_available()
+ except:
+ xpu_available = False
+ return xpu_available
+
+
def get_dtype(dtype: str):
"""Gets the torch dtype, automatically downgrading for incompatible hardware."""
device = "cpu"
@@ -140,6 +149,8 @@ def get_dtype(dtype: str):
device = "hip"
elif _is_directml_available():
device = "directml"
+ elif _is_xpu_available():
+ device = "xpu"
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
device = "mps"

diff --git a/src/voxcpm/model/voxcpm.py b/src/voxcpm/model/voxcpm.py
index 5c49e5b..9cb48b7 100644
--- a/src/voxcpm/model/voxcpm.py
+++ b/src/voxcpm/model/voxcpm.py
@@ -73,7 +73,7 @@ class VoxCPMConfig(BaseModel):
dit_config: VoxCPMDitConfig

max_length: int = 4096
- device: str = "cuda"
+ device: str = "xpu"
dtype: str = "bfloat16"


diff --git a/voxcpm_nodes.py b/voxcpm_nodes.py
index 0046094..01b29b4 100644
--- a/voxcpm_nodes.py
+++ b/voxcpm_nodes.py
@@ -24,7 +24,10 @@ def get_available_devices():
devices = []
if torch.cuda.is_available():
devices.append("cuda")
-
+
+ if torch.xpu.is_available():
+ devices.append("xpu")
+
# Check for DirectML on Windows
try:
import platform
@@ -37,14 +40,14 @@ def get_available_devices():
if hasattr(torch.version, 'hip') and torch.version.hip is not None:
try:
if torch.cuda.is_available() and torch.cuda.get_device_name(0).lower().find('amd') != -1:
- devices.append("hip")
+ devices.append("hip")
except:
pass

# Check for MPS on Apple Silicon
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
devices.append("mps")
-
+
devices.append("cpu")
return devices

@@ -54,6 +57,8 @@ def set_seed(seed: int):
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(seed)
+ if torch.xpu.is_available():
+ torch.xpu.manual_seed_all(seed)

class VoxCPMNode(io.ComfyNode):
CATEGORY = "audio/tts"
@@ -112,7 +117,7 @@ class VoxCPMNode(io.ComfyNode):
if is_cloning and not prompt_text:
raise ValueError("Prompt text is required when providing prompt audio for voice cloning.")

- if device == "cuda":
+ if device in ["cuda", "xpu"]:
load_device = model_management.get_torch_device()
offload_device = model_management.intermediate_device()
else: