diff --git a/vllm/README.md b/vllm/README.md
index 8c80c85..93e4a6c 100644
--- a/vllm/README.md
+++ b/vllm/README.md
@@ -21,6 +21,7 @@ llm-scaler-vllm is an extended and optimized version of vLLM, specifically adapt
    2.5 [Omni Model Support](#25-omni-model-support)
    2.6 [Data Parallelism (DP)](#26-data-parallelism-dp)
    2.7 [Finding maximum Context Length](#27-finding-maximum-context-length)
+   2.8 [Multi-Modal WebUI](#28-multi-modal-webui)
 3. [Supported Models](#3-supported-models)
 4. [Troubleshooting](#4-troubleshooting)
 5. [Performance tuning](#5-performance-tuning)
@@ -2314,6 +2315,33 @@ In this case, you should adjust the launch command with:
 --max-model-len 114432
 ```
 
+### 2.8 Multi-Modal WebUI
+
+The project provides two interfaces for interacting with Qwen2.5-VL models:
+
+#### 📌 Core Components
+- **Inference Engine**: vLLM (Intel-optimized)
+- **Interfaces**:
+  - Gradio (for rapid prototyping)
+  - ComfyUI (for complex workflows)
+
+#### 🚀 Deployment Options
+
+#### Option 1: Gradio Deployment (Recommended for Most Users)
+- See `/llm-scaler/vllm/webui/multi-modal-gradio/README.md` for deployment details.
+
+#### Option 2: ComfyUI Deployment (Advanced Workflows)
+- See `/llm-scaler/vllm/webui/multi-modal-comfyui/README.md` for deployment details.
+
+#### 🔧 Configuration Guide
+
+| Parameter | Effect | Recommended Value |
+|-----------|--------|-------------------|
+| `--quantization fp8` | FP8 quantization for XPU acceleration | Required |
+| `-tp=2` | Tensor parallelism | Match GPU count |
+| `--max-model-len` | Context window | 32768 (max) |
+
 ---
 
 ## 3. Supported Models
diff --git a/vllm/webui/multi-modal-comfyui/README.md b/vllm/webui/multi-modal-comfyui/README.md
new file mode 100644
index 0000000..63203a3
--- /dev/null
+++ b/vllm/webui/multi-modal-comfyui/README.md
@@ -0,0 +1,284 @@
+# Qwen2.5-VL-3B-Instruct Deployment Guide (ComfyUI + Intel GPU + Linux)
+
+This document provides instructions for deploying the `Qwen2.5-VL-3B-Instruct` multimodal LLM on Linux systems with `Intel GPU` acceleration via a `ComfyUI` workflow.
+
+## 🛠️ Installation Procedure
+
+### 1. Environment Setup
+```bash
+# Install system dependencies
+sudo apt update && sudo apt install -y \
+    git python3-pip python3-venv \
+    ocl-icd-opencl-dev
+
+# Configure Intel GPU drivers (if not present)
+sudo apt install -y \
+    intel-opencl-icd \
+    intel-level-zero-gpu \
+    level-zero
+```
+
+### 2. Conda Environment Configuration
+```bash
+conda create -n comfyqwen python=3.11
+conda activate comfyqwen
+```
+
+### 3. ComfyUI Installation
+```bash
+git clone https://github.com/comfyanonymous/ComfyUI.git
+cd ./ComfyUI
+
+# Install Intel-optimized PyTorch
+pip install torch torchvision torchaudio \
+    --index-url https://download.pytorch.org/whl/xpu
+
+# For nightly builds with potential performance improvements:
+# pip install --pre torch torchvision torchaudio \
+#     --index-url https://download.pytorch.org/whl/nightly/xpu
+
+pip install -r requirements.txt
+```
+
+### 4. Qwen2.5-VL Custom Node Deployment
+```bash
+# Download the custom node definition
+git clone https://github.com/IuvenisSapiens/ComfyUI_Qwen2_5-VL-Instruct
+
+# Move the ComfyUI_Qwen2_5-VL-Instruct folder into ComfyUI's custom_nodes/ directory
+mv ComfyUI_Qwen2_5-VL-Instruct custom_nodes/
+
+# Place the downloaded Qwen2.5-VL-3B-Instruct model folder into models/prompt_generator/
+# (create the prompt_generator subdirectory first if it does not exist)
+mkdir -p models/prompt_generator
+```
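+
+Before moving on, you can optionally confirm that the XPU build of PyTorch can see the GPU (a minimal check, assuming the `comfyqwen` environment from step 2 is active):
+
+```bash
+python -c "import torch; print(torch.__version__, torch.xpu.is_available())"
+```
+
+If you do not yet have the model weights locally, one possible way to fetch them for step 4 is the `huggingface_hub` CLI (run from inside the `ComfyUI` directory; any equivalent download method works):
+
+```bash
+pip install -U "huggingface_hub[cli]"
+mkdir -p models/prompt_generator
+huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct \
+    --local-dir models/prompt_generator/Qwen2.5-VL-3B-Instruct
+```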
+
+An example workflow, `ComfyUI_Qwen2_5-VL-Instruct_workflow.json`, is reproduced below; it wires a `VideoLoader` node through `MultiplePathsInput` into the `Qwen2_VQA` node and previews the result:
+
+```json
+{
+  "id": "9f2dfc63-3d19-433d-a7c0-49d83464f553",
+  "revision": 0,
+  "last_node_id": 59,
+  "last_link_id": 72,
+  "nodes": [
+    {
+      "id": 56,
+      "type": "Qwen2_VQA",
+      "pos": [199.93017578125, 46.947696685791016],
+      "size": [322.1059265136719, 348],
+      "flags": {},
+      "order": 2,
+      "mode": 0,
+      "inputs": [
+        {"name": "source_path", "shape": 7, "type": "PATH", "link": 70},
+        {"name": "image", "shape": 7, "type": "IMAGE", "link": null}
+      ],
+      "outputs": [
+        {"name": "STRING", "type": "STRING", "slot_index": 0, "links": [72]}
+      ],
+      "properties": {"Node name for S&R": "Qwen2_VQA", "widget_ue_connectable": {}},
+      "widgets_values": [
+        "Describe the video in detail",
+        "Qwen2.5-VL-3B-Instruct",
+        "none",
+        false,
+        0.7,
+        2048,
+        200704,
+        1003520,
+        1444,
+        "randomize",
+        "eager"
+      ]
+    },
+    {
+      "id": 59,
+      "type": "PreviewAny",
+      "pos": [702.7207641601562, 61.4115104675293],
+      "size": [140, 76],
+      "flags": {},
+      "order": 3,
+      "mode": 0,
+      "inputs": [
+        {"name": "source", "type": "*", "link": 72}
+      ],
+      "outputs": [],
+      "properties": {"Node name for S&R": "PreviewAny"},
+      "widgets_values": []
+    },
+    {
+      "id": 58,
+      "type": "VideoLoader",
+      "pos": [-513.0911254882812, 130.9906768798828],
+      "size": [430.6719665527344, 452.4115295410156],
+      "flags": {},
+      "order": 0,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {"name": "VIDEO", "type": "VIDEO", "links": null},
+        {"name": "PATH", "type": "PATH", "links": [71]}
+      ],
+      "properties": {"Node name for S&R": "VideoLoader", "widget_ue_connectable": {}},
+      "widgets_values": ["19_raw.mp4", "image"]
+    },
+    {
+      "id": 57,
+      "type": "MultiplePathsInput",
+      "pos": [-49.730098724365234, 137.55857849121094],
+      "size": [210, 82],
+      "flags": {},
+      "order": 1,
+      "mode": 0,
+      "inputs": [
+        {"name": "path_1", "type": "PATH", "link": 71}
+      ],
+      "outputs": [
+        {"name": "paths", "type": "PATH", "slot_index": 0, "links": [70]}
+      ],
+      "properties": {"Node name for S&R": "MultiplePathsInput", "widget_ue_connectable": {}},
+      "widgets_values": [1]
+    }
+  ],
+  "links": [
+    [70, 57, 0, 56, 0, "PATH"],
+    [71, 58, 1, 57, 0, "PATH"],
+    [72, 56, 0, 59, 0, "*"]
+  ],
+  "groups": [],
+  "config": {},
+  "extra": {
+    "ds": {
+      "scale": 0.9646149645000006,
+      "offset": [788.9511067206646, 382.6344411516708]
+    },
+    "frontendVersion": "1.24.4",
+    "ue_links": [],
+    "links_added_by_ue": [],
+    "VHS_latentpreview": false,
+    "VHS_latentpreviewrate": 0,
+    "VHS_MetadataImage": true,
+    "VHS_KeepIntermediate": true
+  },
+  "version": 0.4
+}
+```
+
+## 🚀 Launching ComfyUI
+```bash
+python main.py
+```
+Access the web interface at: `http://localhost:8188`
+
+## Post-Installation Configuration
+1. Replace the final node of your workflow with a `Preview Any` node (as in the example workflow above)
+2. Reference model path: `./models/prompt_generator/Qwen2.5-VL-3B-Instruct/`
+
+![Workflow Example](pic/image.png)
+
+## References
+- [ComfyUI GitHub](https://github.com/comfyanonymous/ComfyUI)
+- [Intel PyTorch XPU](https://intel.github.io/intel-extension-for-pytorch/)
+- [Qwen2.5 Model Card](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
diff --git a/vllm/webui/multi-modal-comfyui/pic/image.png b/vllm/webui/multi-modal-comfyui/pic/image.png
new file mode 100644
index 0000000..e9817a6
Binary files /dev/null and b/vllm/webui/multi-modal-comfyui/pic/image.png differ
diff --git a/vllm/webui/multi-modal-gradio/README.md b/vllm/webui/multi-modal-gradio/README.md
new file mode 100644
index 0000000..6e3f8c0
--- /dev/null
+++ b/vllm/webui/multi-modal-gradio/README.md
@@ -0,0 +1,89 @@
+# Qwen2.5-VL-7B-Instruct Multimodal Deployment Guide (Intel GPU/Docker/Gradio)
+
+![Intel XPU](https://img.shields.io/badge/Accelerator-Intel%20GPU-green)
+![Docker](https://img.shields.io/badge/Container-Docker-2496ED)
+![Gradio](https://img.shields.io/badge/GUI-Gradio-FF4B4B)
+
+## 📌 Core Components
+
+- **Model**: Qwen2.5-VL-7B-Instruct (vision-language multimodal)
+- **Inference Engine**: vLLM with Intel XPU optimizations
+- **Interface**: Gradio WebUI
+- **Deployment**: Docker container
+
+## 🚀 Quick Deployment
+
+### 1. Launch Docker Container
+```bash
+sudo docker run -td \
+    --privileged \
+    --net=host \
+    --device=/dev/dri \
+    --name=yourcontainername \
+    -v /home/intel/LLM:/llm/models/ \
+    -e no_proxy=localhost,127.0.0.1 \
+    -e http_proxy=$http_proxy \
+    -e https_proxy=$https_proxy \
+    --shm-size="32g" \
+    --entrypoint /bin/bash \
+    intel/llm-scaler-vllm:latest
+```
+
+### 2. Start vLLM Service Inside Container
+```bash
+docker exec -it yourcontainername bash
+```
+```bash
+TORCH_LLM_ALLREDUCE=1 \
+VLLM_USE_V1=1 \
+CCL_ZE_IPC_EXCHANGE=pidfd \
+VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
+VLLM_WORKER_MULTIPROC_METHOD=spawn \
+python3 -m vllm.entrypoints.openai.api_server \
+    --model /llm/models/Qwen2.5-VL-7B-Instruct \
+    --dtype=float16 \
+    --device=xpu \
+    --enforce-eager \
+    --port 8000 \
+    --host 0.0.0.0 \
+    --trust-remote-code \
+    --gpu-memory-util=0.9 \
+    --no-enable-prefix-caching \
+    --max-num-batched-tokens=8192 \
+    --disable-log-requests \
+    --max-model-len=32768 \
+    --block-size 64 \
+    --quantization fp8 \
+    -tp=2
+```
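+
+Once the server is up, you can smoke-test the OpenAI-compatible endpoint before wiring up the UI. This is a minimal sketch; the `model` field must match the `--model` path passed to the server above, and the request can be sent from the host because the container uses `--net=host`:
+
+```bash
+curl http://localhost:8000/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+          "model": "/llm/models/Qwen2.5-VL-7B-Instruct",
+          "messages": [{"role": "user", "content": "Describe what you can do in one sentence."}],
+          "max_tokens": 64
+        }'
+```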
+
+### 3. Launch Gradio Interface on Host
+```bash
+conda create -n qwen_gradio python=3.11
+conda activate qwen_gradio
+pip install gradio openai opencv-python
+```
+```bash
+python /llm-scaler/vllm/webui/multi-modal-gradio/main.py --model /llm/models/Qwen2.5-VL-7B-Instruct
+# The model weights must already be present in the mounted models directory,
+# and the --model value must match the path used when starting the vLLM server.
+```
+
+## 🌐 Access Interface
+Open in browser: `http://localhost:8003` (replace `localhost` with the host's IP if you pass `--host 0.0.0.0`)
+
+![Gradio WebUI](pic/image.png)
+
+## 🔧 Key Parameters
+
+### vLLM Server Flags
+| Parameter | Purpose | Recommended Value |
+|-----------|---------|-------------------|
+| `--quantization fp8` | FP8 acceleration | Required |
+| `-tp=2` | Tensor parallelism | Adjust based on GPU count |
+| `--max-model-len` | Context window | 32768 |
+
+## 📜 License
+- Model: [Qwen License](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)
+- Code: `Apache-2.0`
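+
+For reference, `main.py` (below) exposes a few flags beyond `--model`. For example, when the vLLM server runs on a different machine, an invocation along these lines should work (`<server-ip>` is a placeholder for your server's address):
+
+```bash
+python main.py \
+    --model /llm/models/Qwen2.5-VL-7B-Instruct \
+    --model-url http://<server-ip>:8000/v1 \
+    --host 0.0.0.0 \
+    --port 8003
+```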
diff --git a/vllm/webui/multi-modal-gradio/main.py b/vllm/webui/multi-modal-gradio/main.py
new file mode 100644
index 0000000..e16f225
--- /dev/null
+++ b/vllm/webui/multi-modal-gradio/main.py
@@ -0,0 +1,238 @@
+# SPDX-License-Identifier: Apache-2.0
+import argparse
+import gradio as gr
+from openai import OpenAI, APIError
+from typing import List, Dict, Any, Optional, Tuple
+import os
+import base64
+from pathlib import Path
+import cv2
+import tempfile
+import shutil
+from uuid import uuid4
+
+# Scratch directory for uploaded videos; recreated on every start so Gradio can
+# serve the files (see allowed_paths in launch()).
+VIDEO_TEMP_DIR = Path("gradio_temp_videos")
+if VIDEO_TEMP_DIR.exists():
+    shutil.rmtree(VIDEO_TEMP_DIR)
+VIDEO_TEMP_DIR.mkdir()
+
+parser = argparse.ArgumentParser(description='Multimodal Chatbot with Video Support')
+parser.add_argument('--model-url', type=str, default='http://localhost:8000/v1', help='Model URL')
+parser.add_argument('-m', '--model', type=str, required=True, help='Model name')
+parser.add_argument('--temp', type=float, default=0.8, help='Temperature for generation')
+parser.add_argument('--stop-token-ids', type=str, default='', help='Comma-separated stop token IDs')
+parser.add_argument("--host", type=str, default="127.0.0.1")
+parser.add_argument("--port", type=int, default=8003)
+args = parser.parse_args()
+
+# OpenAI-compatible client pointed at the vLLM server
+client = OpenAI(api_key="EMPTY", base_url=args.model_url)
+
+
+def is_image_file(filename: str) -> bool:
+    image_exts = ['.jpg', '.jpeg', '.png', '.webp', '.bmp']
+    return any(filename.lower().endswith(ext) for ext in image_exts)
+
+
+def is_video_file(filename: str) -> bool:
+    video_exts = ['.mp4', '.avi', '.mkv', '.mov', '.webm']
+    return any(filename.lower().endswith(ext) for ext in video_exts)
+
+
+def encode_file_to_base64(filepath: str) -> str:
+    with open(filepath, "rb") as file:
+        return base64.b64encode(file.read()).decode('utf-8')
+
+
+def extract_frames_from_video(video_path: str, num_frames: int = 10) -> List[str]:
+    """Extract num_frames evenly spaced frames and return their temp-file paths."""
+    try:
+        video = cv2.VideoCapture(video_path)
+        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+        if total_frames <= 0:
+            return []
+
+        # Evenly spaced frame indices across the whole video
+        frame_indices = [int(i) for i in (total_frames / (num_frames + 1) * (j + 1) for j in range(num_frames))]
+        temp_files = []
+
+        for frame_index in frame_indices:
+            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
+            success, frame = video.read()
+            if success:
+                with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_f:
+                    cv2.imwrite(temp_f.name, frame)
+                    temp_files.append(temp_f.name)
+        video.release()
+        return temp_files
+    except Exception as e:
+        print(f"Error while extracting video frames: {e}")
+        return []
+
+
+def predict(messages: List[Dict[str, Any]]):
+    """
+    Call the model API and stream the response back.
+    API errors are caught and surfaced to the UI instead of raising.
+    """
+    try:
+        response = client.chat.completions.create(
+            model=args.model,
+            messages=messages,
+            temperature=args.temp,
+            stream=True,
+            extra_body={
+                "repetition_penalty": 1.0,
+                "stop_token_ids": [int(id) for id in args.stop_token_ids.split(",") if id]
+            }
+        )
+        for chunk in response:
+            if chunk.choices[0].delta.content is not None:
+                yield chunk.choices[0].delta.content, False
+    except APIError as e:
+        error_message = f"Sorry, the model call failed: {e.message}"
+        if "longer than the maximum model length" in e.message:
+            error_message = (
+                "❌ **Input too long** ❌\n\n"
+                "The combined length of your text, images, or extracted video frames "
+                "exceeds the model's context limit. Please try:\n\n"
+                "- shortening the text\n"
+                "- uploading smaller images\n"
+                "- using a shorter video clip"
+            )
+        yield error_message, True
+
+
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎥 Qwen2.5-VL-7B-Instruct Model Serving")
+
+    chatbot = gr.Chatbot(height=1200, label="Qwen2.5-VL-7B-Instruct", avatar_images=("👨", "🤖"), render_markdown=True)
+
+    upload_visible = gr.State(False)
+
+    def toggle_upload(visible):
+        new_visible = not visible
+        return new_visible, gr.Row(visible=new_visible)
+
+    with gr.Group():
+        with gr.Row(equal_height=True):
+            msg = gr.Textbox(
+                placeholder="Type a message...",
+                show_label=False,
+                container=False,
+                lines=2,
+                max_lines=8,
+                autofocus=True,
+                scale=95
+            )
+            attach_btn = gr.Button("📎", scale=5)
+
+    upload_row = gr.Row(visible=False)
+    with upload_row:
+        file_upload = gr.Files(
+            file_types=["image", "video"],
+            show_label=False,
+            container=False
+        )
+    attach_btn.click(
+        toggle_upload,
+        inputs=upload_visible,
+        outputs=[upload_visible, upload_row],
+        show_progress=False
+    )
+
+    with gr.Row():
+        submit_btn = gr.Button("🚀 Submit", variant="primary")
+        clear_btn = gr.Button("🧹 Clear")
+
+    api_history_state = gr.State([])
+
+    def user_and_bot_response(
+        gradio_history: List[Tuple[str, str]],
+        api_history: List[Dict[str, Any]],
+        user_message: str,
+        files: Optional[List[Any]]
+    ):
+        api_user_content = []
+        ui_display_string = ""
+
+        if user_message.strip():
+            # NOTE: "用中文回答" asks the model to answer in Chinese; remove the prefix for English replies.
+            api_user_content.append({"type": "text", "text": "用中文回答" + user_message.strip()})
+            ui_display_string += user_message.strip() + "\n\n"
+
+        if files:
+            for file in files:
+                filename = file.name
+
+                if is_image_file(filename):
+                    base64_data = encode_file_to_base64(filename)
+                    mime_type = f"image/{Path(filename).suffix[1:].lower()}"
+                    data_url = f"data:{mime_type};base64,{base64_data}"
+                    ui_display_string += f"![{os.path.basename(filename)}]({data_url})\n"
+                    api_user_content.append({"type": "image_url", "image_url": {"url": data_url}})
+
+                elif is_video_file(filename):
+                    unique_filename = f"{uuid4()}{Path(filename).suffix}"
+                    new_video_path = VIDEO_TEMP_DIR / unique_filename
+
+                    shutil.copyfile(filename, new_video_path)
+                    print("Successfully uploaded")
+
+                    with open(new_video_path, "rb") as f:
+                        base64_data = base64.b64encode(f.read()).decode()
+                    # NOTE: the inline markup for previewing the uploaded video in the chat
+                    # window appears to have been stripped from this file; nothing is added
+                    # to ui_display_string here and the base64 data above is currently unused.
+                    ui_display_string += f""""""
+
+                    print(ui_display_string)
+
+                    frame_paths = extract_frames_from_video(str(new_video_path), num_frames=10)
+                    if frame_paths:
+                        for frame_path in frame_paths:
+                            base64_data = encode_file_to_base64(frame_path)
+                            api_user_content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_data}"}})
+                            os.unlink(frame_path)
+
+        if not api_user_content:
+            yield gradio_history, api_history
+            return
+        api_history.append({"role": "user", "content": api_user_content})
+        gradio_history.append((ui_display_string, None))
+        yield gradio_history, api_history
+        response_stream = predict(api_history)
+        full_response = ""
+        is_error = False
+        for partial_response, error_flag in response_stream:
+            full_response += partial_response
+            is_error = error_flag
+            gradio_history[-1] = (ui_display_string, full_response)
+            yield gradio_history, api_history
+            if is_error:
+                break
+        if is_error:
+            # Drop the failed user turn so it is not resent on the next request
+            api_history.pop()
+        else:
+            api_history.append({"role": "assistant", "content": full_response})
+        yield gradio_history, api_history
+
+    def clear_history():
+        return [], []
+
+    submit_btn.click(
+        user_and_bot_response,
+        inputs=[chatbot, api_history_state, msg, file_upload],
+        outputs=[chatbot, api_history_state],
+        queue=True
+    ).then(
+        lambda: (gr.Textbox(value=""), gr.Files(value=None)),
+        None,
+        [msg, file_upload],
+        queue=False
+    )
+
+    clear_btn.click(
+        clear_history,
+        None,
+        [chatbot, api_history_state],
+        queue=True
+    )
+
+if __name__ == "__main__":
+    demo.queue().launch(
+        server_name=args.host,
+        server_port=args.port,
+        share=True,
+        allowed_paths=[str(VIDEO_TEMP_DIR)]
+    )
\ No newline at end of file
diff --git a/vllm/webui/multi-modal-gradio/pic/image.png b/vllm/webui/multi-modal-gradio/pic/image.png
new file mode 100644
index 0000000..25d2319
Binary files /dev/null and b/vllm/webui/multi-modal-gradio/pic/image.png differ