10 changes: 5 additions & 5 deletions CMakeLists.txt
@@ -28,25 +28,25 @@ if(USE_NPU)
if(DEVICE_TYPE STREQUAL "USE_A3")
message("downloading a3 arm xllm kernels")
file(DOWNLOAD
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.0.0-Linux.a3.arm.rpm"
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.2.1-Linux.a3.arm.rpm"
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
)
else()
message("downloading a2 arm xllm_kernels")
if(DEVICE_ARCH STREQUAL "ARM")
message("downloading a2 arm xllm_kernels")
file(DOWNLOAD
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.2.0-Linux.a2.arm.rpm"
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.2.1-Linux.a2.arm.rpm"
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
)
else()
message("downloading a3 x86 xllm_kernels")
file(DOWNLOAD
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.2.0-Linux.a2.x86.rpm"
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.2.1-Linux.a2.x86.rpm"
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
)
endif()
endif()
execute_process(COMMAND rpm -ivh "${CMAKE_BINARY_DIR}/xllm_kernels.rpm")
execute_process(COMMAND rpm -ivh --replacepkgs --replacefiles "${CMAKE_BINARY_DIR}/xllm_kernels.rpm")
file(WRITE "${CMAKE_BINARY_DIR}/.xllm_installed" "")
endif()
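The install command now passes `--replacepkgs --replacefiles`, so re-running CMake overwrites an already-installed kernels package instead of failing on the `rpm -ivh` step. A minimal post-install check, as a sketch assuming the RPM registers a package named `xllm_kernels` (the package name is an assumption, not confirmed by this diff):

```bash
#!/usr/bin/env bash
# Sketch: verify the NPU kernels RPM after the CMake download/install step.
# Assumes the package is registered as "xllm_kernels"; adjust if the real
# package name differs.
set -euo pipefail

EXPECTED="1.2.1"
installed="$(rpm -q --queryformat '%{VERSION}' xllm_kernels 2>/dev/null || echo none)"

if [[ "${installed}" == "${EXPECTED}"* ]]; then
  echo "xllm_kernels ${installed} is installed"
else
  echo "expected xllm_kernels ${EXPECTED}, found ${installed}" >&2
  exit 1
fi
```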

12 changes: 6 additions & 6 deletions README.md
@@ -112,18 +112,18 @@ Supported models list:
First, download the image we provide:
```bash
# A2 x86
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts
# A2 arm
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts-aarch64
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts-aarch64
# A3 arm
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hc-py3.11-oe24.03-lts-aarch64
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hc-rc2-py3.11-oe24.03-lts-aarch64
# or
# A2 x86
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts
# A2 arm
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts-aarch64
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts-aarch64
# A3 arm
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hc-py3.11-oe24.03-lts-aarch64
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hc-rc2-py3.11-oe24.03-lts-aarch64
```
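The retag only changes which image is pulled; creating the container works as before. For orientation, a sketch assembled from the `docker run` flags shown in the compile guide later in this diff — the Ascend driver mount and the trailing `/bin/bash` are assumptions, so follow the full command in the docs:

```bash
# Sketch only: start a dev container from the A2 x86 image pulled above.
# The flags mirror docs/en/getting_started/compile.md; the driver mount is
# an assumption and may need adjusting for your host.
IMAGE="xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts"
sudo docker run -it --ipc=host -u 0 --privileged --name mydocker --network=host \
  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
  "${IMAGE}" /bin/bash
```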
Then create the corresponding container:
```bash
12 changes: 6 additions & 6 deletions README_zh.md
@@ -112,18 +112,18 @@ xLLM 提供了强大的智能计算能力,通过硬件系统的算力优化与
First, download the image we provide:
```bash
# A2 x86
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts
# A2 arm
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts-aarch64
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts-aarch64
# A3 arm
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hc-py3.11-oe24.03-lts-aarch64
docker pull xllm/xllm-ai:xllm-0.6.0-dev-hc-rc2-py3.11-oe24.03-lts-aarch64
# or
# A2 x86
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts
# A2 arm
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts-aarch64
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts-aarch64
# A3 arm
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hc-py3.11-oe24.03-lts-aarch64
docker pull quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hc-rc2-py3.11-oe24.03-lts-aarch64
```
Then create the corresponding container:
```bash
2 changes: 1 addition & 1 deletion cibuild/build.sh
@@ -6,7 +6,7 @@ function error() {
exit 1
}

IMAGE="9d0b6f5a80f6"
IMAGE="quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts"

RUN_OPTS=(
--rm
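Pinning `IMAGE` to a registry tag rather than the local image ID `9d0b6f5a80f6` lets any machine resolve the CI image. If stricter reproducibility is ever needed, the tag could additionally be resolved to an immutable digest — a sketch, not part of this script:

```bash
# Sketch: resolve the pinned CI tag to a content digest for stricter pinning.
IMAGE="quay.io/jd_xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts"
docker pull "${IMAGE}"
docker image inspect --format '{{index .RepoDigests 0}}' "${IMAGE}"
```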
6 changes: 3 additions & 3 deletions docs/en/getting_started/compile.md
@@ -14,9 +14,9 @@ sudo docker run -it --ipc=host -u 0 --privileged --name mydocker --network=host

| Device | Arch | Images |
|:---------:|:-----------:|:-------------:|
| A2 | x86 | xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts |
| A2 | arm | xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts-aarch64 |
| A3 | arm | xllm/xllm-ai:xllm-0.6.0-dev-hc-py3.11-oe24.03-lts-aarch64 |
| A2 | x86 | xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts |
| A2 | arm | xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts-aarch64 |
| A3 | arm | xllm/xllm-ai:xllm-0.6.0-dev-hc-rc2-py3.11-oe24.03-lts-aarch64 |
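Because the table maps device and architecture to a single rc2 tag, image selection can be scripted. A sketch assuming the build host reports `x86_64` or `aarch64` via `uname -m`:

```bash
#!/usr/bin/env bash
# Sketch: pick the dev image tag from the table above by device and host arch.
set -euo pipefail

DEVICE="${1:-A2}"        # A2 or A3
ARCH="$(uname -m)"       # x86_64 or aarch64

case "${DEVICE}:${ARCH}" in
  A2:x86_64)  IMAGE="xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts" ;;
  A2:aarch64) IMAGE="xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts-aarch64" ;;
  A3:aarch64) IMAGE="xllm/xllm-ai:xllm-0.6.0-dev-hc-rc2-py3.11-oe24.03-lts-aarch64" ;;
  *) echo "unsupported combination: ${DEVICE} on ${ARCH}" >&2; exit 1 ;;
esac

docker pull "${IMAGE}"
echo "using ${IMAGE}"
```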


## Installation
6 changes: 3 additions & 3 deletions docs/zh/getting_started/compile.md
@@ -15,9 +15,9 @@ sudo docker run -it --ipc=host -u 0 --privileged --name mydocker --network=host

| Device | Arch | Images |
|:---------:|:-----------:|:-------------:|
| A2 | x86 | xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts |
| A2 | arm | xllm/xllm-ai:xllm-0.6.0-dev-hb-py3.11-oe24.03-lts-aarch64 |
| A3 | arm | xllm/xllm-ai:xllm-0.6.0-dev-hc-py3.11-oe24.03-lts-aarch64 |
| A2 | x86 | xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts |
| A2 | arm | xllm/xllm-ai:xllm-0.6.0-dev-hb-rc2-py3.11-oe24.03-lts-aarch64 |
| A3 | arm | xllm/xllm-ai:xllm-0.6.0-dev-hc-rc2-py3.11-oe24.03-lts-aarch64 |


## Installation
4 changes: 0 additions & 4 deletions xllm/core/distributed_runtime/worker_server.cpp
@@ -131,11 +131,7 @@ void WorkerServer::create_server(const runtime::Options& options,
#if defined(USE_NPU)
atb_speed::base::Mapping mapping;
mapping.ParseParam(mapping_data);
#if defined(USE_A3)
mapping.InitGlobalCommDomain(FLAGS_communication_backend);
#else
mapping.InitCommDomain(FLAGS_communication_backend);
#endif
auto moeEpParallelInfo = mapping.Get(atb_speed::base::MOE_EP);
auto dispatchAndCombinecommDomain =
atb_speed::GetSingleton<atb_speed::ExternalCommManager>().GetCommDomain(
8 changes: 4 additions & 4 deletions xllm/core/layers/npu/CMakeLists.txt
@@ -20,8 +20,8 @@ cc_library(
atb_head_impl.h
siglip_encoder_layer.h
multi_head_attention.h
$<$<BOOL:${USE_A2}>:qwen2_5_vision_encoder_layer.h>
$<$<BOOL:${USE_A2}>:qwen3_moe_decoder_layer.h>
qwen2_5_vision_encoder_layer.h
qwen3_moe_decoder_layer.h
atb_linear.h
atb_parallel_linear.h
buffer/atb_buffer.h
@@ -30,8 +30,8 @@ cc_library(
llama_decoder_layer.cpp
qwen2_decoder_layer.cpp
qwen3_decoder_layer.cpp
$<$<BOOL:${USE_A2}>:qwen2_5_vision_encoder_layer.cpp>
$<$<BOOL:${USE_A2}>:qwen3_moe_decoder_layer.cpp>
qwen2_5_vision_encoder_layer.cpp
qwen3_moe_decoder_layer.cpp
attn_mask.cpp
atb_base.cpp
word_embedding.cpp
7 changes: 0 additions & 7 deletions xllm/core/layers/npu/atb_head_impl.cpp
@@ -60,16 +60,9 @@ void AtbLmHeadImpl::param_from_args(atb_speed::common::LmHeadParam& param,
parallelInfo.rankIds.size();
param.linearParallelParam.tensorParallelInfo.backend =
FLAGS_communication_backend;
#if defined(USE_A3)
parallelInfo.InitCommDomain(
param.linearParallelParam.tensorParallelInfo.hcommInfo,
param.linearParallelParam.tensorParallelInfo.commDomain);
#else
param.linearParallelParam.tensorParallelInfo.hcommInfo =
parallelInfo.hcclComm;
param.linearParallelParam.tensorParallelInfo.commDomain =
parallelInfo.commDomain;
#endif
}
}
}
8 changes: 6 additions & 2 deletions xllm/core/layers/npu/deepseek_v2_decoder_layer.cpp
@@ -437,9 +437,9 @@ void DeepseekV2DecoderImpl::initialize_basic_parameters(
}
param.maskfree = true; // TODO
param.enableSwiGLUQuantForSharedExperts = false; // TODO
#if defined(USE_A3)
param.scaledTopk = -1;
param.enableATBGateMatmul = 1;
#if defined(USE_A3)
param.enableLcocAll2All = 1;
#endif
num_key_value_heads_ = static_cast<int>(args.n_kv_heads().value());
@@ -526,7 +526,11 @@ void DeepseekV2DecoderImpl::initialize_mlp_parameters(
param.dispatchAndCombineHcclComm = parallel_args.dispatchAndCombineHcclComm();
param.dispatchAndCombinecommDomain =
parallel_args.dispatchAndCombinecommDomain();

#if defined(USE_A3)
param.enableIndexGmm = false;
#else
param.enableIndexGmm = true;
#endif
if (layer_id_ >= param.firstKDenseReplace) {
param.enableQkvdownDp = false;
param.enableSharedExpertDp = false;
2 changes: 1 addition & 1 deletion xllm/models/CMakeLists.txt
@@ -33,7 +33,7 @@ cc_library(
$<$<BOOL:${USE_NPU}>:llama.h>
$<$<BOOL:${USE_NPU}>:minicpmv.h>
$<$<BOOL:${USE_NPU}>:qwen2.h>
$<$<BOOL:${USE_A2}>:qwen2_5_vl.h>
$<$<BOOL:${USE_NPU}>:qwen2_5_vl.h>
$<$<BOOL:${USE_NPU}>:qwen3.h>
$<$<BOOL:${USE_NPU}>:qwen3_moe.h>
$<$<BOOL:${USE_NPU}>:qwen_base.h>
2 changes: 0 additions & 2 deletions xllm/models/clip_text_model.h
@@ -17,9 +17,7 @@
#include "processors/input_processor.h"
#include "processors/pywarpper_image_processor.h"
#include "processors/qwen2_vl_image_processor.h"
#if defined(USE_A2)
#include "qwen2_5_vl.h"
#endif
#include "xllm_kernels/core/include/atb_speed/log.h"

namespace xllm::hf {
24 changes: 11 additions & 13 deletions xllm/models/models.h
@@ -16,20 +16,18 @@ limitations under the License.
#pragma once

#if defined(USE_NPU)
#include "deepseek_v2.h" // IWYU pragma: keep
#include "deepseek_v2_mtp.h" // IWYU pragma: keep
#include "deepseek_v3.h" // IWYU pragma: keep
#include "kimi_k2.h" // IWYU pragma: keep
#include "llama.h" // IWYU pragma: keep
#include "llama3.h" // IWYU pragma: keep
#include "minicpmv.h" // IWYU pragma: keep
#include "qwen2.h" // IWYU pragma: keep
#include "qwen3.h" // IWYU pragma: keep
#include "qwen3_embedding.h" // IWYU pragma: keep
#include "qwen_base.h" // IWYU pragma: keep
#if defined(USE_A2)
#include "deepseek_v2.h" // IWYU pragma: keep
#include "deepseek_v2_mtp.h" // IWYU pragma: keep
#include "deepseek_v3.h" // IWYU pragma: keep
#include "flux/pipeline_flux.h" // IWYU pragma: keep
#include "kimi_k2.h" // IWYU pragma: keep
#include "llama.h" // IWYU pragma: keep
#include "llama3.h" // IWYU pragma: keep
#include "minicpmv.h" // IWYU pragma: keep
#include "qwen2.h" // IWYU pragma: keep
#include "qwen2_5_vl.h" // IWYU pragma: keep
#include "qwen3.h" // IWYU pragma: keep
#include "qwen3_embedding.h" // IWYU pragma: keep
#include "qwen3_moe.h" // IWYU pragma: keep
#include "qwen_base.h" // IWYU pragma: keep
#endif
#endif