9 changes: 2 additions & 7 deletions xllm/core/framework/model/CMakeLists.txt
@@ -15,17 +15,12 @@ set(BASE_DEPS
 )
 
 if(USE_NPU)
-  # Modify dependencies for npu
   list(APPEND BASE_DEPS torch_npu)
   list(APPEND BASE_DEPS :npu_layers)
   list(APPEND BASE_DEPS :platform_npu)
+else()
+  list(APPEND BASE_DEPS :common_layers)
 endif()
-
-if(USE_MLU)
-  # Modify dependencies for mlu
-  list(APPEND BASE_DEPS torch_mlu)
-  list(APPEND BASE_DEPS :mlu_layers)
-endif()
 
 # Define the library
 cc_library(
5 changes: 2 additions & 3 deletions xllm/core/layers/CMakeLists.txt
@@ -79,7 +79,6 @@ cc_library(
 
 if(USE_NPU)
   add_subdirectory(npu)
-endif()
-if(USE_MLU)
-  add_subdirectory(mlu)
+else()
+  add_subdirectory(common)
 endif()
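
Note: the net effect of the two build changes above is that common/ becomes the default layer backend, built whenever USE_NPU is off, so an MLU build now compiles the shared sources instead of its own copies. A minimal, self-contained sketch of the compile-time dispatch these rules pair with (the layer_backend() function is purely illustrative, not xllm code):

#include <cstdio>

// Sketch: the same symbol resolves to whichever backend was compiled in,
// mirroring how the headers below select an implementation per backend.
#if defined(USE_NPU)
const char* layer_backend() { return "npu"; }
#else
const char* layer_backend() { return "common"; }  // default; MLU lands here too
#endif

int main() {
  std::printf("layers built from: %s\n", layer_backend());
  return 0;
}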
xllm/core/layers/common/CMakeLists.txt (renamed from xllm/core/layers/mlu/CMakeLists.txt)
@@ -4,7 +4,7 @@ include(cc_test)
 
 cc_library(
   NAME
-    mlu_layers
+    common_layers
   HDRS
     qwen3_attention.h
     attention.h
@@ -15,7 +15,7 @@ cc_library(
     qwen3_decoder_layer.h
     qwen3_moe_decoder_layer.h
     linear_impl.h
-    mlu_word_embedding_impl.h
+    word_embedding_impl.h
   SRCS
     qwen3_attention.cpp
     attention.cpp
4 files renamed without changes (from xllm/core/layers/mlu/ to xllm/core/layers/common/).
xllm/core/layers/common/word_embedding_impl.h (renamed from xllm/core/layers/mlu/mlu_word_embedding_impl.h)
@@ -28,12 +28,12 @@ namespace xllm {
 namespace layer {
 
 // Embedding parallelized in the embedding dimension.
-class MluWordEmbeddingImpl : public torch::nn::Module {
+class WordEmbeddingImpl : public torch::nn::Module {
  public:
-  MluWordEmbeddingImpl(int64_t num_embeddings,
-                       int64_t embedding_dim,
-                       const ParallelArgs& parallel_args,
-                       const torch::TensorOptions& options)
+  WordEmbeddingImpl(int64_t num_embeddings,
+                    int64_t embedding_dim,
+                    const ParallelArgs& parallel_args,
+                    const torch::TensorOptions& options)
       : parallel_args_(parallel_args) {
     rank_ = parallel_args_.tp_group_->rank();
     world_size_ = parallel_args_.tp_group_->world_size();
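For orientation, a rough, self-contained sketch of what an embedding parallelized along the embedding dimension does. The ShardedEmbedding name, the init scale, and the omitted all-gather are illustrative assumptions, not xllm's actual implementation; only the shard-per-rank layout follows from the code above:

#include <torch/torch.h>

// Hypothetical sketch: each rank stores embedding_dim / world_size columns
// of the full table and looks up only its own shard.
class ShardedEmbedding : public torch::nn::Module {
 public:
  ShardedEmbedding(int64_t num_embeddings, int64_t embedding_dim,
                   int64_t rank, int64_t world_size)
      : shard_dim_(embedding_dim / world_size), rank_(rank) {
    weight_ = register_parameter(
        "weight", torch::randn({num_embeddings, shard_dim_}) * 0.02);
  }

  // Returns this rank's [..., shard_dim] slice; a real tensor-parallel layer
  // would all-gather the per-rank outputs along the last dimension afterwards.
  torch::Tensor forward(const torch::Tensor& token_ids) {
    return torch::nn::functional::embedding(token_ids, weight_);
  }

 private:
  int64_t shard_dim_;
  int64_t rank_;
  torch::Tensor weight_;
};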
2 changes: 1 addition & 1 deletion xllm/core/layers/linear.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <torch/torch.h>
 
 #if defined(USE_MLU)
-#include "mlu/linear_impl.h"
+#include "common/linear_impl.h"
 #endif
 
 namespace xllm {
6 changes: 3 additions & 3 deletions xllm/core/layers/lm_head.h
@@ -17,8 +17,8 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_lm_head_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/linear_impl.h"
+#else
+#include "common/linear_impl.h"
 #endif
 
 namespace xllm {
@@ -33,7 +33,7 @@ class LmHead : public torch::nn::ModuleHolder<NpuLmHeadImpl> {
   LmHead(const ModelContext& context)
       : ModuleHolder(std::make_shared<NpuLmHeadImpl>(context)) {}
 };
-#elif defined(USE_MLU)
+#else
 class LmHead : public torch::nn::ModuleHolder<ColumnParallelLinearImpl> {
  public:
   using torch::nn::ModuleHolder<ColumnParallelLinearImpl>::ModuleHolder;
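These headers all lean on the same libtorch idiom: torch::nn::ModuleHolder<Impl> wraps a std::shared_ptr<Impl>, so one public name (LmHead, RmsNorm, ...) can front whichever Impl the preprocessor selected, and call sites never mention the impl type. A standalone sketch with a toy impl (ToyLinearImpl is illustrative, not the real ColumnParallelLinearImpl):

#include <torch/torch.h>

// Toy stand-in for a backend-specific implementation class.
class ToyLinearImpl : public torch::nn::Module {
 public:
  ToyLinearImpl(int64_t in_features, int64_t out_features) {
    weight_ = register_parameter("weight",
                                 torch::randn({out_features, in_features}));
  }
  torch::Tensor forward(const torch::Tensor& x) {
    return torch::matmul(x, weight_.t());
  }

 private:
  torch::Tensor weight_;
};

// The holder is the name callers use; it forwards calls to the shared impl.
class LmHead : public torch::nn::ModuleHolder<ToyLinearImpl> {
 public:
  using torch::nn::ModuleHolder<ToyLinearImpl>::ModuleHolder;
  LmHead(int64_t in_features, int64_t out_features)
      : ModuleHolder(std::make_shared<ToyLinearImpl>(in_features,
                                                     out_features)) {}
};

// Usage: LmHead head(1024, 32000); auto logits = head(hidden_states);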
6 changes: 3 additions & 3 deletions xllm/core/layers/qwen3_decoder_layer.h
@@ -17,8 +17,8 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_qwen3_decoder_layer_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/qwen3_decoder_layer.h"
+#else
+#include "common/qwen3_decoder_layer.h"
 #endif
 
 namespace xllm {
@@ -34,7 +34,7 @@ class Qwen3DecoderLayer
   Qwen3DecoderLayer(const ModelContext& context)
       : ModuleHolder(std::make_shared<NpuQwen3DecoderLayerImpl>(context)) {}
 };
-#elif defined(USE_MLU)
+#else
 class Qwen3DecoderLayer : public torch::nn::ModuleHolder<Qwen3DecoderImpl> {
  public:
   using torch::nn::ModuleHolder<Qwen3DecoderImpl>::ModuleHolder;
6 changes: 3 additions & 3 deletions xllm/core/layers/qwen3_moe_decoder_layer.h
@@ -17,8 +17,8 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_qwen3_moe_decoder_layer_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/qwen3_moe_decoder_layer.h"
+#else
+#include "common/qwen3_moe_decoder_layer.h"
 #endif
 
 namespace xllm {
@@ -35,7 +35,7 @@ class Qwen3MoeDecoderLayer
       : Qwen3MoeDecoderLayer(
             std::make_shared<NpuQwen3MoeDecoderLayerImpl>(context, layer_id)) {}
 };
-#elif defined(USE_MLU)
+#else
 class Qwen3MoeDecoderLayer
     : public torch::nn::ModuleHolder<Qwen3MoeDecoderImpl> {
  public:
6 changes: 3 additions & 3 deletions xllm/core/layers/rms_norm.h
@@ -16,8 +16,8 @@ limitations under the License.
 #pragma once
 #if defined(USE_NPU)
 #include "npu/npu_rms_norm_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/fuse_norm.h"
+#else
+#include "common/fuse_norm.h"
 #endif
 
 namespace xllm {
@@ -32,7 +32,7 @@ class RmsNorm : public torch::nn::ModuleHolder<NpuRmsNormImpl> {
   RmsNorm(const ModelContext& context)
       : ModuleHolder(std::make_shared<NpuRmsNormImpl>(context)) {}
 };
-#elif defined(USE_MLU)
+#else
 class RmsNorm : public torch::nn::ModuleHolder<FusedRMSNormImpl> {
  public:
   using torch::nn::ModuleHolder<FusedRMSNormImpl>::ModuleHolder;
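For reference, the computation a fused RMSNorm kernel collapses into one pass is small. An unfused sketch using the common RMSNorm definition (the eps default and weight semantics are assumptions, not necessarily FusedRMSNormImpl's exact signature):

#include <torch/torch.h>

// Reference (unfused) RMSNorm: y = x / sqrt(mean(x^2) + eps) * weight,
// with the mean taken over the last (hidden) dimension.
torch::Tensor rms_norm_reference(const torch::Tensor& x,
                                 const torch::Tensor& weight,
                                 double eps = 1e-6) {
  auto inv_rms = x.pow(2).mean(-1, /*keepdim=*/true).add(eps).rsqrt();
  return x * inv_rms * weight;
}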
9 changes: 2 additions & 7 deletions xllm/core/layers/rotary_embedding.cpp
@@ -16,19 +16,14 @@ limitations under the License.
 
 #include "rotary_embedding.h"
 
-#include <c10/core/ScalarType.h>
 #include <glog/logging.h>
 #include <torch/torch.h>
-#if defined(USE_NPU)
-#include <torch_npu/torch_npu.h>
-#endif
 
-#include <boost/algorithm/string.hpp>
 #include <cmath>
 #include <memory>
 
-#include "common/global_flags.h"
-#include "util/slice.h"
+#include "core/common/global_flags.h"
+#include "core/util/slice.h"
 
 namespace xllm {
 
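rotary_embedding.cpp precomputes position-dependent rotations; a compact sketch of the standard cos/sin table construction (the theta default and shapes follow the usual RoPE convention, not necessarily xllm's exact variant):

#include <torch/torch.h>

#include <tuple>

// Standard RoPE tables: inv_freq[i] = theta^(-2i/rotary_dim); each position p
// contributes angles p * inv_freq, consumed downstream as cos/sin pairs.
std::tuple<torch::Tensor, torch::Tensor> rope_cos_sin(int64_t max_pos,
                                                      int64_t rotary_dim,
                                                      double theta = 10000.0) {
  auto exponents =
      torch::arange(0, rotary_dim, 2, torch::kFloat32) / rotary_dim;
  auto inv_freq = torch::pow(theta, -exponents);       // [rotary_dim / 2]
  auto pos = torch::arange(max_pos, torch::kFloat32);  // [max_pos]
  auto freqs = torch::outer(pos, inv_freq);            // [max_pos, rotary_dim / 2]
  return std::make_tuple(freqs.cos(), freqs.sin());
}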
3 changes: 0 additions & 3 deletions xllm/core/layers/rotary_embedding.h
@@ -19,9 +19,6 @@ limitations under the License.
 #include <c10/core/Device.h>
 #include <c10/core/ScalarType.h>
 #include <torch/torch.h>
-#if defined(USE_NPU)
-#include <torch_npu/torch_npu.h>
-#endif
 
 #include <tuple>
 
20 changes: 10 additions & 10 deletions xllm/core/layers/word_embedding.h
@@ -17,8 +17,8 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_word_embedding_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/mlu_word_embedding_impl.h"
+#else
+#include "common/word_embedding_impl.h"
 #endif
 
 namespace xllm {
@@ -33,20 +33,20 @@ class WordEmbedding : public torch::nn::ModuleHolder<NpuWordEmbeddingImpl> {
       : ModuleHolder(std::make_shared<NpuWordEmbeddingImpl>(context)) {}
 };
 
-#elif defined(USE_MLU)
+#else
 
-class WordEmbedding : public torch::nn::ModuleHolder<MluWordEmbeddingImpl> {
+class WordEmbedding : public torch::nn::ModuleHolder<WordEmbeddingImpl> {
  public:
-  using torch::nn::ModuleHolder<MluWordEmbeddingImpl>::ModuleHolder;
-  using Impl __attribute__((__unused__)) = MluWordEmbeddingImpl;
+  using torch::nn::ModuleHolder<WordEmbeddingImpl>::ModuleHolder;
+  using Impl __attribute__((__unused__)) = WordEmbeddingImpl;
   WordEmbedding(int64_t num_embeddings,
                 int64_t embedding_dim,
                 const ParallelArgs& parallel_args,
                 const torch::TensorOptions& options)
-      : ModuleHolder(std::make_shared<MluWordEmbeddingImpl>(num_embeddings,
-                                                            embedding_dim,
-                                                            parallel_args,
-                                                            options)) {}
+      : ModuleHolder(std::make_shared<WordEmbeddingImpl>(num_embeddings,
+                                                         embedding_dim,
+                                                         parallel_args,
+                                                         options)) {}
 };
 
 #endif
4 changes: 2 additions & 2 deletions xllm/models/llm/llm_model_base.h
@@ -39,8 +39,8 @@ limitations under the License.
 #include "models/model_registry.h"
 #if defined(USE_NPU)
 #include "xllm_kernels/core/include/atb_speed/log.h"
-#elif defined(USE_MLU)
-#include "core/layers/mlu/attention.h"
+#else
+#include "core/layers/common/attention.h"
 #endif
 
 namespace xllm {