diff --git a/xllm/core/framework/model/CMakeLists.txt b/xllm/core/framework/model/CMakeLists.txt index a3f9fc7c..9bdd452d 100644 --- a/xllm/core/framework/model/CMakeLists.txt +++ b/xllm/core/framework/model/CMakeLists.txt @@ -15,17 +15,12 @@ set(BASE_DEPS ) if(USE_NPU) - # Modify dependencies for npu - list(APPEND BASE_DEPS torch_npu) list(APPEND BASE_DEPS :npu_layers) list(APPEND BASE_DEPS :platform_npu) +else() + list(APPEND BASE_DEPS :common_layers) endif() -if(USE_MLU) - # Modify dependencies for mlu - list(APPEND BASE_DEPS torch_mlu) - list(APPEND BASE_DEPS :mlu_layers) -endif() # Define the library cc_library( diff --git a/xllm/core/layers/CMakeLists.txt b/xllm/core/layers/CMakeLists.txt index 3b0696fb..6ad3d0c7 100644 --- a/xllm/core/layers/CMakeLists.txt +++ b/xllm/core/layers/CMakeLists.txt @@ -79,7 +79,6 @@ cc_library( if(USE_NPU) add_subdirectory(npu) -endif() -if(USE_MLU) - add_subdirectory(mlu) +else() + add_subdirectory(common) endif() diff --git a/xllm/core/layers/mlu/CMakeLists.txt b/xllm/core/layers/common/CMakeLists.txt similarity index 93% rename from xllm/core/layers/mlu/CMakeLists.txt rename to xllm/core/layers/common/CMakeLists.txt index cca8f5fb..837bdbdf 100755 --- a/xllm/core/layers/mlu/CMakeLists.txt +++ b/xllm/core/layers/common/CMakeLists.txt @@ -4,7 +4,7 @@ include(cc_test) cc_library( NAME - mlu_layers + common_layers HDRS qwen3_attention.h attention.h @@ -15,7 +15,7 @@ cc_library( qwen3_decoder_layer.h qwen3_moe_decoder_layer.h linear_impl.h - mlu_word_embedding_impl.h + word_embedding_impl.h SRCS qwen3_attention.cpp attention.cpp diff --git a/xllm/core/layers/mlu/attention.cpp b/xllm/core/layers/common/attention.cpp similarity index 100% rename from xllm/core/layers/mlu/attention.cpp rename to xllm/core/layers/common/attention.cpp diff --git a/xllm/core/layers/mlu/attention.h b/xllm/core/layers/common/attention.h similarity index 100% rename from xllm/core/layers/mlu/attention.h rename to xllm/core/layers/common/attention.h diff --git a/xllm/core/layers/mlu/dense_mlp.cpp b/xllm/core/layers/common/dense_mlp.cpp similarity index 100% rename from xllm/core/layers/mlu/dense_mlp.cpp rename to xllm/core/layers/common/dense_mlp.cpp diff --git a/xllm/core/layers/mlu/dense_mlp.h b/xllm/core/layers/common/dense_mlp.h similarity index 100% rename from xllm/core/layers/mlu/dense_mlp.h rename to xllm/core/layers/common/dense_mlp.h diff --git a/xllm/core/layers/mlu/fuse_norm.cpp b/xllm/core/layers/common/fuse_norm.cpp similarity index 100% rename from xllm/core/layers/mlu/fuse_norm.cpp rename to xllm/core/layers/common/fuse_norm.cpp diff --git a/xllm/core/layers/mlu/fuse_norm.h b/xllm/core/layers/common/fuse_norm.h similarity index 100% rename from xllm/core/layers/mlu/fuse_norm.h rename to xllm/core/layers/common/fuse_norm.h diff --git a/xllm/core/layers/mlu/fused_moe.cpp b/xllm/core/layers/common/fused_moe.cpp similarity index 100% rename from xllm/core/layers/mlu/fused_moe.cpp rename to xllm/core/layers/common/fused_moe.cpp diff --git a/xllm/core/layers/mlu/fused_moe.h b/xllm/core/layers/common/fused_moe.h similarity index 100% rename from xllm/core/layers/mlu/fused_moe.h rename to xllm/core/layers/common/fused_moe.h diff --git a/xllm/core/layers/mlu/linear_impl.cpp b/xllm/core/layers/common/linear_impl.cpp similarity index 100% rename from xllm/core/layers/mlu/linear_impl.cpp rename to xllm/core/layers/common/linear_impl.cpp diff --git a/xllm/core/layers/mlu/linear_impl.h b/xllm/core/layers/common/linear_impl.h similarity index 100% rename from xllm/core/layers/mlu/linear_impl.h rename to xllm/core/layers/common/linear_impl.h diff --git a/xllm/core/layers/mlu/qwen3_attention.cpp b/xllm/core/layers/common/qwen3_attention.cpp similarity index 100% rename from xllm/core/layers/mlu/qwen3_attention.cpp rename to xllm/core/layers/common/qwen3_attention.cpp diff --git a/xllm/core/layers/mlu/qwen3_attention.h b/xllm/core/layers/common/qwen3_attention.h similarity index 100% rename from xllm/core/layers/mlu/qwen3_attention.h rename to xllm/core/layers/common/qwen3_attention.h diff --git a/xllm/core/layers/mlu/qwen3_decoder_layer.cpp b/xllm/core/layers/common/qwen3_decoder_layer.cpp similarity index 100% rename from xllm/core/layers/mlu/qwen3_decoder_layer.cpp rename to xllm/core/layers/common/qwen3_decoder_layer.cpp diff --git a/xllm/core/layers/mlu/qwen3_decoder_layer.h b/xllm/core/layers/common/qwen3_decoder_layer.h similarity index 100% rename from xllm/core/layers/mlu/qwen3_decoder_layer.h rename to xllm/core/layers/common/qwen3_decoder_layer.h diff --git a/xllm/core/layers/mlu/qwen3_moe_decoder_layer.cpp b/xllm/core/layers/common/qwen3_moe_decoder_layer.cpp similarity index 100% rename from xllm/core/layers/mlu/qwen3_moe_decoder_layer.cpp rename to xllm/core/layers/common/qwen3_moe_decoder_layer.cpp diff --git a/xllm/core/layers/mlu/qwen3_moe_decoder_layer.h b/xllm/core/layers/common/qwen3_moe_decoder_layer.h similarity index 100% rename from xllm/core/layers/mlu/qwen3_moe_decoder_layer.h rename to xllm/core/layers/common/qwen3_moe_decoder_layer.h diff --git a/xllm/core/layers/mlu/rotary_embedding.cpp b/xllm/core/layers/common/rotary_embedding.cpp similarity index 100% rename from xllm/core/layers/mlu/rotary_embedding.cpp rename to xllm/core/layers/common/rotary_embedding.cpp diff --git a/xllm/core/layers/mlu/rotary_embedding.h b/xllm/core/layers/common/rotary_embedding.h similarity index 100% rename from xllm/core/layers/mlu/rotary_embedding.h rename to xllm/core/layers/common/rotary_embedding.h diff --git a/xllm/core/layers/mlu/mlu_word_embedding_impl.h b/xllm/core/layers/common/word_embedding_impl.h similarity index 92% rename from xllm/core/layers/mlu/mlu_word_embedding_impl.h rename to xllm/core/layers/common/word_embedding_impl.h index f69523a7..a6c66653 100644 --- a/xllm/core/layers/mlu/mlu_word_embedding_impl.h +++ b/xllm/core/layers/common/word_embedding_impl.h @@ -28,12 +28,12 @@ namespace xllm { namespace layer { // Embedding parallelized in the embedding dimension. -class MluWordEmbeddingImpl : public torch::nn::Module { +class WordEmbeddingImpl : public torch::nn::Module { public: - MluWordEmbeddingImpl(int64_t num_embeddings, - int64_t embedding_dim, - const ParallelArgs& parallel_args, - const torch::TensorOptions& options) + WordEmbeddingImpl(int64_t num_embeddings, + int64_t embedding_dim, + const ParallelArgs& parallel_args, + const torch::TensorOptions& options) : parallel_args_(parallel_args) { rank_ = parallel_args_.tp_group_->rank(); world_size_ = parallel_args_.tp_group_->world_size(); diff --git a/xllm/core/layers/linear.h b/xllm/core/layers/linear.h index 57f1cf9f..68a374a3 100644 --- a/xllm/core/layers/linear.h +++ b/xllm/core/layers/linear.h @@ -19,7 +19,7 @@ limitations under the License. #include #if defined(USE_MLU) -#include "mlu/linear_impl.h" +#include "common/linear_impl.h" #endif namespace xllm { diff --git a/xllm/core/layers/lm_head.h b/xllm/core/layers/lm_head.h index 554e03b1..1c79d87b 100644 --- a/xllm/core/layers/lm_head.h +++ b/xllm/core/layers/lm_head.h @@ -17,8 +17,8 @@ limitations under the License. #if defined(USE_NPU) #include "npu/npu_lm_head_impl.h" -#elif defined(USE_MLU) -#include "mlu/linear_impl.h" +#else +#include "common/linear_impl.h" #endif namespace xllm { @@ -33,7 +33,7 @@ class LmHead : public torch::nn::ModuleHolder { LmHead(const ModelContext& context) : ModuleHolder(std::make_shared(context)) {} }; -#elif defined(USE_MLU) +#else class LmHead : public torch::nn::ModuleHolder { public: using torch::nn::ModuleHolder::ModuleHolder; diff --git a/xllm/core/layers/qwen3_decoder_layer.h b/xllm/core/layers/qwen3_decoder_layer.h index 9924f052..324738d5 100644 --- a/xllm/core/layers/qwen3_decoder_layer.h +++ b/xllm/core/layers/qwen3_decoder_layer.h @@ -17,8 +17,8 @@ limitations under the License. #if defined(USE_NPU) #include "npu/npu_qwen3_decoder_layer_impl.h" -#elif defined(USE_MLU) -#include "mlu/qwen3_decoder_layer.h" +#else +#include "common/qwen3_decoder_layer.h" #endif namespace xllm { @@ -34,7 +34,7 @@ class Qwen3DecoderLayer Qwen3DecoderLayer(const ModelContext& context) : ModuleHolder(std::make_shared(context)) {} }; -#elif defined(USE_MLU) +#else class Qwen3DecoderLayer : public torch::nn::ModuleHolder { public: using torch::nn::ModuleHolder::ModuleHolder; diff --git a/xllm/core/layers/qwen3_moe_decoder_layer.h b/xllm/core/layers/qwen3_moe_decoder_layer.h index 1e4ddf8e..e3fa240d 100644 --- a/xllm/core/layers/qwen3_moe_decoder_layer.h +++ b/xllm/core/layers/qwen3_moe_decoder_layer.h @@ -17,8 +17,8 @@ limitations under the License. #if defined(USE_NPU) #include "npu/npu_qwen3_moe_decoder_layer_impl.h" -#elif defined(USE_MLU) -#include "mlu/qwen3_moe_decoder_layer.h" +#else +#include "common/qwen3_moe_decoder_layer.h" #endif namespace xllm { @@ -35,7 +35,7 @@ class Qwen3MoeDecoderLayer : Qwen3MoeDecoderLayer( std::make_shared(context, layer_id)) {} }; -#elif defined(USE_MLU) +#else class Qwen3MoeDecoderLayer : public torch::nn::ModuleHolder { public: diff --git a/xllm/core/layers/rms_norm.h b/xllm/core/layers/rms_norm.h index a40ce7ce..d8920c68 100644 --- a/xllm/core/layers/rms_norm.h +++ b/xllm/core/layers/rms_norm.h @@ -16,8 +16,8 @@ limitations under the License. #pragma once #if defined(USE_NPU) #include "npu/npu_rms_norm_impl.h" -#elif defined(USE_MLU) -#include "mlu/fuse_norm.h" +#else +#include "common/fuse_norm.h" #endif namespace xllm { @@ -32,7 +32,7 @@ class RmsNorm : public torch::nn::ModuleHolder { RmsNorm(const ModelContext& context) : ModuleHolder(std::make_shared(context)) {} }; -#elif defined(USE_MLU) +#else class RmsNorm : public torch::nn::ModuleHolder { public: using torch::nn::ModuleHolder::ModuleHolder; diff --git a/xllm/core/layers/rotary_embedding.cpp b/xllm/core/layers/rotary_embedding.cpp index 109f3284..0538fc00 100644 --- a/xllm/core/layers/rotary_embedding.cpp +++ b/xllm/core/layers/rotary_embedding.cpp @@ -16,19 +16,14 @@ limitations under the License. #include "rotary_embedding.h" -#include #include -#include -#if defined(USE_NPU) -#include -#endif #include #include #include -#include "common/global_flags.h" -#include "util/slice.h" +#include "core/common/global_flags.h" +#include "core/util/slice.h" namespace xllm { diff --git a/xllm/core/layers/rotary_embedding.h b/xllm/core/layers/rotary_embedding.h index 6a25b9bf..31b82bb6 100644 --- a/xllm/core/layers/rotary_embedding.h +++ b/xllm/core/layers/rotary_embedding.h @@ -19,9 +19,6 @@ limitations under the License. #include #include #include -#if defined(USE_NPU) -#include -#endif #include diff --git a/xllm/core/layers/word_embedding.h b/xllm/core/layers/word_embedding.h index 58e23fc5..c377dcc2 100644 --- a/xllm/core/layers/word_embedding.h +++ b/xllm/core/layers/word_embedding.h @@ -17,8 +17,8 @@ limitations under the License. #if defined(USE_NPU) #include "npu/npu_word_embedding_impl.h" -#elif defined(USE_MLU) -#include "mlu/mlu_word_embedding_impl.h" +#else +#include "common/word_embedding_impl.h" #endif namespace xllm { @@ -33,20 +33,20 @@ class WordEmbedding : public torch::nn::ModuleHolder { : ModuleHolder(std::make_shared(context)) {} }; -#elif defined(USE_MLU) +#else -class WordEmbedding : public torch::nn::ModuleHolder { +class WordEmbedding : public torch::nn::ModuleHolder { public: - using torch::nn::ModuleHolder::ModuleHolder; - using Impl __attribute__((__unused__)) = MluWordEmbeddingImpl; + using torch::nn::ModuleHolder::ModuleHolder; + using Impl __attribute__((__unused__)) = WordEmbeddingImpl; WordEmbedding(int64_t num_embeddings, int64_t embedding_dim, const ParallelArgs& parallel_args, const torch::TensorOptions& options) - : ModuleHolder(std::make_shared(num_embeddings, - embedding_dim, - parallel_args, - options)) {} + : ModuleHolder(std::make_shared(num_embeddings, + embedding_dim, + parallel_args, + options)) {} }; #endif diff --git a/xllm/models/llm/llm_model_base.h b/xllm/models/llm/llm_model_base.h index 16228a63..dc3c00b3 100644 --- a/xllm/models/llm/llm_model_base.h +++ b/xllm/models/llm/llm_model_base.h @@ -39,8 +39,8 @@ limitations under the License. #include "models/model_registry.h" #if defined(USE_NPU) #include "xllm_kernels/core/include/atb_speed/log.h" -#elif defined(USE_MLU) -#include "core/layers/mlu/attention.h" +#else +#include "core/layers/common/attention.h" #endif namespace xllm {