9 changes: 2 additions & 7 deletions xllm/core/framework/model/CMakeLists.txt
@@ -15,17 +15,12 @@ set(BASE_DEPS
 )
 
 if(USE_NPU)
-  # Modify dependencies for npu
   list(APPEND BASE_DEPS torch_npu)
   list(APPEND BASE_DEPS :npu_layers)
   list(APPEND BASE_DEPS :platform_npu)
+else()
+  list(APPEND BASE_DEPS :common_layers)
 endif()
-
-if(USE_MLU)
-  # Modify dependencies for mlu
-  list(APPEND BASE_DEPS torch_mlu)
-  list(APPEND BASE_DEPS :mlu_layers)
-endif()
 
 # Define the library
 cc_library(
5 changes: 2 additions & 3 deletions xllm/core/layers/CMakeLists.txt
@@ -79,7 +79,6 @@ cc_library(
 
 if(USE_NPU)
   add_subdirectory(npu)
-endif()
-if(USE_MLU)
-  add_subdirectory(mlu)
+else()
+  add_subdirectory(common)
 endif()
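
Note: the net effect of the two build changes above is that common/ becomes the default layer backend, built whenever USE_NPU is off, so an MLU build now compiles the shared sources instead of its own copies. A minimal, self-contained sketch of the compile-time dispatch these rules pair with (the layer_backend() function is purely illustrative, not xllm code):

#include <cstdio>

// Sketch: the same symbol resolves to whichever backend was compiled in,
// mirroring how the headers below select an implementation per backend.
#if defined(USE_NPU)
const char* layer_backend() { return "npu"; }
#else
const char* layer_backend() { return "common"; }  // default; MLU lands here too
#endif

int main() {
  std::printf("layers built from: %s\n", layer_backend());
  return 0;
}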
xllm/core/layers/common/CMakeLists.txt (renamed from xllm/core/layers/mlu/CMakeLists.txt)
@@ -4,7 +4,7 @@ include(cc_test)
 
 cc_library(
   NAME
-    mlu_layers
+    common_layers
   HDRS
     qwen3_attention.h
     attention.h
@@ -15,7 +15,7 @@ cc_library(
     qwen3_decoder_layer.h
     qwen3_moe_decoder_layer.h
     linear_impl.h
-    mlu_word_embedding_impl.h
+    word_embedding_impl.h
   SRCS
     qwen3_attention.cpp
     attention.cpp
4 files renamed without changes (from xllm/core/layers/mlu/ to xllm/core/layers/common/).
xllm/core/layers/common/word_embedding_impl.h (renamed from xllm/core/layers/mlu/mlu_word_embedding_impl.h)
@@ -28,12 +28,12 @@ namespace xllm {
 namespace layer {
 
 // Embedding parallelized in the embedding dimension.
-class MluWordEmbeddingImpl : public torch::nn::Module {
+class WordEmbeddingImpl : public torch::nn::Module {
  public:
-  MluWordEmbeddingImpl(int64_t num_embeddings,
-                       int64_t embedding_dim,
-                       const ParallelArgs& parallel_args,
-                       const torch::TensorOptions& options)
+  WordEmbeddingImpl(int64_t num_embeddings,
+                    int64_t embedding_dim,
+                    const ParallelArgs& parallel_args,
+                    const torch::TensorOptions& options)
       : parallel_args_(parallel_args) {
     rank_ = parallel_args_.tp_group_->rank();
     world_size_ = parallel_args_.tp_group_->world_size();
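For orientation, a rough, self-contained sketch of what an embedding parallelized along the embedding dimension does. The ShardedEmbedding name, the init scale, and the omitted all-gather are illustrative assumptions, not xllm's actual implementation; only the shard-per-rank layout follows from the code above:

#include <torch/torch.h>

// Hypothetical sketch: each rank stores embedding_dim / world_size columns
// of the full table and looks up only its own shard.
class ShardedEmbedding : public torch::nn::Module {
 public:
  ShardedEmbedding(int64_t num_embeddings, int64_t embedding_dim,
                   int64_t rank, int64_t world_size)
      : shard_dim_(embedding_dim / world_size), rank_(rank) {
    weight_ = register_parameter(
        "weight", torch::randn({num_embeddings, shard_dim_}) * 0.02);
  }

  // Returns this rank's [..., shard_dim] slice; a real tensor-parallel layer
  // would all-gather the per-rank outputs along the last dimension afterwards.
  torch::Tensor forward(const torch::Tensor& token_ids) {
    return torch::nn::functional::embedding(token_ids, weight_);
  }

 private:
  int64_t shard_dim_;
  int64_t rank_;
  torch::Tensor weight_;
};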
2 changes: 1 addition & 1 deletion xllm/core/layers/linear.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <torch/torch.h>
 
 #if defined(USE_MLU)
-#include "mlu/linear_impl.h"
+#include "common/linear_impl.h"
 #endif
 
 namespace xllm {
6 changes: 3 additions & 3 deletions xllm/core/layers/lm_head.h
@@ -17,8 +17,8 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_lm_head_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/linear_impl.h"
+#else
+#include "common/linear_impl.h"
 #endif
 
 namespace xllm {
@@ -33,7 +33,7 @@ class LmHead : public torch::nn::ModuleHolder<NpuLmHeadImpl> {
   LmHead(const ModelContext& context)
       : ModuleHolder(std::make_shared<NpuLmHeadImpl>(context)) {}
 };
-#elif defined(USE_MLU)
+#else
 class LmHead : public torch::nn::ModuleHolder<ColumnParallelLinearImpl> {
  public:
   using torch::nn::ModuleHolder<ColumnParallelLinearImpl>::ModuleHolder;
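These headers all lean on the same libtorch idiom: torch::nn::ModuleHolder<Impl> wraps a std::shared_ptr<Impl>, so one public name (LmHead, RmsNorm, ...) can front whichever Impl the preprocessor selected, and call sites never mention the impl type. A standalone sketch with a toy impl (ToyLinearImpl is illustrative, not the real ColumnParallelLinearImpl):

#include <torch/torch.h>

// Toy stand-in for a backend-specific implementation class.
class ToyLinearImpl : public torch::nn::Module {
 public:
  ToyLinearImpl(int64_t in_features, int64_t out_features) {
    weight_ = register_parameter("weight",
                                 torch::randn({out_features, in_features}));
  }
  torch::Tensor forward(const torch::Tensor& x) {
    return torch::matmul(x, weight_.t());
  }

 private:
  torch::Tensor weight_;
};

// The holder is the name callers use; it forwards calls to the shared impl.
class LmHead : public torch::nn::ModuleHolder<ToyLinearImpl> {
 public:
  using torch::nn::ModuleHolder<ToyLinearImpl>::ModuleHolder;
  LmHead(int64_t in_features, int64_t out_features)
      : ModuleHolder(std::make_shared<ToyLinearImpl>(in_features,
                                                     out_features)) {}
};

// Usage: LmHead head(1024, 32000); auto logits = head(hidden_states);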
6 changes: 3 additions & 3 deletions xllm/core/layers/qwen3_decoder_layer.h
@@ -17,8 +17,8 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_qwen3_decoder_layer_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/qwen3_decoder_layer.h"
+#else
+#include "common/qwen3_decoder_layer.h"
 #endif
 
 namespace xllm {
@@ -34,7 +34,7 @@ class Qwen3DecoderLayer
   Qwen3DecoderLayer(const ModelContext& context)
       : ModuleHolder(std::make_shared<NpuQwen3DecoderLayerImpl>(context)) {}
 };
-#elif defined(USE_MLU)
+#else
 class Qwen3DecoderLayer : public torch::nn::ModuleHolder<Qwen3DecoderImpl> {
  public:
   using torch::nn::ModuleHolder<Qwen3DecoderImpl>::ModuleHolder;
6 changes: 3 additions & 3 deletions xllm/core/layers/qwen3_moe_decoder_layer.h
@@ -17,8 +17,8 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_qwen3_moe_decoder_layer_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/qwen3_moe_decoder_layer.h"
+#else
+#include "common/qwen3_moe_decoder_layer.h"
 #endif
 
 namespace xllm {
@@ -35,7 +35,7 @@ class Qwen3MoeDecoderLayer
       : Qwen3MoeDecoderLayer(
             std::make_shared<NpuQwen3MoeDecoderLayerImpl>(context, layer_id)) {}
 };
-#elif defined(USE_MLU)
+#else
 class Qwen3MoeDecoderLayer
     : public torch::nn::ModuleHolder<Qwen3MoeDecoderImpl> {
  public:
6 changes: 3 additions & 3 deletions xllm/core/layers/rms_norm.h
@@ -16,8 +16,8 @@ limitations under the License.
 #pragma once
 #if defined(USE_NPU)
 #include "npu/npu_rms_norm_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/fuse_norm.h"
+#else
+#include "common/fuse_norm.h"
 #endif
 
 namespace xllm {
@@ -32,7 +32,7 @@ class RmsNorm : public torch::nn::ModuleHolder<NpuRmsNormImpl> {
   RmsNorm(const ModelContext& context)
       : ModuleHolder(std::make_shared<NpuRmsNormImpl>(context)) {}
 };
-#elif defined(USE_MLU)
+#else
 class RmsNorm : public torch::nn::ModuleHolder<FusedRMSNormImpl> {
  public:
   using torch::nn::ModuleHolder<FusedRMSNormImpl>::ModuleHolder;
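For reference, the computation a fused RMSNorm kernel collapses into one pass is small. An unfused sketch using the common RMSNorm definition (the eps default and weight semantics are assumptions, not necessarily FusedRMSNormImpl's exact signature):

#include <torch/torch.h>

// Reference (unfused) RMSNorm: y = x / sqrt(mean(x^2) + eps) * weight,
// with the mean taken over the last (hidden) dimension.
torch::Tensor rms_norm_reference(const torch::Tensor& x,
                                 const torch::Tensor& weight,
                                 double eps = 1e-6) {
  auto inv_rms = x.pow(2).mean(-1, /*keepdim=*/true).add(eps).rsqrt();
  return x * inv_rms * weight;
}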
9 changes: 2 additions & 7 deletions xllm/core/layers/rotary_embedding.cpp
@@ -16,19 +16,14 @@ limitations under the License.
 
 #include "rotary_embedding.h"
 
-#include <c10/core/ScalarType.h>
 #include <glog/logging.h>
 #include <torch/torch.h>
-#if defined(USE_NPU)
-#include <torch_npu/torch_npu.h>
-#endif
 
-#include <boost/algorithm/string.hpp>
 #include <cmath>
 #include <memory>
 
-#include "common/global_flags.h"
-#include "util/slice.h"
+#include "core/common/global_flags.h"
+#include "core/util/slice.h"
 
 namespace xllm {
 
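rotary_embedding.cpp precomputes position-dependent rotations; a compact sketch of the standard cos/sin table construction (the theta default and shapes follow the usual RoPE convention, not necessarily xllm's exact variant):

#include <torch/torch.h>

#include <tuple>

// Standard RoPE tables: inv_freq[i] = theta^(-2i/rotary_dim); each position p
// contributes angles p * inv_freq, consumed downstream as cos/sin pairs.
std::tuple<torch::Tensor, torch::Tensor> rope_cos_sin(int64_t max_pos,
                                                      int64_t rotary_dim,
                                                      double theta = 10000.0) {
  auto exponents =
      torch::arange(0, rotary_dim, 2, torch::kFloat32) / rotary_dim;
  auto inv_freq = torch::pow(theta, -exponents);       // [rotary_dim / 2]
  auto pos = torch::arange(max_pos, torch::kFloat32);  // [max_pos]
  auto freqs = torch::outer(pos, inv_freq);            // [max_pos, rotary_dim / 2]
  return std::make_tuple(freqs.cos(), freqs.sin());
}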
3 changes: 0 additions & 3 deletions xllm/core/layers/rotary_embedding.h
@@ -19,9 +19,6 @@ limitations under the License.
 #include <c10/core/Device.h>
 #include <c10/core/ScalarType.h>
 #include <torch/torch.h>
-#if defined(USE_NPU)
-#include <torch_npu/torch_npu.h>
-#endif
 
 #include <tuple>
 
20 changes: 10 additions & 10 deletions xllm/core/layers/word_embedding.h
@@ -17,8 +17,8 @@ limitations under the License.
 
 #if defined(USE_NPU)
 #include "npu/npu_word_embedding_impl.h"
-#elif defined(USE_MLU)
-#include "mlu/mlu_word_embedding_impl.h"
+#else
+#include "common/word_embedding_impl.h"
 #endif
 
 namespace xllm {
@@ -33,20 +33,20 @@ class WordEmbedding : public torch::nn::ModuleHolder<NpuWordEmbeddingImpl> {
       : ModuleHolder(std::make_shared<NpuWordEmbeddingImpl>(context)) {}
 };
 
-#elif defined(USE_MLU)
+#else
 
-class WordEmbedding : public torch::nn::ModuleHolder<MluWordEmbeddingImpl> {
+class WordEmbedding : public torch::nn::ModuleHolder<WordEmbeddingImpl> {
  public:
-  using torch::nn::ModuleHolder<MluWordEmbeddingImpl>::ModuleHolder;
-  using Impl __attribute__((__unused__)) = MluWordEmbeddingImpl;
+  using torch::nn::ModuleHolder<WordEmbeddingImpl>::ModuleHolder;
+  using Impl __attribute__((__unused__)) = WordEmbeddingImpl;
   WordEmbedding(int64_t num_embeddings,
                 int64_t embedding_dim,
                 const ParallelArgs& parallel_args,
                 const torch::TensorOptions& options)
-      : ModuleHolder(std::make_shared<MluWordEmbeddingImpl>(num_embeddings,
-                                                            embedding_dim,
-                                                            parallel_args,
-                                                            options)) {}
+      : ModuleHolder(std::make_shared<WordEmbeddingImpl>(num_embeddings,
+                                                         embedding_dim,
+                                                         parallel_args,
+                                                         options)) {}
 };
 
 #endif
4 changes: 2 additions & 2 deletions xllm/models/llm/llm_model_base.h
@@ -39,8 +39,8 @@ limitations under the License.
 #include "models/model_registry.h"
 #if defined(USE_NPU)
 #include "xllm_kernels/core/include/atb_speed/log.h"
-#elif defined(USE_MLU)
-#include "core/layers/mlu/attention.h"
+#else
+#include "core/layers/common/attention.h"
 #endif
 
 namespace xllm {