From 2b0e6bcdcd9733b923d3fc930ba116eb6f6e9f9d Mon Sep 17 00:00:00 2001
From: tianzedavid
Date: Tue, 30 Apr 2024 15:32:13 +0800
Subject: [PATCH] chore: remove repetitive words

---
 gallery/how_to/deploy_models/deploy_prequantized.py | 2 +-
 include/tvm/relax/dataflow_pattern.h                | 2 +-
 src/runtime/contrib/vllm/attention_kernels.cu       | 4 ++--
 src/runtime/relax_vm/kv_state.h                     | 2 +-
 src/runtime/relax_vm/paged_kv_cache.cc              | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gallery/how_to/deploy_models/deploy_prequantized.py b/gallery/how_to/deploy_models/deploy_prequantized.py
index b93ed5e4dacb..c55e608baf9b 100644
--- a/gallery/how_to/deploy_models/deploy_prequantized.py
+++ b/gallery/how_to/deploy_models/deploy_prequantized.py
@@ -162,7 +162,7 @@ def quantize_model(model, inp):
 #
 # You would see operators specific to quantization such as
 # qnn.quantize, qnn.dequantize, qnn.requantize, and qnn.conv2d etc.
-input_name = "input"  # the input name can be be arbitrary for PyTorch frontend.
+input_name = "input"  # the input name can be arbitrary for PyTorch frontend.
 input_shapes = [(input_name, (1, 3, 224, 224))]
 mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
 # print(mod) # comment in to see the QNN IR dump
diff --git a/include/tvm/relax/dataflow_pattern.h b/include/tvm/relax/dataflow_pattern.h
index 0d8e7678c2c1..f7094b221221 100644
--- a/include/tvm/relax/dataflow_pattern.h
+++ b/include/tvm/relax/dataflow_pattern.h
@@ -914,7 +914,7 @@ class ExternFuncPatternNode : public DFPatternNode {
  public:
   String global_symbol_; /*!< The global symbol name of the external function */
 
-  /*! \brief The the external function name */
+  /*! \brief The external function name */
   const String& global_symbol() const { return global_symbol_; }
 
   void VisitAttrs(tvm::AttrVisitor* v) { v->Visit("global_symbol", &global_symbol_); }
diff --git a/src/runtime/contrib/vllm/attention_kernels.cu b/src/runtime/contrib/vllm/attention_kernels.cu
index fe6e974dad9d..2b59044f844c 100644
--- a/src/runtime/contrib/vllm/attention_kernels.cu
+++ b/src/runtime/contrib/vllm/attention_kernels.cu
@@ -145,7 +145,7 @@ __device__ void paged_attention_kernel(
 
   // Load the query to registers.
   // Each thread in a thread group has a different part of the query.
-  // For example, if the the thread group size is 4, then the first thread in the group
+  // For example, if the thread group size is 4, then the first thread in the group
   // has 0, 4, 8, ... th vectors of the query, and the second thread has 1, 5, 9, ...
   // th vectors of the query, and so on.
   // NOTE(woosuk): Because q is split from a qkv tensor, it may not be contiguous.
@@ -185,7 +185,7 @@ __device__ void paged_attention_kernel(
 
     // Load a key to registers.
     // Each thread in a thread group has a different part of the key.
-    // For example, if the the thread group size is 4, then the first thread in the group
+    // For example, if the thread group size is 4, then the first thread in the group
    // has 0, 4, 8, ... th vectors of the key, and the second thread has 1, 5, 9, ... th
     // vectors of the key, and so on.
     for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) {
diff --git a/src/runtime/relax_vm/kv_state.h b/src/runtime/relax_vm/kv_state.h
index e3c6e9608c3f..7b90ffce50b2 100644
--- a/src/runtime/relax_vm/kv_state.h
+++ b/src/runtime/relax_vm/kv_state.h
@@ -83,7 +83,7 @@ class KVStateObj : public Object {
    * with prefill length "10", "15", "20", then we pass `[5, 1, 8]`
    * as the seq_ids and `[10, 15, 20]` as the append_lengths.
   * This method is invoked right before entering the model forward
-   * function, and contains operations to prepare the the incoming
+   * function, and contains operations to prepare the incoming
    * forward. For instance, this method may send auxiliary KV cache
    * data structures to GPUs so that they can be operated
    * in the model forward function.
diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc
index efedac235bfc..43edd4cbfd0d 100644
--- a/src/runtime/relax_vm/paged_kv_cache.cc
+++ b/src/runtime/relax_vm/paged_kv_cache.cc
@@ -85,7 +85,7 @@ struct Block {
   int32_t start_pos = 0;
   /*!
    * \brief The current attention sink length of the block.
-   * It means the the **first** sink size elements will be pinned
+   * It means the **first** sink size elements will be pinned
    * in the KV cache even when sliding window is enabled.
    */
   int32_t sink_length = 0;
@@ -247,7 +247,7 @@ class PagedKVCacheAuxDataManager {
   /*!
    * \brief Copy the append length indptr array on device.
    * \note Since the Q/K/V data may have raggedness in terms of lengths,
-   * we represent the the append lengths in CSR format.
+   * we represent the append lengths in CSR format.
    */
  virtual NDArray CopyCurAppendLengthIndptrAsync(std::vector<int32_t>* data) = 0;
   /*! \brief Copy the k position offset of applying RoPE for each sequence. */