Commit b4f44d2
[NNAdapter] Add fill_range fuse pass, eliminate range op, move scale and expand_v2 op (PaddlePaddle#7200)
shentanyue authored and csy0225 committed Oct 29, 2021
1 parent 2444c96 commit b4f44d2
Showing 26 changed files with 869 additions and 352 deletions.
2 changes: 2 additions & 0 deletions lite/api/paddle_use_passes.h
@@ -114,3 +114,5 @@ USE_MIR_PASS(__xpu__dynamic_lstm_fuse_pass);
USE_MIR_PASS(__xpu__multi_softmax_fuse_pass);
USE_MIR_PASS(__xpu__max_pooling_pad_zero_detect_fuse_pass);
USE_MIR_PASS(x86_int8_attribute_pass);
USE_MIR_PASS(fill_range_fuse_pass);
USE_MIR_PASS(range_calc_offline_pass);
2 changes: 2 additions & 0 deletions lite/backends/nnadapter/nnadapter/core/operation/all.h
@@ -41,6 +41,7 @@ REGISTER_OPERATION(GREATER, PrepareComparisons)
REGISTER_OPERATION(GREATER_EQUAL, PrepareComparisons)
REGISTER_OPERATION(HARD_SIGMOID, PrepareHardSigmoidSwish)
REGISTER_OPERATION(HARD_SWISH, PrepareHardSigmoidSwish)
REGISTER_OPERATION(EXPAND, PrepareExpand)
REGISTER_OPERATION(INSTANCE_NORMALIZATION, PrepareInstanceNormalization)
REGISTER_OPERATION(LAYER_NORMALIZATION, PrepareLayerNormalization)
REGISTER_OPERATION(LEAKY_RELU, PrepareLeakyRelu)
@@ -55,6 +56,7 @@ REGISTER_OPERATION(MUL, PrepareElementwise)
REGISTER_OPERATION(NOT_EQUAL, PrepareComparisons)
REGISTER_OPERATION(POW, PrepareElementwise)
REGISTER_OPERATION(PRELU, PreparePRelu)
REGISTER_OPERATION(RANGE, PrepareRange)
REGISTER_OPERATION(REDUCE_MEAN, PrepareReduce)
REGISTER_OPERATION(RELU, PrepareUnaryActivations)
REGISTER_OPERATION(RELU6, PrepareUnaryActivations)
99 changes: 99 additions & 0 deletions lite/backends/nnadapter/nnadapter/core/operation/expand.cc
@@ -0,0 +1,99 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "core/operation/expand.h"
#include <vector>
#include "core/hal/types.h"
#include "utility/debug.h"
#include "utility/logging.h"
#include "utility/modeling.h"
#include "utility/utility.h"

namespace nnadapter {
namespace operation {

int PrepareExpand(hal::Operation* operation) {
EXPAND_OPERATION_EXTRACT_INPUTS_OUTPUTS

// Infer the shape and type of output operands
auto input_type = input_operand->type;
auto& output_type = output_operand->type;
CopyOperandTypeWithQuantParams(&output_type, input_type);

output_type.dimensions.count = shape_count;

auto infer_output_shape = [&](int32_t* input_dimensions_data,
uint32_t input_dimensions_count,
int32_t* output_dimensions_data) {
std::vector<int> input_dims_vec;
for (uint32_t i = 0; i < input_dimensions_count; i++) {
input_dims_vec.push_back(input_dimensions_data[i]);
}
auto diff = shape_count - input_dimensions_count;
input_dims_vec.insert(input_dims_vec.begin(), diff, 1);
    std::vector<int> final_expand_shape(shape_count);
for (uint32_t i = 0; i < input_dims_vec.size(); ++i) {
NNADAPTER_CHECK_NE(shape_data[i], 0)
<< "The expanded size cannot be zero.";
      if (i < diff) {
        // e.g. shape_data = [3,4,-1,-1], X = [10,2] --> final_expand_shape = [3,4,10,2]
        NNADAPTER_CHECK_GT(shape_data[i], 0)
            << "The expanded size " << shape_data[i]
            << " for non-existing dimensions must be positive for expand_v2 op.";
        final_expand_shape[i] = shape_data[i];
      } else if (shape_data[i] > 0) {
        // e.g. shape_data = [3,4,10,4], X = [10,1] --> final_expand_shape = [3,4,10,4]
        if (input_dims_vec[i] != 1) {
          NNADAPTER_CHECK_EQ(input_dims_vec[i], shape_data[i])
              << "The value " << input_dims_vec[i]
              << " of the non-singleton dimension does not match the "
                 "corresponding value "
              << shape_data[i] << " in shape for expand_v2 op.";
        }
        final_expand_shape[i] = shape_data[i];
      } else {
        // e.g. shape_data = [3,4,-1,-1], X = [10,2] --> final_expand_shape = [3,4,10,2]
NNADAPTER_CHECK_EQ(shape_data[i], -1)
<< "When the value in shape is negative for expand_v2 op, "
"only -1 is supported, but the value received is "
<< shape_data[i];
final_expand_shape[i] = input_dims_vec[i];
}
}

for (uint32_t i = 0; i < shape_count; ++i) {
shape_data[i] = final_expand_shape[i];
output_dimensions_data[i] = final_expand_shape[i];
}
};

infer_output_shape(input_type.dimensions.data,
input_type.dimensions.count,
output_type.dimensions.data);
for (uint32_t i = 0; i < input_type.dimensions.dynamic_count; i++) {
infer_output_shape(input_type.dimensions.dynamic_data[i],
input_type.dimensions.count,
output_type.dimensions.dynamic_data[i]);
}
NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
return NNADAPTER_NO_ERROR;
}

} // namespace operation
} // namespace nnadapter
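The three branches above encode the expand_v2 broadcasting rule: entries for newly created leading dimensions must be positive, positive entries must equal the input dimension unless that dimension is 1, and -1 keeps the input dimension. A minimal standalone sketch of the same rule (illustrative only, plain STL types instead of the NNAdapter operand structs):

#include <cassert>
#include <cstdio>
#include <vector>

// Mirrors the infer_output_shape lambda: left-pad the input shape with 1s,
// then resolve each target entry against the padded input.
std::vector<int> InferExpandShape(std::vector<int> input,
                                  const std::vector<int>& shape) {
  size_t diff = shape.size() - input.size();
  input.insert(input.begin(), diff, 1);
  std::vector<int> out(shape.size());
  for (size_t i = 0; i < shape.size(); ++i) {
    assert(shape[i] != 0);
    if (i < diff) {
      assert(shape[i] > 0);  // new leading dims must be given explicitly
      out[i] = shape[i];
    } else if (shape[i] > 0) {
      assert(input[i] == 1 || input[i] == shape[i]);  // broadcast or match
      out[i] = shape[i];
    } else {  // shape[i] == -1 keeps the input dimension
      out[i] = input[i];
    }
  }
  return out;
}

int main() {
  for (int d : InferExpandShape({10, 2}, {3, 4, -1, -1})) printf("%d ", d);
  // prints: 3 4 10 2
  return 0;
}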
22 changes: 20 additions & 2 deletions lite/backends/nnadapter/nnadapter/core/operation/expand.h
@@ -29,8 +29,26 @@ namespace operation {
NNADAPTER_VLOG(5) << "input_operand: " << OperandToString(input_operand); \
/* Shape */ \
auto shape_operand = input_operands[1]; \
NNADAPTER_VLOG(5) << "shape operand: " \
<< OperandValueToString(shape_operand); \
NNADAPTER_VLOG(5) << "shape operand: " << OperandToString(shape_operand); \
uint32_t shape_count; \
int32_t* shape_data; \
auto& shape_type = shape_operand->type; \
if (IsConstantOperand(shape_operand)) { \
shape_count = shape_operand->length / sizeof(int32_t); \
shape_data = reinterpret_cast<int32_t*>(shape_operand->buffer); \
} else if (shape_type.lifetime == NNADAPTER_TEMPORARY_SHAPE) { \
auto shape_operand_dimension = \
*reinterpret_cast<NNAdapterOperandDimensionType*>( \
shape_operand->buffer); \
shape_count = shape_operand_dimension.count; \
shape_data = shape_operand_dimension.data; \
} else { \
shape_count = shape_operand->type.dimensions.count; \
shape_data = shape_operand->type.dimensions.data; \
} \
for (uint32_t i = 0; i < shape_count; i++) { \
NNADAPTER_VLOG(5) << "shape[" << i << "] = " << shape_data[i]; \
} \
/* Output */ \
auto output_operand = output_operands[0]; \
NNADAPTER_VLOG(5) << "output_operand: " << OperandToString(output_operand);
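The macro now resolves the shape from three possible sources depending on the operand's lifetime. A hedged sketch of the same logic as a plain function (illustrative only; hal::Operand, IsConstantOperand, NNADAPTER_TEMPORARY_SHAPE, and NNAdapterOperandDimensionType are the repo's own types from the headers above):

#include <cstdint>
#include <utility>

// Returns {count, pointer-to-dims} for a shape operand, as the macro does.
std::pair<uint32_t, int32_t*> ResolveShape(hal::Operand* shape_operand) {
  if (IsConstantOperand(shape_operand)) {
    // Constant shape: the buffer directly holds int32 dimension values.
    return {static_cast<uint32_t>(shape_operand->length / sizeof(int32_t)),
            reinterpret_cast<int32_t*>(shape_operand->buffer)};
  }
  if (shape_operand->type.lifetime == NNADAPTER_TEMPORARY_SHAPE) {
    // Temporary shape: an upstream operation stored a dimension struct in
    // the buffer during preparation.
    auto* dims = reinterpret_cast<NNAdapterOperandDimensionType*>(
        shape_operand->buffer);
    return {dims->count, dims->data};
  }
  // Fallback: the operand's own (possibly partially unknown) dimensions.
  return {shape_operand->type.dimensions.count,
          shape_operand->type.dimensions.data};
}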
1 change: 1 addition & 0 deletions lite/backends/nnadapter/nnadapter/core/operation/fill.cc
@@ -62,6 +62,7 @@ int PrepareFill(hal::Operation* operation) {
return NNADAPTER_INVALID_PARAMETER;
}
output_type.precision = value_operand->type.precision;
output_type.lifetime = NNADAPTER_TEMPORARY_VARIABLE;
NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
return NNADAPTER_NO_ERROR;
}
53 changes: 53 additions & 0 deletions lite/backends/nnadapter/nnadapter/core/operation/range.cc
@@ -0,0 +1,53 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "core/operation/range.h"
#include "core/hal/types.h"
#include "utility/debug.h"
#include "utility/logging.h"
#include "utility/modeling.h"
#include "utility/utility.h"

namespace nnadapter {
namespace operation {

int PrepareRange(hal::Operation* operation) {
RANGE_OPERATION_EXTRACT_INPUTS_OUTPUTS

// Infer the shape and type of output operands
auto& output_type = output_operand->type;
NNADAPTER_CHECK_EQ(start_operand->type.dimensions.count, 1);
NNADAPTER_CHECK_EQ(limit_operand->type.dimensions.count, 1);
NNADAPTER_CHECK_EQ(delta_operand->type.dimensions.count, 1);

if (IsConstantOperand(start_operand) && IsConstantOperand(limit_operand) &&
IsConstantOperand(delta_operand)) {
auto start_data = reinterpret_cast<float*>(start_operand->buffer)[0];
auto limit_data = reinterpret_cast<float*>(limit_operand->buffer)[0];
auto delta_data = reinterpret_cast<float*>(delta_operand->buffer)[0];
output_type.dimensions.data[0] =
GetSpanCount(start_data, limit_data, delta_data);
} else {
output_type.dimensions.data[0] = NNADAPTER_UNKNOWN;
}
output_type.precision = start_operand->type.precision;
output_type.lifetime = NNADAPTER_TEMPORARY_VARIABLE;
output_type.dimensions.count = 1;

NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
return NNADAPTER_NO_ERROR;
}

} // namespace operation
} // namespace nnadapter
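With all three inputs constant, the output length is fully determined at preparation time; otherwise it stays NNADAPTER_UNKNOWN until runtime. A worked example with illustrative values, using the same floating-point formula as GetSpanCount in utility.h:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  float start = 2.0f, limit = 10.0f, delta = 3.0f;
  // ceil(|10 - 2| / 3) = ceil(2.67) = 3; the elements would be {2, 5, 8}.
  int64_t size =
      static_cast<int64_t>(std::ceil(std::abs((limit - start) / delta)));
  printf("%lld\n", static_cast<long long>(size));  // prints: 3
  return 0;
}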
@@ -34,10 +34,13 @@ int ConvertExpand(Converter* converter, hal::Operation* operation) {
if (!input_operator) {
input_operator = converter->ConvertOperand(input_operand);
}
-  auto shape_operator = converter->ConvertOperand(shape_operand);
-  auto expand_op = converter->AddOperator<ge::op::Expand>(output_operand);
+  auto expand_op = converter->AddOperator<ge::op::ExpandD>(output_operand);
+  std::vector<int64_t> expand_shape(shape_count);
+  for (uint32_t i = 0; i < shape_count; i++) {
+    expand_shape[i] = shape_data[i];
+  }
+  expand_op->set_attr_shape(ge::Operator::OpListInt(expand_shape));
   SET_INPUT(expand_op, x, input_operator);
-  SET_INPUT(expand_op, shape, shape_operator);
MAP_OUTPUT(expand_op, y, output_operand);
return NNADAPTER_NO_ERROR;
}
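Note the operator swap in this hunk: ge::op::Expand took the target shape as a second runtime input (the removed shape_operator and SET_INPUT lines), while ge::op::ExpandD receives it as a compile-time attribute via set_attr_shape. The shape must therefore already be known when the model is converted, which is exactly what the constant/temporary-shape extraction added to expand.h provides.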
8 changes: 8 additions & 0 deletions lite/backends/nnadapter/nnadapter/utility/utility.h
@@ -16,6 +16,7 @@

#include <sys/time.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <string>
#include <vector>
@@ -218,4 +219,11 @@ inline int64_t GetCurrentUS() {
return 1000000LL * (int64_t)time.tv_sec + (int64_t)time.tv_usec;
}

template <typename T>
int64_t GetSpanCount(T start, T end, T step) {
return std::is_integral<T>::value
? ((std::abs(end - start) + std::abs(step) - 1) / std::abs(step))
: std::ceil(std::abs((end - start) / step));
}

} // namespace nnadapter
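A quick usage sketch with illustrative values: the integral branch rounds up using pure integer arithmetic, while the floating-point branch defers to std::ceil; both count the elements of a [start, end) span. (std::is_integral lives in <type_traits>, which presumably arrives transitively here.)

GetSpanCount<int64_t>(0, 10, 3);      // (|10 - 0| + 3 - 1) / 3 = 4 -> {0, 3, 6, 9}
GetSpanCount<float>(0.f, 10.f, 3.f);  // ceil(|(10 - 0) / 3|) = 4   -> {0, 3, 6, 9}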
100 changes: 100 additions & 0 deletions lite/core/optimizer/mir/elimination/range_calc_offline_pass.cc
@@ -0,0 +1,100 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/optimizer/mir/elimination/range_calc_offline_pass.h"
#include <algorithm>
#include <cmath>
#include <list>
#include <memory>
#include <set>
#include <vector>
#include "lite/core/optimizer/mir/pass.h"
#include "lite/core/optimizer/mir/pass_registry.h"
#include "lite/core/optimizer/mir/pattern_matcher.h"
#include "lite/model_parser/cpp_desc.h"

namespace paddle {
namespace lite {
namespace mir {

template <typename T>
int64_t GetSpanCount(T start, T end, T step) {
return std::is_integral<T>::value
? ((std::abs(end - start) + std::abs(step) - 1) / std::abs(step))
: std::ceil(std::abs((end - start) / step));
}

void RangeCalcOfflinePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
RemoveRangePattern(graph);
}

void RangeCalcOfflinePass::RemoveRangePattern(
const std::unique_ptr<SSAGraph>& graph) {
for (auto& node : graph->StmtTopologicalOrder()) {
if (node->AsStmt().picked_kernel().op_type() != "range") continue;

std::set<const Node*> nodes2rm_;
auto& range_instruct = node->AsStmt();
auto* scope = range_instruct.op()->scope();
auto op_desc = range_instruct.mutable_op_info();

// Get range's input tensor
auto start_var = scope->FindVar(op_desc->Input("Start").front());
auto end_var = scope->FindVar(op_desc->Input("End").front());
auto step_var = scope->FindVar(op_desc->Input("Step").front());
auto start_t = start_var->GetMutable<lite::Tensor>();
auto end_t = end_var->GetMutable<lite::Tensor>();
auto step_t = step_var->GetMutable<lite::Tensor>();
if (!start_t->persistable() || !end_t->persistable() ||
!step_t->persistable()) {
LOG(WARNING) << "RangeCalcOfflinePass does not support input that is not "
"persistent";
return;
}
auto start = start_t->mutable_data<float>()[0];
auto end = end_t->mutable_data<float>()[0];
auto step = step_t->mutable_data<float>()[0];
// Get range's output tensor
auto out_var = scope->FindVar(op_desc->Output("Out").front());
auto out_t = out_var->GetMutable<lite::Tensor>();

// Calc range
int64_t size = GetSpanCount(start, end, step);

out_t->Resize(DDim({size}));
auto out_data = out_t->mutable_data<float>();

float value = start;
for (int64_t i = 0; i < size; ++i) {
out_data[i] = value;
value += step;
}
// Offline calc range, only retain output tensor as persistable tensor
out_t->set_persistable(true);
auto range_outlinks = node->outlinks;
for (auto& range_out_link : range_outlinks) {
range_out_link->arg()->is_weight = true;
}
nodes2rm_.insert(node);
GraphSafeRemoveNodes(graph.get(), nodes2rm_);
}
}

} // namespace mir
} // namespace lite
} // namespace paddle

REGISTER_MIR_PASS(range_calc_offline_pass,
paddle::lite::mir::RangeCalcOfflinePass)
.BindTargets({TARGET(kNNAdapter)});
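Net effect with illustrative numbers: a range op whose start/end/step are all persistable, say 1, 10, and 2, is removed from the graph and replaced by a persistable weight holding {1, 3, 5, 7, 9}. A standalone sketch of just the folding arithmetic:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // The pass reads all three inputs as float.
  float start = 1.f, end = 10.f, step = 2.f;
  int64_t size =
      static_cast<int64_t>(std::ceil(std::abs((end - start) / step)));  // 5
  std::vector<float> out(size);
  float value = start;
  for (int64_t i = 0; i < size; ++i, value += step) out[i] = value;
  for (float v : out) printf("%g ", v);  // prints: 1 3 5 7 9
  return 0;
}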
38 changes: 38 additions & 0 deletions lite/core/optimizer/mir/elimination/range_calc_offline_pass.h
@@ -0,0 +1,38 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <limits>
#include <memory>
#include <string>
#include <vector>
#include "lite/core/optimizer/mir/pass.h"
#include "lite/core/optimizer/mir/pass_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/types.h"

namespace paddle {
namespace lite {
namespace mir {

class RangeCalcOfflinePass : public mir::StmtPass {
public:
void Apply(const std::unique_ptr<SSAGraph>& graph) override;
void RemoveRangePattern(const std::unique_ptr<SSAGraph>& graph);
};

} // namespace mir
} // namespace lite
} // namespace paddle