[TVM] Zero elimination #2634

Closed
wants to merge 10 commits
43 changes: 33 additions & 10 deletions include/tvm/ir_operator.h
@@ -85,6 +85,16 @@ inline const uint64_t* as_const_uint(const Expr& x) {
*/
inline bool is_const_int(const Expr& x, int64_t value);

/*!
 * \brief Check if the given expr is a const of any type equal to the given integer value.
 * \param e The expression.
 * \param value The value to compare to.
 * \return Whether the expression is a const equal to the value.
 * \tparam ValueType The value type
 */
template <typename ValueType>
inline bool is_const_value(const Expr& e, ValueType value);

/*!
* \brief Check whether stmt is nop.
* \param stmt The input statement
@@ -503,18 +513,31 @@ inline bool is_negative_const(const Expr& a) {
}
}

template <typename ValueType>
inline bool is_const_value(const Expr& e, ValueType value) {
  static_assert(std::is_integral<ValueType>::value,
                "Comparison to non-integer values is forbidden.");
  // This implementation was copy-pasted from HalideIR
  if (const ir::IntImm* i = e.as<ir::IntImm>()) {
    return i->value == value;
  } else if (const ir::UIntImm* i = e.as<ir::UIntImm>()) {
    return (value >= 0) && (i->value == static_cast<uint64_t>(value));
  } else if (const ir::FloatImm* i = e.as<ir::FloatImm>()) {
    return i->value == value;
  } else if (const ir::Cast* c = e.as<ir::Cast>()) {
    return is_const_value(c->value, value);
  } else if (const ir::Broadcast* b = e.as<ir::Broadcast>()) {
    return is_const_value(b->value, value);
  } else {
    return false;
  }
}
Contributor

This method's implementation is very similar to that of tvm::is_const_int(). I wonder whether it's possible to reuse one in the other.

One difference I see is in how UIntImm is handled. It seems to me that your handling is correct and is_const_int's handling is not, so maybe the latter needs to be fixed; then it will be more similar to yours.
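
For example (my own illustration, not code from this PR): with the current is_const_int, a UIntImm holding the maximum uint64_t value compares equal to -1, because -1 is cast to uint64_t before the comparison, while the new is_const_value rejects negative values up front:

    // Hypothetical repro sketch (assumes UIntImm::make and the UInt type helper from the TVM headers).
    Expr e = ir::UIntImm::make(UInt(64), ~uint64_t(0));
    bool old_result = is_const_int(e, -1);    // true: ~uint64_t(0) == static_cast<uint64_t>(-1)
    bool new_result = is_const_value(e, -1);  // false: the (value >= 0) guard fails for -1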

Contributor Author

Yes, I think it makes sense to express is_const_int through is_const_value.


 inline bool is_const_int(const Expr& x, int64_t value) {
-  if (const auto* op = x.as<ir::IntImm>()) {
-    return op->value == value;
-  } else if (const auto* op = x.as<ir::UIntImm>()) {
-    return op->value == static_cast<uint64_t>(value);
+  if (x.as<ir::IntImm>() || x.as<ir::UIntImm>()) {
+    return is_const_value(x, value);
   } else if (const auto* op = x.as<ir::Broadcast>()) {
-    const Expr& val = op->value;
-    if (const auto* opv = val.as<ir::IntImm>()) {
-      return opv->value == value;
-    } else if (const auto* opv = val.as<ir::UIntImm>()) {
-      return opv->value == static_cast<uint64_t>(value);
-    }
+    return !op->value.as<ir::Broadcast>() && is_const_int(op->value, value);
   }
   return false;
 }
198 changes: 198 additions & 0 deletions python/tvm/testing.py
@@ -1,6 +1,7 @@
""" TVM testing utilities """
import logging
import numpy as np
import tvm

def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7):
""" Version of np.testing.assert_allclose with `atol` and `rtol` fields set
@@ -145,3 +146,200 @@ def compare_derivative(j, n_der, grad):
logging.info("Numerical grad test wrt '%s' of shape %s passes, "
"dist = %f, max_diff = %f, avg_diff = %f",
x_name, grad.shape, dist, max_diff, avg_diff)


class PerformanceEstimate:
    """A result of static performance estimation.

    Parameters
    ----------
    iterations : int
        The total number of iterations of all the loops.

    multiplications : int
        The total number of expensive operations like multiplications.

    memory : int
        The amount of memory to allocate.
    """
    def __init__(self, iterations=0, multiplications=0, memory=0):
        self.iterations = iterations
        self.multiplications = multiplications
        self.memory = memory

    def as_tuple(self):
        return (self.iterations, self.multiplications, self.memory)

    def __add__(self, other):
        return PerformanceEstimate(iterations=self.iterations + other.iterations,
                                   multiplications=self.multiplications + other.multiplications,
                                   memory=self.memory + other.memory)

    def max(self, other):
        return PerformanceEstimate(
            iterations=max(self.iterations, other.iterations),
            multiplications=max(self.multiplications, other.multiplications),
            memory=max(self.memory, other.memory))

    def times(self, iters):
        return PerformanceEstimate(iterations=self.iterations*iters,
                                   multiplications=self.multiplications*iters,
                                   memory=self.memory)

    def __repr__(self):
        return "PerformanceEstimate(iterations={}, multiplications={}, memory={})".format(
            self.iterations, self.multiplications, self.memory)

    def __le__(self, other):
        return \
            self.iterations <= other.iterations and \
            self.multiplications <= other.multiplications and \
            self.memory <= other.memory


def estimate_performance(s, param_values=None, _processed_ops=None):
    """Statically estimate performance of statements, expressions and tensors. Note that the
    estimate is very rough: it must not be used to predict actual performance; its only
    purpose is to detect possible performance regressions.

    Parameters
    ----------
    s
        A statement, an expression, a tensor, an operation, or a list
        of any of the above.

    param_values : Dict[tvm.expr.Var, int], optional
        Values for parameters (free variables), see the example.

    _processed_ops : dict, optional
        A dict mapping already processed operations to the corresponding estimations.
        This parameter is used internally.

    Returns
    -------
    estimate : PerformanceEstimate

    Example
    -------
    .. code-block:: python

        m = tvm.var('m')
        X = tvm.placeholder((10, m), name='X')
        W = tvm.placeholder((m + 5, m), name='W')
        A = topi.nn.dense(X, W)
        tvm.testing.estimate_performance(A, param_values={m: 5})
    """
    from tvm import stmt
    from tvm import expr

    if param_values is None:
        param_values = {}

    if _processed_ops is None:
        _processed_ops = {}
        res = estimate_performance(s, param_values=param_values, _processed_ops=_processed_ops)
        for op_est in _processed_ops.values():
            res += op_est
        return res

    def est(expression, param_values=param_values, _processed_ops=_processed_ops):
        return estimate_performance(expression,
                                    param_values=param_values,
                                    _processed_ops=_processed_ops)

    def _eval(expression, param_values=param_values):
        return tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(expression, param_values)).value

    def _prod(elems):
        res = 1
        for x in elems:
            res *= x
        return res

    if s is None or isinstance(s, (stmt.AssertStmt, stmt.Free, stmt.Prefetch,
                                   expr.ConstExpr, expr.Var, tvm.tensor.PlaceholderOp)):
        return PerformanceEstimate()
    elif isinstance(s, list):
        res = PerformanceEstimate()
        for item in s:
            res += est(item)
        return res
    elif s in _processed_ops:
        return PerformanceEstimate()
    elif isinstance(s, stmt.Allocate):
        mem = _prod([_eval(e) for e in s.extents])
        return est(s.condition) + est(s.body) + PerformanceEstimate(memory=mem)
    elif isinstance(s, stmt.Block):
        return est(s.first) + est(s.rest)
    elif isinstance(s, stmt.Evaluate):
        return est(s.value)
    elif isinstance(s, stmt.For):
        body_est = est(s.body)
        body_est.iterations = max(1, body_est.iterations)
        return body_est.times(_eval(s.extent))
    elif isinstance(s, stmt.IfThenElse):
        return est(s.condition) + est(s.then_case) + est(s.else_case)
    elif isinstance(s, stmt.LetStmt):
        return est(s.value) + est(s.body)
    elif isinstance(s, (stmt.ProducerConsumer, stmt.AttrStmt)):
        return est(s.body)
    elif isinstance(s, stmt.Provide):
        return est(s.value)
    elif isinstance(s, stmt.Realize):
        return est(s.condition) + est(s.body)
    elif isinstance(s, stmt.Store):
        return est(s.value) + est(s.index) + est(s.predicate)
    elif isinstance(s, (expr.Mul, expr.Div, expr.Mod)):
        return est(s.a) + est(s.b) + PerformanceEstimate(multiplications=1)
    elif isinstance(s, (expr.BinaryOpExpr, expr.CmpExpr, expr.LogicalExpr)):
        if not hasattr(s, 'b'):
            return est(s.a)
        return est(s.a) + est(s.b)
    elif isinstance(s, expr.Call):
        res = PerformanceEstimate()
        for a in s.args:
            res += est(a)
        if s.call_type == expr.Call.Halide:
            # The estimate is added to _processed_ops; we don't need the result here
            est(s.func)
        elif s.name == "tvm_if_then_else":
            pass
        else:
            # If it is a non-Halide call (e.g. exp or log), consider it a mul
            res += PerformanceEstimate(multiplications=1)
        return res
    elif isinstance(s, expr.Cast):
        return est(s.value)
    elif isinstance(s, expr.Load):
        return est(s.index) + est(s.predicate)
    elif isinstance(s, expr.Select):
        return est(s.condition) + est(s.true_value) + est(s.false_value)
    elif isinstance(s, expr.Reduce):
        iterations = _prod([_eval(iv.dom.extent) for iv in s.axis])
        res = PerformanceEstimate()
        for id_elem in s.combiner.identity_element:
            res += est(id_elem)
        on_each_iter = est(s.condition)
        for src in s.source:
            on_each_iter += est(src)
        for comb_res in s.combiner.result:
            on_each_iter += est(comb_res)
        on_each_iter.iterations = max(1, on_each_iter.iterations)
        return res + on_each_iter.times(iterations)
    elif isinstance(s, tvm.tensor.Tensor):
        return est(s.op)
    elif isinstance(s, tvm.tensor.ComputeOp):
        iterations = _prod([_eval(iv.dom.extent) for iv in s.axis])
        if s.reduce_axis:
            res = est(s.body[0])
        else:
            res = PerformanceEstimate()
            for b in s.body:
                res += est(b)
        res.iterations = max(1, res.iterations)
        res = res.times(iterations) + PerformanceEstimate(memory=iterations*len(s.body))
        _processed_ops[s] = res
        return PerformanceEstimate()

    raise ValueError("Don't know how to estimate performance of {} of type {}"
                     .format(s, type(s)))
43 changes: 43 additions & 0 deletions src/op/op_util.cc
@@ -245,5 +245,48 @@ ir::ForType IterVarTypeToForType(IterVarType iter_type) {
}
}

Tensor TensorFromExpr(const Expr& expr, const Array<IterVar>& axis,
                      const std::string& name, const std::string& tag,
                      const Map<std::string, NodeRef>& attrs) {
  Array<Expr> new_bodies;
  int new_value_index = 0;

  // If this is a reduction then we have to clone its body
  if (const Reduce* red = expr.as<Reduce>()) {
    new_value_index = red->value_index;

    for (size_t i = 0; i < red->source.size(); ++i) {
      Expr ith_red = Reduce::make(red->combiner, red->source, red->axis, red->condition, i);
      new_bodies.push_back(ith_red);
    }
  } else {
    new_value_index = 0;
    new_bodies.push_back(expr);
  }

  return ComputeOpNode::make(name, tag, attrs, axis, new_bodies).output(new_value_index);
}

Tensor TransformBody(const Tensor& tensor,
                     std::function<Expr(const Expr&, const Array<IterVar>&)> func) {
  if (const ComputeOpNode* op = tensor->op.as<ComputeOpNode>()) {
    // Transform only one body
    Expr new_body = func(op->body[tensor->value_index], op->axis);

    // If the body didn't change then we can return the same tensor
    if (new_body.same_as(op->body[tensor->value_index])) {
      return tensor;
    }

    return TensorFromExpr(new_body, op->axis, op->name, op->tag, op->attrs);
  } else {
    return tensor;
  }
}

Tensor TransformBody(const Tensor& tensor, std::function<Expr(const Expr&)> func) {
  return TransformBody(tensor, [func](const Expr& e, const Array<IterVar>&) { return func(e); });
}

} // namespace op
} // namespace tvm
40 changes: 40 additions & 0 deletions src/op/op_util.h
@@ -11,6 +11,7 @@
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <string>
#include "../pass/ir_util.h"
#include "../pass/arg_binder.h"
#include "../schedule/message_passing.h"
@@ -84,6 +85,45 @@ IterVarType ForTypeToIterVarType(IterVarType iter_type);
*/
ir::ForType IterVarTypeToForType(IterVarType iter_type);

/*!
 * \brief Create a tensor from an expression. The expression may be a reduction, in which
 *  case its body will be correctly duplicated if it is a multi-valued reduction.
 *
 * \param expr The expr which will be the tensor's body.
 * \param axis The input variables with ranges.
 * \param name The tensor's name.
 * \param tag The tensor's tag.
 * \param attrs The tensor's attrs.
 * \return A tensor.
 */
Tensor TensorFromExpr(const Expr& expr, const Array<IterVar>& axis,
                      const std::string& name = "tensor", const std::string& tag = "",
                      const Map<std::string, NodeRef>& attrs = {});
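
A minimal usage sketch (my illustration, not part of this PR), assuming a tensor t backed by a ComputeOpNode:

    // Hypothetical: clone the selected output of a compute tensor under a new name,
    // reusing the original itervars as the axis of the new ComputeOp.
    const ComputeOpNode* op = t->op.as<ComputeOpNode>();
    Tensor copy = TensorFromExpr(op->body[t->value_index], op->axis, op->name + "_copy");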

/*!
 * \brief Transform the body of a tensor if it is a compute tensor, otherwise return it
 *  unchanged. Note that if the compute returns a tuple, only one element is transformed;
 *  the other elements are discarded.
 *
 * \param tensor The tensor to transform.
 * \param func The transformation function working on expressions and additionally taking
 *  the array of the tensor's itervars.
 * \return The transformed tensor.
 */
Tensor TransformBody(const Tensor& tensor,
                     std::function<Expr(const Expr&, const Array<IterVar>&)> func);

/*!
 * \brief Transform the body of a tensor if it is a compute tensor, otherwise return it
 *  unchanged. Note that if the compute returns a tuple, only one element is transformed;
 *  the other elements are discarded.
 *
 * \param tensor The tensor to transform.
 * \param func The transformation function (working on expressions).
 * \return The transformed tensor.
 */
Tensor TransformBody(const Tensor& tensor, std::function<Expr(const Expr&)> func);
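
For reference, a minimal caller of the single-argument overload might look like this (a sketch, not from this PR; it assumes ir::Simplify from tvm/ir_pass.h is visible in the translation unit):

    // Hypothetical helper: simplify the body of a compute tensor,
    // returning non-compute tensors unchanged.
    inline Tensor SimplifyBody(const Tensor& tensor) {
      return TransformBody(tensor, [](const Expr& e) { return ir::Simplify(e); });
    }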

} // namespace op
} // namespace tvm
#endif // TVM_OP_OP_UTIL_H_