[TVM] Zero elimination #2634

Closed
wants to merge 10 commits
43 changes: 33 additions & 10 deletions include/tvm/ir_operator.h
@@ -85,6 +85,16 @@ inline const uint64_t* as_const_uint(const Expr& x) {
*/
inline bool is_const_int(const Expr& x, int64_t value);

/*!
 * \brief Check if the given expr is a const of any type equal to the given integer value.
 * \param e The expression.
 * \param value The value to compare to.
 * \return Whether the expression is a const equal to the value.
 * \tparam ValueType The value type
 */
template <typename ValueType>
inline bool is_const_value(const Expr& e, ValueType value);

/*!
* \brief Check whether stmt is nop.
* \param stmt The input statement
@@ -503,18 +513,31 @@ inline bool is_negative_const(const Expr& a) {
}
}

template <typename ValueType>
inline bool is_const_value(const Expr& e, ValueType value) {
  static_assert(std::is_integral<ValueType>::value,
                "Comparison to non-integer values is forbidden.");
  // This implementation was copy-pasted from HalideIR
  if (const ir::IntImm* i = e.as<ir::IntImm>()) {
    return i->value == value;
  } else if (const ir::UIntImm* i = e.as<ir::UIntImm>()) {
    return (value >= 0) && (i->value == static_cast<uint64_t>(value));
  } else if (const ir::FloatImm* i = e.as<ir::FloatImm>()) {
    return i->value == value;
  } else if (const ir::Cast* c = e.as<ir::Cast>()) {
    return is_const_value(c->value, value);
  } else if (const ir::Broadcast* b = e.as<ir::Broadcast>()) {
    return is_const_value(b->value, value);
  } else {
    return false;
  }
}
Contributor

This method's implementation is very similar to that of tvm::is_const_int(). I wonder whether it's possible to reuse one in the other.

One difference I see is in how UIntImm is handled. It seems to me that your handling is correct and is_const_int's handling is not, so maybe the latter needs to be fixed; then it will be more similar to yours.
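
For example (my own illustration, not code from this PR): with the current is_const_int, a UIntImm holding the maximum uint64_t value compares equal to -1, because -1 is cast to uint64_t before the comparison, while the new is_const_value rejects negative values up front:

    // Hypothetical repro sketch (assumes UIntImm::make and the UInt type helper from the TVM headers).
    Expr e = ir::UIntImm::make(UInt(64), ~uint64_t(0));
    bool old_result = is_const_int(e, -1);    // true: ~uint64_t(0) == static_cast<uint64_t>(-1)
    bool new_result = is_const_value(e, -1);  // false: the (value >= 0) guard fails for -1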

Contributor Author

Yes, I think it makes sense to express is_const_int through is_const_value.


 inline bool is_const_int(const Expr& x, int64_t value) {
-  if (const auto* op = x.as<ir::IntImm>()) {
-    return op->value == value;
-  } else if (const auto* op = x.as<ir::UIntImm>()) {
-    return op->value == static_cast<uint64_t>(value);
+  if (x.as<ir::IntImm>() || x.as<ir::UIntImm>()) {
+    return is_const_value(x, value);
   } else if (const auto* op = x.as<ir::Broadcast>()) {
-    const Expr& val = op->value;
-    if (const auto* opv = val.as<ir::IntImm>()) {
-      return opv->value == value;
-    } else if (const auto* opv = val.as<ir::UIntImm>()) {
-      return opv->value == static_cast<uint64_t>(value);
-    }
+    return !op->value.as<ir::Broadcast>() && is_const_int(op->value, value);
   }
   return false;
 }
198 changes: 198 additions & 0 deletions python/tvm/testing.py
@@ -1,6 +1,7 @@
""" TVM testing utilities """
import logging
import numpy as np
import tvm

def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7):
""" Version of np.testing.assert_allclose with `atol` and `rtol` fields set
@@ -145,3 +146,200 @@ def compare_derivative(j, n_der, grad):
logging.info("Numerical grad test wrt '%s' of shape %s passes, "
"dist = %f, max_diff = %f, avg_diff = %f",
x_name, grad.shape, dist, max_diff, avg_diff)


class PerformanceEstimate:
    """A result of static performance estimation.

    Parameters
    ----------
    iterations : int
        The total number of iterations of all the loops.

    multiplications : int
        The total number of expensive operations like multiplications.

    memory : int
        The amount of memory to allocate.
    """
    def __init__(self, iterations=0, multiplications=0, memory=0):
        self.iterations = iterations
        self.multiplications = multiplications
        self.memory = memory

    def as_tuple(self):
        return (self.iterations, self.multiplications, self.memory)

    def __add__(self, other):
        return PerformanceEstimate(iterations=self.iterations + other.iterations,
                                   multiplications=self.multiplications + other.multiplications,
                                   memory=self.memory + other.memory)

    def max(self, other):
        return PerformanceEstimate(
            iterations=max(self.iterations, other.iterations),
            multiplications=max(self.multiplications, other.multiplications),
            memory=max(self.memory, other.memory))

    def times(self, iters):
        return PerformanceEstimate(iterations=self.iterations*iters,
                                   multiplications=self.multiplications*iters,
                                   memory=self.memory)

    def __repr__(self):
        return "PerformanceEstimate(iterations={}, multiplications={}, memory={})".format(
            self.iterations, self.multiplications, self.memory)

    def __le__(self, other):
        return \
            self.iterations <= other.iterations and \
            self.multiplications <= other.multiplications and \
            self.memory <= other.memory


def estimate_performance(s, param_values=None, _processed_ops=None):
    """Statically estimate performance of statements, expressions and tensors. Note that the
    estimate is very rough: it must not be used to predict actual performance; its only
    purpose is to detect possible performance regressions.

    Parameters
    ----------
    s
        A statement, an expression, a tensor, an operation, or a list
        of any of the above.

    param_values : Dict[tvm.expr.Var, int], optional
        Values for parameters (free variables), see the example.

    _processed_ops : dict, optional
        A dict mapping already processed operations to the corresponding estimations.
        This parameter is used internally.

    Returns
    -------
    estimate : PerformanceEstimate

    Example
    -------
    .. code-block:: python

        m = tvm.var('m')
        X = tvm.placeholder((10, m), name='X')
        W = tvm.placeholder((m + 5, m), name='W')
        A = topi.nn.dense(X, W)
        tvm.testing.estimate_performance(A, param_values={m: 5})
    """
    from tvm import stmt
    from tvm import expr

    if param_values is None:
        param_values = {}

    if _processed_ops is None:
        _processed_ops = {}
        res = estimate_performance(s, param_values=param_values, _processed_ops=_processed_ops)
        for op_est in _processed_ops.values():
            res += op_est
        return res

    def est(expression, param_values=param_values, _processed_ops=_processed_ops):
        return estimate_performance(expression,
                                    param_values=param_values,
                                    _processed_ops=_processed_ops)

    def _eval(expression, param_values=param_values):
        return tvm.ir_pass.Simplify(tvm.ir_pass.Substitute(expression, param_values)).value

    def _prod(elems):
        res = 1
        for x in elems:
            res *= x
        return res

    if s is None or isinstance(s, (stmt.AssertStmt, stmt.Free, stmt.Prefetch,
                                   expr.ConstExpr, expr.Var, tvm.tensor.PlaceholderOp)):
        return PerformanceEstimate()
    elif isinstance(s, list):
        res = PerformanceEstimate()
        for item in s:
            res += est(item)
        return res
    elif s in _processed_ops:
        return PerformanceEstimate()
    elif isinstance(s, stmt.Allocate):
        mem = _prod([_eval(e) for e in s.extents])
        return est(s.condition) + est(s.body) + PerformanceEstimate(memory=mem)
    elif isinstance(s, stmt.Block):
        return est(s.first) + est(s.rest)
    elif isinstance(s, stmt.Evaluate):
        return est(s.value)
    elif isinstance(s, stmt.For):
        body_est = est(s.body)
        body_est.iterations = max(1, body_est.iterations)
        return body_est.times(_eval(s.extent))
    elif isinstance(s, stmt.IfThenElse):
        return est(s.condition) + est(s.then_case) + est(s.else_case)
    elif isinstance(s, stmt.LetStmt):
        return est(s.value) + est(s.body)
    elif isinstance(s, (stmt.ProducerConsumer, stmt.AttrStmt)):
        return est(s.body)
    elif isinstance(s, stmt.Provide):
        return est(s.value)
    elif isinstance(s, stmt.Realize):
        return est(s.condition) + est(s.body)
    elif isinstance(s, stmt.Store):
        return est(s.value) + est(s.index) + est(s.predicate)
    elif isinstance(s, (expr.Mul, expr.Div, expr.Mod)):
        return est(s.a) + est(s.b) + PerformanceEstimate(multiplications=1)
    elif isinstance(s, (expr.BinaryOpExpr, expr.CmpExpr, expr.LogicalExpr)):
        if not hasattr(s, 'b'):
            return est(s.a)
        return est(s.a) + est(s.b)
    elif isinstance(s, expr.Call):
        res = PerformanceEstimate()
        for a in s.args:
            res += est(a)
        if s.call_type == expr.Call.Halide:
            # The estimate is added to _processed_ops; we don't need the result here
            est(s.func)
        elif s.name == "tvm_if_then_else":
            pass
        else:
            # If it is a non-Halide call (e.g. exp or log), consider it a mul
            res += PerformanceEstimate(multiplications=1)
        return res
    elif isinstance(s, expr.Cast):
        return est(s.value)
    elif isinstance(s, expr.Load):
        return est(s.index) + est(s.predicate)
    elif isinstance(s, expr.Select):
        return est(s.condition) + est(s.true_value) + est(s.false_value)
    elif isinstance(s, expr.Reduce):
        iterations = _prod([_eval(iv.dom.extent) for iv in s.axis])
        res = PerformanceEstimate()
        for id_elem in s.combiner.identity_element:
            res += est(id_elem)
        on_each_iter = est(s.condition)
        for src in s.source:
            on_each_iter += est(src)
        for comb_res in s.combiner.result:
            on_each_iter += est(comb_res)
        on_each_iter.iterations = max(1, on_each_iter.iterations)
        return res + on_each_iter.times(iterations)
    elif isinstance(s, tvm.tensor.Tensor):
        return est(s.op)
    elif isinstance(s, tvm.tensor.ComputeOp):
        iterations = _prod([_eval(iv.dom.extent) for iv in s.axis])
        if s.reduce_axis:
            res = est(s.body[0])
        else:
            res = PerformanceEstimate()
            for b in s.body:
                res += est(b)
        res.iterations = max(1, res.iterations)
        res = res.times(iterations) + PerformanceEstimate(memory=iterations*len(s.body))
        _processed_ops[s] = res
        return PerformanceEstimate()

    raise ValueError("Don't know how to estimate performance of {} of type {}"
                     .format(s, type(s)))
43 changes: 43 additions & 0 deletions src/op/op_util.cc
@@ -245,5 +245,48 @@ ir::ForType IterVarTypeToForType(IterVarType iter_type) {
}
}

Tensor TensorFromExpr(const Expr& expr, const Array<IterVar>& axis,
                      const std::string& name, const std::string& tag,
                      const Map<std::string, NodeRef>& attrs) {
  Array<Expr> new_bodies;
  int new_value_index = 0;

  // If this is a reduction then we have to clone its body
  if (const Reduce* red = expr.as<Reduce>()) {
    new_value_index = red->value_index;

    for (size_t i = 0; i < red->source.size(); ++i) {
      Expr ith_red = Reduce::make(red->combiner, red->source, red->axis, red->condition, i);
      new_bodies.push_back(ith_red);
    }
  } else {
    new_value_index = 0;
    new_bodies.push_back(expr);
  }

  return ComputeOpNode::make(name, tag, attrs, axis, new_bodies).output(new_value_index);
}

Tensor TransformBody(const Tensor& tensor,
                     std::function<Expr(const Expr&, const Array<IterVar>&)> func) {
  if (const ComputeOpNode* op = tensor->op.as<ComputeOpNode>()) {
    // Transform only one body
    Expr new_body = func(op->body[tensor->value_index], op->axis);

    // If the body didn't change then we can return the same tensor
    if (new_body.same_as(op->body[tensor->value_index])) {
      return tensor;
    }

    return TensorFromExpr(new_body, op->axis, op->name, op->tag, op->attrs);
  } else {
    return tensor;
  }
}

Tensor TransformBody(const Tensor& tensor, std::function<Expr(const Expr&)> func) {
  return TransformBody(tensor, [func](const Expr& e, const Array<IterVar>&) { return func(e); });
}

} // namespace op
} // namespace tvm
40 changes: 40 additions & 0 deletions src/op/op_util.h
@@ -11,6 +11,7 @@
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <string>
#include "../pass/ir_util.h"
#include "../pass/arg_binder.h"
#include "../schedule/message_passing.h"
@@ -84,6 +85,45 @@ IterVarType ForTypeToIterVarType(IterVarType iter_type);
*/
ir::ForType IterVarTypeToForType(IterVarType iter_type);

/*!
 * \brief Create a tensor from an expression. The expression may be a reduction, in which
 *  case its body will be correctly duplicated if it is a multi-valued reduction.
 *
 * \param expr The expr which will be the tensor's body.
 * \param axis The input variables with ranges.
 * \param name The tensor's name.
 * \param tag The tensor's tag.
 * \param attrs The tensor's attrs.
 * \return A tensor.
 */
Tensor TensorFromExpr(const Expr& expr, const Array<IterVar>& axis,
                      const std::string& name = "tensor", const std::string& tag = "",
                      const Map<std::string, NodeRef>& attrs = {});
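
A minimal usage sketch (my illustration, not part of this PR), assuming a tensor t backed by a ComputeOpNode:

    // Hypothetical: clone the selected output of a compute tensor under a new name,
    // reusing the original itervars as the axis of the new ComputeOp.
    const ComputeOpNode* op = t->op.as<ComputeOpNode>();
    Tensor copy = TensorFromExpr(op->body[t->value_index], op->axis, op->name + "_copy");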

/*!
 * \brief Transform the body of a tensor if it is a compute tensor, otherwise return it
 *  unchanged. Note that if the compute returns a tuple, only one element is transformed;
 *  the other elements are discarded.
 *
 * \param tensor The tensor to transform.
 * \param func The transformation function working on expressions and additionally taking
 *  the array of the tensor's itervars.
 * \return The transformed tensor.
 */
Tensor TransformBody(const Tensor& tensor,
                     std::function<Expr(const Expr&, const Array<IterVar>&)> func);

/*!
 * \brief Transform the body of a tensor if it is a compute tensor, otherwise return it
 *  unchanged. Note that if the compute returns a tuple, only one element is transformed;
 *  the other elements are discarded.
 *
 * \param tensor The tensor to transform.
 * \param func The transformation function (working on expressions).
 * \return The transformed tensor.
 */
Tensor TransformBody(const Tensor& tensor, std::function<Expr(const Expr&)> func);
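
For reference, a minimal caller of the single-argument overload might look like this (a sketch, not from this PR; it assumes ir::Simplify from tvm/ir_pass.h is visible in the translation unit):

    // Hypothetical helper: simplify the body of a compute tensor,
    // returning non-compute tensors unchanged.
    inline Tensor SimplifyBody(const Tensor& tensor) {
      return TransformBody(tensor, [](const Expr& e) { return ir::Simplify(e); });
    }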

} // namespace op
} // namespace tvm
#endif // TVM_OP_OP_UTIL_H_