Skip to content

Commit

Permalink
Merge pull request #1673 from devitocodes/minmaxbounds
Browse files Browse the repository at this point in the history
compiler: Introducing min/max bounds to replace 'bf' elemental functions
  • Loading branch information
FabioLuporini committed Jul 26, 2021
2 parents 1471d53 + c38526b commit 4f2a1fb
Show file tree
Hide file tree
Showing 22 changed files with 804 additions and 508 deletions.
2 changes: 1 addition & 1 deletion devito/core/arm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def _specialize_iet(cls, graph, **kwargs):
mpiize(graph, mode=options['mpi'])

# Lower IncrDimensions so that blocks of arbitrary shape may be used
relax_incr_dimensions(graph, sregistry=sregistry)
relax_incr_dimensions(graph)

# Parallelism
parizer = cls._Target.Parizer(sregistry, options, platform)
Expand Down
11 changes: 9 additions & 2 deletions devito/core/autotuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def autotune(operator, args, level, mode):
i.torank = MPI.PROC_NULL

roots = [operator.body] + [i.root for i in operator._func_table.values()]
trees = filter_ordered(retrieve_iteration_tree(roots), key=lambda i: i.root)
trees = filter_ordered(retrieve_iteration_tree(roots))

# Detect the time-stepping Iteration; shrink its iteration range so that
# each autotuning run only takes a few iterations
Expand All @@ -89,9 +89,14 @@ def autotune(operator, args, level, mode):

# Perform autotuning
timings = {}
seen = set()
for n, tree in enumerate(trees):
blockable = [i.dim for i in tree if not is_integer(i.step)]
# Skip this tree if every Dimension in `blockable` was already seen in a previous tree
if all(i in seen for i in blockable):
continue

seen.update(blockable)
# Tunable arguments
try:
tunable = []
Expand Down Expand Up @@ -250,7 +255,9 @@ def finalize_time_bounds(stepper, at_args, args, mode):


def calculate_nblocks(tree, blockable):
collapsed = tree[:(tree[0].ncollapsed or 1)]
block_indices = [n for n, i in enumerate(tree) if i.dim in blockable]
index = block_indices[0]
collapsed = tree[index:index + (tree[index].ncollapsed or index+1)]
blocked = [i.dim for i in collapsed if i.dim in blockable]
remainders = [(d.root.symbolic_max-d.root.symbolic_min+1) % d.step for d in blocked]
niters = [d.root.symbolic_max - i for d, i in zip(blocked, remainders)]
Expand Down
4 changes: 2 additions & 2 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _specialize_iet(cls, graph, **kwargs):
mpiize(graph, mode=options['mpi'])

# Lower IncrDimensions so that blocks of arbitrary shape may be used
relax_incr_dimensions(graph, sregistry=sregistry)
relax_incr_dimensions(graph)

# Parallelism
parizer = cls._Target.Parizer(sregistry, options, platform)
Expand Down Expand Up @@ -310,7 +310,7 @@ def _make_iet_passes_mapper(cls, **kwargs):
return {
'denormals': avoid_denormals,
'optcomms': optimize_halospots,
'blocking': partial(relax_incr_dimensions, sregistry=sregistry),
'blocking': partial(relax_incr_dimensions),
'parallel': parizer.make_parallel,
'openmp': parizer.make_parallel,
'mpi': partial(mpiize, mode=options['mpi']),
Expand Down
1 change: 1 addition & 0 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def _normalize_kwargs(cls, **kwargs):
# Blocking
o['blockinner'] = oo.pop('blockinner', True)
o['blocklevels'] = oo.pop('blocklevels', cls.BLOCK_LEVELS)
o['skewing'] = oo.pop('skewing', False)

# CIRE
o['min-storage'] = False
Expand Down
31 changes: 4 additions & 27 deletions devito/ir/iet/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from devito.ir.iet import Iteration, List, IterationTree, FindSections, FindSymbols
from devito.ir.iet import IterationTree, FindSections, FindSymbols
from devito.symbolics import Literal, Macro
from devito.tools import as_tuple, flatten, split
from devito.types import Array, LocalObject

__all__ = ['filter_iterations', 'retrieve_iteration_tree', 'compose_nodes',
'derive_parameters', 'diff_parameters']
__all__ = ['filter_iterations', 'retrieve_iteration_tree', 'derive_parameters',
'diff_parameters']


def retrieve_iteration_tree(node, mode='normal'):
Expand All @@ -20,7 +20,7 @@ def retrieve_iteration_tree(node, mode='normal'):
Iteration i
expr0
Iteration j
Iteraion k
Iteration k
expr1
Iteration p
expr2
Expand Down Expand Up @@ -66,29 +66,6 @@ def filter_iterations(tree, key=lambda i: i):
return filtered


def compose_nodes(nodes, retrieve=False):
"""Build an IET by nesting ``nodes``."""
l = list(nodes)
tree = []

if not isinstance(l[0], Iteration):
# Nothing to compose
body = flatten(l)
body = List(body=body) if len(body) > 1 else body[0]
else:
body = l.pop(-1)
while l:
handle = l.pop(-1)
body = handle._rebuild(body, **handle.args_frozen)
tree.append(body)

if retrieve is True:
tree = list(reversed(tree))
return body, tree
else:
return body


def derive_parameters(iet, drop_locals=False):
"""
Derive all input parameters (function call arguments) from an IET
Expand Down
2 changes: 1 addition & 1 deletion devito/ir/iet/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ def visit_Iteration(self, o, found=False, nomore=False):
class Transformer(Visitor):

"""
Given an Iteration/Expression tree T and a mapper from nodes in T to
Given an Iteration/Expression tree T and a mapper M from nodes in T to
a set of new nodes L, M : N --> L, build a new Iteration/Expression tree T'
where a node ``n`` in N is replaced with ``M[n]``.
Expand Down
116 changes: 64 additions & 52 deletions devito/passes/iet/misc.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from itertools import product

import cgen

from devito.ir.iet import (List, Prodder, FindNodes, Transformer, make_efunc,
compose_nodes, filter_iterations, retrieve_iteration_tree)
from devito.passes.iet.engine import iet_pass
from devito.tools import flatten, is_integer, split
from devito.ir.iet import (List, Prodder, FindNodes, Transformer, filter_iterations,
retrieve_iteration_tree)
from devito.ir.support import Forward
from devito.logger import warning
from devito.passes.iet.engine import iet_pass
from devito.symbolics import MIN, MAX
from devito.tools import split, is_integer

__all__ = ['avoid_denormals', 'hoist_prodders', 'relax_incr_dimensions', 'is_on_device']

Expand Down Expand Up @@ -57,13 +57,24 @@ def hoist_prodders(iet):
@iet_pass
def relax_incr_dimensions(iet, **kwargs):
"""
Recast Iterations over IncrDimensions as ElementalFunctions; insert
ElementalCalls to iterate over the "main" and "remainder" regions induced
by the IncrDimensions.
This pass adjusts the bounds of blocked Iterations in order to include the "remainder
regions". Without the relaxation that occurs in this pass, the only way to iterate
over the entire iteration space is to have step increments that are perfect divisors
of the iteration space (e.g. in case of an iteration space of size 67 and block size
8 only 64 iterations would be computed, as `67 - 67mod8 = 64`).
A simple 1D example: nested Iterations are transformed from:
<Iteration x0_blk0; (x_m, x_M, x0_blk0_size)>
<Iteration x; (x0_blk0, x0_blk0 + x0_blk0_size - 1, 1)>
to:
<Iteration x0_blk0; (x_m, x_M, x0_blk0_size)>
<Iteration x; (x0_blk0, MIN(x_M, x0_blk0 + x0_blk0_size - 1), 1)>
"""
sregistry = kwargs['sregistry']

efuncs = []
mapper = {}
for tree in retrieve_iteration_tree(iet):
iterations = [i for i in tree if i.dim.is_Incr]
Expand All @@ -74,49 +85,50 @@ def relax_incr_dimensions(iet, **kwargs):
if root in mapper:
continue

assert all(i.direction is Forward for i in iterations)
outer, inner = split(iterations, lambda i: not i.dim.parent.is_Incr)

# Compute the iteration ranges
ranges = []
for i in outer:
maxb = i.symbolic_max - (i.symbolic_size % i.dim.step)
ranges.append(((i.symbolic_min, maxb, i.dim.step),
(maxb + 1, i.symbolic_max, i.symbolic_max - maxb)))

# Remove any offsets
# E.g., `x = x_m + 2 to x_M - 2` --> `x = x_m to x_M`
outer = [i._rebuild(limits=(i.dim.root.symbolic_min, i.dim.root.symbolic_max,
i.step))
for i in outer]

# Create the ElementalFunction
name = sregistry.make_name(prefix="bf")
body = compose_nodes(outer)
dynamic_parameters = flatten((i.symbolic_bounds, i.step) for i in outer)
dynamic_parameters.extend([i.step for i in inner if not is_integer(i.step)])
efunc = make_efunc(name, body, dynamic_parameters)

efuncs.append(efunc)

# Create the ElementalCalls
calls = []
for p in product(*ranges):
dynamic_args_mapper = {}
for i, (m, M, b) in zip(outer, p):
dynamic_args_mapper[i.symbolic_min] = m
dynamic_args_mapper[i.symbolic_max] = M
dynamic_args_mapper[i.step] = b
for j in inner:
if j.dim.root is i.dim.root and not is_integer(j.step):
value = j.step if b is i.step else b
dynamic_args_mapper[j.step] = (value,)
calls.append(efunc.make_call(dynamic_args_mapper))

mapper[root] = List(body=calls)

iet = Transformer(mapper).visit(iet)

return iet, {'efuncs': efuncs}
# Get root's `symbolic_max` out of each outer Dimension
roots_max = {i.dim.root: i.symbolic_max for i in outer}

# Maps each already-processed Dimension to the maximum computed for its Iteration,
# so that bounds in hierarchical blocking and SubDimensions can be handled neatly
proc_parents_max = {}

# Process inner iterations and adjust their bounds
for n, i in enumerate(inner):
if i.dim.parent in proc_parents_max and i.symbolic_size == i.dim.parent.step:
# Use parent's Iteration max in hierarchical blocking
iter_max = proc_parents_max[i.dim.parent]
else:
# The Iteration's maximum is the MIN of (a) the `symbolic_max` of current
# Iteration e.g. `x0_blk0 + x0_blk0_size - 1` and (b) the `symbolic_max`
# of the current Iteration's root Dimension e.g. `x_M`. The generated
# maximum will be `MIN(x0_blk0 + x0_blk0_size - 1, x_M)`

# In some corner cases an offset may be added (e.g. after CIRE passes)
# E.g. assume `i.symbolic_max = x0_blk0 + x0_blk0_size + 1` and
# `i.dim.symbolic_max = x0_blk0 + x0_blk0_size - 1` then the generated
# maximum will be `MIN(x0_blk0 + x0_blk0_size + 1, x_M + 2)`

root_max = roots_max[i.dim.root] + i.symbolic_max - i.dim.symbolic_max

try:
iter_max = (min(i.symbolic_max, root_max))
bool(iter_max) # Can it be evaluated?
except TypeError:
iter_max = MIN(i.symbolic_max, root_max)

proc_parents_max[i.dim] = iter_max

mapper[i] = i._rebuild(limits=(i.symbolic_min, iter_max, i.step))

iet = Transformer(mapper, nested=True).visit(iet)

headers = [('%s(a,b)' % MIN.name, ('(((a) < (b)) ? (a) : (b))')),
('%s(a,b)' % MAX.name, ('(((a) > (b)) ? (a) : (b))'))]

return iet, {'headers': headers}


def is_on_device(obj, gpu_fit):
Expand Down
19 changes: 6 additions & 13 deletions devito/passes/iet/parpragma.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import numpy as np
import cgen as c
from sympy import And, Or, Max
from sympy import And, Max

from devito.data import FULL
from devito.ir import (DummyEq, Conditional, Dereference, Expression, ExpressionBundle,
List, ParallelTree, Prodder, FindSymbols, FindNodes, Return,
VECTORIZED, Transformer, IsPerfectIteration, filter_iterations,
retrieve_iteration_tree)
from devito.symbolics import CondEq, INT, ccode
from devito.ir import (Conditional, DummyEq, Dereference, Expression, ExpressionBundle,
FindSymbols, FindNodes, ParallelTree, Prodder, List, Transformer,
IsPerfectIteration, filter_iterations, retrieve_iteration_tree,
VECTORIZED)
from devito.passes.iet.engine import iet_pass
from devito.passes.iet.langbase import LangBB, LangTransformer, DeviceAwareMixin
from devito.passes.iet.misc import is_on_device
from devito.symbolics import INT, ccode
from devito.tools import as_tuple, prod
from devito.types import Symbol, NThreadsBase

Expand Down Expand Up @@ -274,13 +274,6 @@ def _make_parregion(self, partree, parrays):
return self.Region(partree)

def _make_guard(self, parregion):
# Do not enter the parallel region if the step increment is 0; this
# would raise a `Floating point exception (core dumped)` in some OpenMP
# implementations. Note that using an OpenMP `if` clause won't work
cond = Or(*[CondEq(i.step, 0) for i in parregion.collapsed
if isinstance(i.step, Symbol)])
if cond != False: # noqa: `cond` may be a sympy.False which would be == False
parregion = List(body=[Conditional(cond, Return()), parregion])
return parregion

def _make_nested_partree(self, partree):
Expand Down
6 changes: 5 additions & 1 deletion devito/symbolics/extended_sympy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
__all__ = ['CondEq', 'CondNe', 'IntDiv', 'FunctionFromPointer', 'FieldFromPointer',
'FieldFromComposite', 'ListInitializer', 'Byref', 'IndexedPointer',
'DefFunction', 'InlineIf', 'Macro', 'Literal', 'INT', 'FLOAT', 'DOUBLE',
'FLOOR', 'cast_mapper']
'FLOOR', 'MAX', 'MIN', 'cast_mapper']


class CondEq(sympy.Eq):
Expand Down Expand Up @@ -405,6 +405,10 @@ class Literal(sympy.Symbol):

FLOOR = Function('floor')

# Functions used for adjusting loop bounds
MAX = Function('MAX')
MIN = Function('MIN')

cast_mapper = {
np.int: INT,
np.int32: INT,
Expand Down

0 comments on commit 4f2a1fb

Please sign in to comment.