Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compiler: Introducing min/max bounds to replace 'bf' elemental functions #1673

Merged
merged 26 commits into from
Jul 26, 2021
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
22822b0
compiler: Start work on min/max bounds
georgebisbas Apr 14, 2021
e2df478
tests: dse/dle/autotuner
georgebisbas Apr 18, 2021
a2e0cdf
tests: working on mpi tests
georgebisbas Apr 19, 2021
97c51a7
compiler: fix nblocks calculation in autotuning
georgebisbas Apr 20, 2021
3cfc0b9
compiler: rewrite/improve function, misc improvements
georgebisbas Apr 20, 2021
40b95ed
compiler: drop guards from parallel loops
georgebisbas Apr 26, 2021
2a6f6d0
compiler: update tests, parpragma
georgebisbas Apr 26, 2021
5799aec
compiler: Restructure minmax algorithm
georgebisbas Apr 28, 2021
5bbc96e
compiler: Restructure relax_incr and minor gpu-related
georgebisbas Jun 1, 2021
a678925
compiler: autotuner, docs fixing and tests
georgebisbas Jun 10, 2021
d7a92b3
compiler: autotuning
georgebisbas Jun 11, 2021
616b563
compiler: tweak MIN/MAX
georgebisbas Jun 25, 2021
ca81063
compiler: add_minmax
georgebisbas Jun 28, 2021
ca5b6c6
compiler: tweak MINMAX defines
georgebisbas Jun 28, 2021
a1e1890
compiler: enrich tests, fix AT, notebooks
georgebisbas Jun 29, 2021
15d8f01
compiler: strengthen tests, MINMAX as Func, notebooks
georgebisbas Jul 1, 2021
86ba949
compiler: Openmp compliant, tests, docs
georgebisbas Jul 6, 2021
15cf8c1
compiler: refresh/simplify relax_incr_dimensions
georgebisbas Jul 6, 2021
0ce6a13
compiler: simplify algorithm and docs in relax
georgebisbas Jul 7, 2021
097f08c
tests: strengthen testing
georgebisbas Jul 13, 2021
3b32223
tests: update tests with helper funcs
georgebisbas Jul 16, 2021
df1e67d
tests: update tests with helper funcs II
georgebisbas Jul 17, 2021
29b832b
tests: simplify by ret ParallelBlocks
georgebisbas Jul 20, 2021
9cbad5f
tests: improve docs, parametrize
georgebisbas Jul 21, 2021
4d25407
tests: improve tests, docs
georgebisbas Jul 26, 2021
c38526b
tests: improve conftest, dle
georgebisbas Jul 26, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion devito/core/arm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def _specialize_iet(cls, graph, **kwargs):
mpiize(graph, mode=options['mpi'])

# Lower IncrDimensions so that blocks of arbitrary shape may be used
relax_incr_dimensions(graph, sregistry=sregistry)
relax_incr_dimensions(graph)

# Parallelism
parizer = cls._Target.Parizer(sregistry, options, platform)
Expand Down
11 changes: 9 additions & 2 deletions devito/core/autotuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def autotune(operator, args, level, mode):
i.torank = MPI.PROC_NULL

roots = [operator.body] + [i.root for i in operator._func_table.values()]
trees = filter_ordered(retrieve_iteration_tree(roots), key=lambda i: i.root)
trees = filter_ordered(retrieve_iteration_tree(roots))
georgebisbas marked this conversation as resolved.
Show resolved Hide resolved

# Detect the time-stepping Iteration; shrink its iteration range so that
# each autotuning run only takes a few iterations
Expand All @@ -89,9 +89,14 @@ def autotune(operator, args, level, mode):

# Perform autotuning
timings = {}
seen = set()
for n, tree in enumerate(trees):
blockable = [i.dim for i in tree if not is_integer(i.step)]
# Skip this tree if all of its `blockable` Dimensions were already seen in earlier trees
if all(i in seen for i in blockable):
continue

seen.update(blockable)
# Tunable arguments
try:
tunable = []
Expand Down Expand Up @@ -250,7 +255,9 @@ def finalize_time_bounds(stepper, at_args, args, mode):


def calculate_nblocks(tree, blockable):
collapsed = tree[:(tree[0].ncollapsed or 1)]
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
block_indices = [n for n, i in enumerate(tree) if i.dim in blockable]
index = block_indices[0]
collapsed = tree[index:index + (tree[index].ncollapsed or index+1)]
blocked = [i.dim for i in collapsed if i.dim in blockable]
remainders = [(d.root.symbolic_max-d.root.symbolic_min+1) % d.step for d in blocked]
niters = [d.root.symbolic_max - i for d, i in zip(blocked, remainders)]
Expand Down
4 changes: 2 additions & 2 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _specialize_iet(cls, graph, **kwargs):
mpiize(graph, mode=options['mpi'])

# Lower IncrDimensions so that blocks of arbitrary shape may be used
relax_incr_dimensions(graph, sregistry=sregistry)
relax_incr_dimensions(graph)

# Parallelism
parizer = cls._Target.Parizer(sregistry, options, platform)
Expand Down Expand Up @@ -310,7 +310,7 @@ def _make_iet_passes_mapper(cls, **kwargs):
return {
'denormals': avoid_denormals,
'optcomms': optimize_halospots,
'blocking': partial(relax_incr_dimensions, sregistry=sregistry),
'blocking': partial(relax_incr_dimensions),
'parallel': parizer.make_parallel,
'openmp': parizer.make_parallel,
'mpi': partial(mpiize, mode=options['mpi']),
Expand Down
1 change: 1 addition & 0 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def _normalize_kwargs(cls, **kwargs):
# Blocking
o['blockinner'] = oo.pop('blockinner', True)
o['blocklevels'] = oo.pop('blocklevels', cls.BLOCK_LEVELS)
o['skewing'] = oo.pop('skewing', False)

# CIRE
o['min-storage'] = False
Expand Down
31 changes: 4 additions & 27 deletions devito/ir/iet/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from devito.ir.iet import Iteration, List, IterationTree, FindSections, FindSymbols
from devito.ir.iet import IterationTree, FindSections, FindSymbols
from devito.symbolics import Literal, Macro
from devito.tools import as_tuple, flatten, split
from devito.types import Array, LocalObject

__all__ = ['filter_iterations', 'retrieve_iteration_tree', 'compose_nodes',
'derive_parameters', 'diff_parameters']
__all__ = ['filter_iterations', 'retrieve_iteration_tree', 'derive_parameters',
'diff_parameters']


def retrieve_iteration_tree(node, mode='normal'):
Expand All @@ -20,7 +20,7 @@ def retrieve_iteration_tree(node, mode='normal'):
Iteration i
expr0
Iteration j
Iteraion k
Iteration k
expr1
Iteration p
expr2
Expand Down Expand Up @@ -66,29 +66,6 @@ def filter_iterations(tree, key=lambda i: i):
return filtered


def compose_nodes(nodes, retrieve=False):
georgebisbas marked this conversation as resolved.
Show resolved Hide resolved
"""Build an IET by nesting ``nodes``."""
l = list(nodes)
tree = []

if not isinstance(l[0], Iteration):
# Nothing to compose
body = flatten(l)
body = List(body=body) if len(body) > 1 else body[0]
else:
body = l.pop(-1)
while l:
handle = l.pop(-1)
body = handle._rebuild(body, **handle.args_frozen)
tree.append(body)

if retrieve is True:
tree = list(reversed(tree))
return body, tree
else:
return body


def derive_parameters(iet, drop_locals=False):
"""
Derive all input parameters (function call arguments) from an IET
Expand Down
2 changes: 1 addition & 1 deletion devito/ir/iet/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ def visit_Iteration(self, o, found=False, nomore=False):
class Transformer(Visitor):

"""
Given an Iteration/Expression tree T and a mapper from nodes in T to
Given an Iteration/Expression tree T and a mapper M from nodes in T to
a set of new nodes L, M : N --> L, build a new Iteration/Expression tree T'
where a node ``n`` in N is replaced with ``M[n]``.

Expand Down
116 changes: 64 additions & 52 deletions devito/passes/iet/misc.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from itertools import product

import cgen

from devito.ir.iet import (List, Prodder, FindNodes, Transformer, make_efunc,
compose_nodes, filter_iterations, retrieve_iteration_tree)
from devito.passes.iet.engine import iet_pass
from devito.tools import flatten, is_integer, split
from devito.ir.iet import (List, Prodder, FindNodes, Transformer, filter_iterations,
retrieve_iteration_tree)
from devito.ir.support import Forward
from devito.logger import warning
from devito.passes.iet.engine import iet_pass
from devito.symbolics import MIN, MAX
from devito.tools import split, is_integer

__all__ = ['avoid_denormals', 'hoist_prodders', 'relax_incr_dimensions', 'is_on_device']

Expand Down Expand Up @@ -57,13 +57,24 @@ def hoist_prodders(iet):
@iet_pass
def relax_incr_dimensions(iet, **kwargs):
"""
Recast Iterations over IncrDimensions as ElementalFunctions; insert
ElementalCalls to iterate over the "main" and "remainder" regions induced
by the IncrDimensions.
This pass adjusts the bounds of blocked Iterations in order to include the "remainder
regions". Without the relaxation that occurs in this pass, the only way to iterate
over the entire iteration space is to have step increments that are perfect divisors
of the iteration space (e.g. in case of an iteration space of size 67 and block size
8 only 64 iterations would be computed, as `67 - 67 % 8 = 64`).

A simple 1D example: nested Iterations are transformed from:

<Iteration x0_blk0; (x_m, x_M, x0_blk0_size)>
<Iteration x; (x0_blk0, x0_blk0 + x0_blk0_size - 1, 1)>

to:

<Iteration x0_blk0; (x_m, x_M, x0_blk0_size)>
<Iteration x; (x0_blk0, MIN(x_M, x0_blk0 + x0_blk0_size - 1), 1)>

FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
"""
sregistry = kwargs['sregistry']

efuncs = []
mapper = {}
for tree in retrieve_iteration_tree(iet):
iterations = [i for i in tree if i.dim.is_Incr]
Expand All @@ -74,49 +85,50 @@ def relax_incr_dimensions(iet, **kwargs):
if root in mapper:
continue

assert all(i.direction is Forward for i in iterations)
outer, inner = split(iterations, lambda i: not i.dim.parent.is_Incr)

# Compute the iteration ranges
ranges = []
for i in outer:
maxb = i.symbolic_max - (i.symbolic_size % i.dim.step)
ranges.append(((i.symbolic_min, maxb, i.dim.step),
(maxb + 1, i.symbolic_max, i.symbolic_max - maxb)))

# Remove any offsets
# E.g., `x = x_m + 2 to x_M - 2` --> `x = x_m to x_M`
outer = [i._rebuild(limits=(i.dim.root.symbolic_min, i.dim.root.symbolic_max,
i.step))
for i in outer]

# Create the ElementalFunction
name = sregistry.make_name(prefix="bf")
body = compose_nodes(outer)
dynamic_parameters = flatten((i.symbolic_bounds, i.step) for i in outer)
dynamic_parameters.extend([i.step for i in inner if not is_integer(i.step)])
efunc = make_efunc(name, body, dynamic_parameters)

efuncs.append(efunc)

# Create the ElementalCalls
calls = []
for p in product(*ranges):
dynamic_args_mapper = {}
for i, (m, M, b) in zip(outer, p):
dynamic_args_mapper[i.symbolic_min] = m
dynamic_args_mapper[i.symbolic_max] = M
dynamic_args_mapper[i.step] = b
for j in inner:
if j.dim.root is i.dim.root and not is_integer(j.step):
value = j.step if b is i.step else b
dynamic_args_mapper[j.step] = (value,)
calls.append(efunc.make_call(dynamic_args_mapper))

mapper[root] = List(body=calls)

iet = Transformer(mapper).visit(iet)

return iet, {'efuncs': efuncs}
# Get root's `symbolic_max` out of each outer Dimension
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
roots_max = {i.dim.root: i.symbolic_max for i in outer}

# Map each processed Dimension to the maximum computed for its Iteration, so that
# Iterations over its child Dimensions can reuse it. Helps to neatly handle bounds
# in hierarchical blocking and SubDimensions
proc_parents_max = {}

# Process inner iterations and adjust their bounds
for n, i in enumerate(inner):
if i.dim.parent in proc_parents_max and i.symbolic_size == i.dim.parent.step:
# Use parent's Iteration max in hierarchical blocking
iter_max = proc_parents_max[i.dim.parent]
else:
# The Iteration's maximum is the MIN of (a) the `symbolic_max` of current
# Iteration e.g. `x0_blk0 + x0_blk0_size - 1` and (b) the `symbolic_max`
# of the current Iteration's root Dimension e.g. `x_M`. The generated
# maximum will be `MIN(x0_blk0 + x0_blk0_size - 1, x_M)`

# In some corner cases an offset may be added (e.g. after CIRE passes)
# E.g. assume `i.symbolic_max = x0_blk0 + x0_blk0_size + 1` and
# `i.dim.symbolic_max = x0_blk0 + x0_blk0_size - 1` then the generated
# maximum will be `MIN(x0_blk0 + x0_blk0_size + 1, x_M + 2)`

root_max = roots_max[i.dim.root] + i.symbolic_max - i.dim.symbolic_max

try:
iter_max = (min(i.symbolic_max, root_max))
bool(iter_max) # Can it be evaluated?
except TypeError:
iter_max = MIN(i.symbolic_max, root_max)

proc_parents_max[i.dim] = iter_max

mapper[i] = i._rebuild(limits=(i.symbolic_min, iter_max, i.step))

iet = Transformer(mapper, nested=True).visit(iet)

headers = [('%s(a,b)' % MIN.name, ('(((a) < (b)) ? (a) : (b))')),
('%s(a,b)' % MAX.name, ('(((a) > (b)) ? (a) : (b))'))]

return iet, {'headers': headers}


def is_on_device(obj, gpu_fit):
Expand Down
19 changes: 6 additions & 13 deletions devito/passes/iet/parpragma.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import numpy as np
import cgen as c
from sympy import And, Or, Max
from sympy import And, Max
georgebisbas marked this conversation as resolved.
Show resolved Hide resolved

from devito.data import FULL
from devito.ir import (DummyEq, Conditional, Dereference, Expression, ExpressionBundle,
List, ParallelTree, Prodder, FindSymbols, FindNodes, Return,
VECTORIZED, Transformer, IsPerfectIteration, filter_iterations,
retrieve_iteration_tree)
from devito.symbolics import CondEq, INT, ccode
from devito.ir import (Conditional, DummyEq, Dereference, Expression, ExpressionBundle,
FindSymbols, FindNodes, ParallelTree, Prodder, List, Transformer,
IsPerfectIteration, filter_iterations, retrieve_iteration_tree,
VECTORIZED)
from devito.passes.iet.engine import iet_pass
from devito.passes.iet.langbase import LangBB, LangTransformer, DeviceAwareMixin
from devito.passes.iet.misc import is_on_device
from devito.symbolics import INT, ccode
from devito.tools import as_tuple, prod
from devito.types import Symbol, NThreadsBase

Expand Down Expand Up @@ -274,13 +274,6 @@ def _make_parregion(self, partree, parrays):
return self.Region(partree)

def _make_guard(self, parregion):
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
# Do not enter the parallel region if the step increment is 0; this
# would raise a `Floating point exception (core dumped)` in some OpenMP
# implementations. Note that using an OpenMP `if` clause won't work
cond = Or(*[CondEq(i.step, 0) for i in parregion.collapsed
if isinstance(i.step, Symbol)])
if cond != False: # noqa: `cond` may be a sympy.False which would be == False
parregion = List(body=[Conditional(cond, Return()), parregion])
return parregion

def _make_nested_partree(self, partree):
Expand Down
6 changes: 5 additions & 1 deletion devito/symbolics/extended_sympy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
__all__ = ['CondEq', 'CondNe', 'IntDiv', 'FunctionFromPointer', 'FieldFromPointer',
'FieldFromComposite', 'ListInitializer', 'Byref', 'IndexedPointer',
'DefFunction', 'InlineIf', 'Macro', 'Literal', 'INT', 'FLOAT', 'DOUBLE',
'FLOOR', 'cast_mapper']
'FLOOR', 'MAX', 'MIN', 'cast_mapper']


class CondEq(sympy.Eq):
Expand Down Expand Up @@ -405,6 +405,10 @@ class Literal(sympy.Symbol):

FLOOR = Function('floor')

# Functions used for adjusting loop bounds
MAX = Function('MAX')
MIN = Function('MIN')

cast_mapper = {
np.int: INT,
np.int32: INT,
Expand Down