Skip to content

Commit

Permalink
Merge pull request #1762 from devitocodes/fix-issue-1761
Browse files Browse the repository at this point in the history
compiler: Patch and improve SubDomainSet
  • Loading branch information
FabioLuporini committed Oct 1, 2021
2 parents 587e0f6 + e34ec3d commit 74e7859
Show file tree
Hide file tree
Showing 11 changed files with 334 additions and 68 deletions.
8 changes: 7 additions & 1 deletion devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from devito.passes.equations import collect_derivatives
from devito.passes.clusters import (Lift, blocking, buffering, cire, cse,
extract_increments, factorize, fission, fuse,
optimize_pows)
optimize_pows, optimize_msds)
from devito.passes.iet import (CTarget, OmpTarget, avoid_denormals, linearize, mpiize,
optimize_halospots, hoist_prodders, relax_incr_dimensions)
from devito.tools import timed_pass
Expand Down Expand Up @@ -161,6 +161,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
platform = kwargs['platform']
sregistry = kwargs['sregistry']

# Optimize MultiSubDomains
clusters = optimize_msds(clusters)

# Toposort+Fusion (the former to expose more fusion opportunities)
clusters = fuse(clusters, toposort=True)

Expand Down Expand Up @@ -247,6 +250,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
platform = kwargs['platform']
sregistry = kwargs['sregistry']

# Optimize MultiSubDomains
clusters = optimize_msds(clusters)

# Toposort+Fusion (the former to expose more fusion opportunities)
clusters = fuse(clusters, toposort=True)

Expand Down
5 changes: 4 additions & 1 deletion devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from devito.passes.equations import collect_derivatives
from devito.passes.clusters import (Lift, Streaming, Tasker, blocking, buffering,
cire, cse, extract_increments, factorize,
fission, fuse, optimize_pows)
fission, fuse, optimize_pows, optimize_msds)
from devito.passes.iet import (DeviceOmpTarget, DeviceAccTarget, optimize_halospots,
mpiize, hoist_prodders, is_on_device, linearize)
from devito.tools import as_tuple, timed_pass
Expand Down Expand Up @@ -150,6 +150,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
platform = kwargs['platform']
sregistry = kwargs['sregistry']

# Optimize MultiSubDomains
clusters = optimize_msds(clusters)

# Toposort+Fusion (the former to expose more fusion opportunities)
clusters = fuse(clusters, toposort=True, options=options)

Expand Down
6 changes: 4 additions & 2 deletions devito/ir/clusters/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,10 @@ def rule(size, e):
exprs = [e.apply(func) for e in exprs]

# Augment IterationSpace
ispace = IterationSpace(c.ispace.intervals,
{**c.ispace.sub_iterators, **{d: tuple(mds)}},
sub_iterators = dict(c.ispace.sub_iterators)
sub_iterators[d] = tuple(i for i in sub_iterators[d] + tuple(mds)
if i not in subiters)
ispace = IterationSpace(c.ispace.intervals, sub_iterators,
c.ispace.directions)

processed.append(c.rebuild(exprs=exprs, ispace=ispace))
Expand Down
7 changes: 4 additions & 3 deletions devito/ir/equations/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def generate_implicit_exprs(expressions):
Currently, implicit expressions stem from the following:
* ``SubDomainSet``'s attached to input equations.
* MultiSubDomains attached to input equations.
"""
found = {}
processed = []
Expand All @@ -91,9 +91,9 @@ def generate_implicit_exprs(expressions):
try:
dims = [d.root for d in e.free_symbols if isinstance(d, Dimension)]
sub_dims = [d.root for d in e.subdomain.dimensions]
sub_dims.append(e.subdomain.implicit_dimension)
sub_dims.extend(e.subdomain.implicit_dimensions)
dims = [d for d in dims if d not in frozenset(sub_dims)]
dims.append(e.subdomain.implicit_dimension)
dims.extend(e.subdomain.implicit_dimensions)
if e.subdomain not in found:
grid = list(retrieve_functions(e, mode='unique'))[0].grid
found[e.subdomain] = [i.func(*i.args, implicit_dims=dims) for i in
Expand All @@ -103,6 +103,7 @@ def generate_implicit_exprs(expressions):
new_e = Eq(e.lhs, e.rhs, subdomain=e.subdomain, implicit_dims=dims)
processed.append(new_e)
except AttributeError:
# Not a MultiSubDomain
processed.append(e)
else:
processed.append(e)
Expand Down
9 changes: 9 additions & 0 deletions devito/ir/support/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,15 @@ def lift(self, d=None, v=None):

return IterationSpace(intervals, self.sub_iterators, self.directions)

def relaxed(self, dims):
    """
    Build a new IterationSpace in which the given Dimensions are relaxed.

    Parameters
    ----------
    dims : object
        The Dimension(s) to relax, in any form accepted by ``as_tuple``.

    Returns
    -------
    IterationSpace
        A new IterationSpace in which:

        * each interval whose ``dim`` is in ``dims`` is replaced by its
          ``relaxed`` counterpart, while any other interval is retained;
        * each key of ``sub_iterators`` and ``directions`` that is in
          ``dims`` is replaced by its ``root``; the mapped values are
          carried over as they are.
    """
    # The normalization is loop-invariant, so perform it once up front
    # rather than at each membership test
    dims = as_tuple(dims)

    intervals = [i.relaxed if i.dim in dims else i for i in self.intervals]
    sub_iterators = {k.root if k in dims else k: v
                     for k, v in self.sub_iterators.items()}
    directions = {k.root if k in dims else k: v
                  for k, v in self.directions.items()}

    return IterationSpace(intervals, sub_iterators, directions)

def is_compatible(self, other):
"""
A relaxed version of ``__eq__``, in which only non-derived dimensions
Expand Down
4 changes: 2 additions & 2 deletions devito/ir/support/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def build_iterators(mapper):
iterators = OrderedDict()
for k, v in mapper.items():
for d in v:
if d.is_Stepping:
if d.is_Stepping or d.is_Incr:
values = iterators.setdefault(d.root, [])
if d not in values:
values.append(d)
Expand All @@ -98,7 +98,7 @@ def build_intervals(stencil):
for d, offs in stencil.items():
if d.is_Stepping:
mapper[d.root].update(offs)
elif d.is_Conditional:
elif d.is_Conditional or d.is_Incr:
mapper[d.parent].update(offs)
elif d.is_Modulo:
mapper[d.root].update({d.offset - d.root + i for i in offs})
Expand Down
41 changes: 41 additions & 0 deletions devito/passes/clusters/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
split)
from devito.types import (Array, TempFunction, Eq, Symbol, ModuloDimension,
CustomDimension, IncrDimension, Indexed)
from devito.types.grid import MultiSubDimension

__all__ = ['cire']

Expand Down Expand Up @@ -108,6 +109,7 @@ def __init__(self, sregistry, options, platform):
self.opt_rotate = options['cire-rotate']
self.opt_ftemps = options['cire-ftemps']
self.opt_mingain = options['cire-mingain']
self.opt_multisubdomain = True

def _aliases_from_clusters(self, clusters, exclude, meta):
exprs = flatten([c.exprs for c in clusters])
Expand Down Expand Up @@ -140,6 +142,10 @@ def _aliases_from_clusters(self, clusters, exclude, meta):
# Schedule -> [Clusters]_k
processed, subs = lower_schedule(schedule, meta, self.sregistry, self.opt_ftemps)

# [Clusters]_k -> [Clusters]_k (optimization)
if self.opt_multisubdomain:
processed = optimize_clusters_msds(processed)

# [Clusters]_k -> [Clusters]_{k+n}
for c in clusters:
n = len(c.exprs)
Expand Down Expand Up @@ -315,6 +321,7 @@ def __init__(self, sregistry, options, platform):

self.opt_maxpar = options['cire-maxpar']
self.opt_schedule_strategy = options['cire-schedule']
self.opt_multisubdomain = False

def process(self, clusters):
processed = []
Expand Down Expand Up @@ -898,6 +905,40 @@ def lower_schedule(schedule, meta, sregistry, ftemps):
return clusters, subs


def optimize_clusters_msds(clusters):
    """
    Relax the Clusters whose expressions are defined over MultiSubDomains, so
    that they get computed over the entire domain instead. This increases the
    likelihood of code lifting by later passes.

    Clusters whose IterationSpace has no MultiSubDimension among its
    ``itdimensions`` are returned as they are.
    """
    def remap(mapping, subs):
        # Re-key `mapping`, replacing each key that appears in `subs`
        return {subs.get(key, key): val for key, val in mapping.items()}

    relaxed = []
    for cluster in clusters:
        msdims = [dim for dim in cluster.ispace.itdimensions
                  if isinstance(dim, MultiSubDimension)]

        if not msdims:
            # Nothing to relax
            relaxed.append(cluster)
            continue

        # Each MultiSubDimension gets replaced by its root Dimension
        to_root = {dim: dim.root for dim in msdims}
        exprs = [uxreplace(expr, to_root) for expr in cluster.exprs]

        ispace = cluster.ispace.relaxed(msdims)

        # The DataSpace parts are rebuilt from the accesses of the new
        # expressions; entries with a falsy key are discarded
        parts = {}
        for key, stencil in detect_accesses(exprs).items():
            if key:
                parts[key] = IntervalGroup(build_intervals(stencil)).relaxed
        intervals = [ival for ival in cluster.dspace if ival.dim not in msdims]
        dspace = DataSpace(intervals, parts)

        relaxed.append(cluster.rebuild(
            exprs=exprs, ispace=ispace, dspace=dspace,
            guards=remap(cluster.guards, to_root),
            properties=remap(cluster.properties, to_root),
            syncs=remap(cluster.syncs, to_root),
        ))

    return relaxed


def pick_best(variants, schedule_strategy, eval_variants_delta):
"""
Return the variant with the best trade-off between operation count
Expand Down
68 changes: 67 additions & 1 deletion devito/passes/clusters/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
from devito.symbolics import pow_to_mul
from devito.tools import DAG, Stamp, as_tuple, flatten, frozendict, timed_pass
from devito.types import Symbol
from devito.types.grid import MultiSubDimension

__all__ = ['Lift', 'fuse', 'optimize_pows', 'extract_increments',
'fission']
'fission', 'optimize_msds']


class Lift(Queue):
Expand Down Expand Up @@ -365,3 +366,68 @@ def fission(clusters):
..
"""
return Fission().process(clusters)


class MSDOptimizer(Queue):

    """
    Optimization pass for Clusters defined over MultiSubDomains.

    At present, a single optimization is carried out:

        * Redundant thickness assignments are dropped. These originate from
          Eqs defined over the same MultiSubDomain within the very same
          loop nest. The duplicates are harmless as far as correctness is
          concerned, but they may hinder other optimizations, such as fusion.
    """

    def callback(self, clusters, prefix):
        # Nothing to do with an empty prefix, or if the prefix already
        # includes a MultiSubDimension
        if not prefix:
            return clusters
        if any(isinstance(itv.dim, MultiSubDimension) for itv in prefix):
            return clusters

        msdims = {dim for cluster in clusters for dim in cluster.dimensions
                  if isinstance(dim, MultiSubDimension)}
        if not msdims:
            return clusters

        # Remove redundant thicknesses assignments

        thicknesses = {t for msd in msdims for t in msd._thickness_map}
        writers = [cluster for cluster in clusters
                   if thicknesses.intersection(cluster.scope.writes)]

        # Give up unless each Cluster writing to a thickness has the last
        # prefix Dimension as its innermost one, so that all and only the
        # thicknesses assignments at the same depth get analyzed
        innermost = prefix[-1].dim
        if not all(cluster.itintervals[-1].dim is innermost for cluster in writers):
            return clusters

        # Retain only the first assignment to each thickness
        pending = set(thicknesses)
        processed = []
        for cluster in clusters:
            if cluster not in writers:
                processed.append(cluster)
                continue

            kept = []
            for expr in cluster.exprs:
                # An lhs that is no longer pending was already scheduled
                if expr.lhs in pending:
                    pending.remove(expr.lhs)
                    kept.append(expr)
            if kept:
                processed.append(cluster.rebuild(exprs=kept))

        # Sanity check: every thickness must have been scheduled
        assert not pending

        return processed


@timed_pass()
def optimize_msds(clusters):
    """
    Run the MultiSubDomain optimizations (see MSDOptimizer) over the given
    Clusters and return the processed Clusters.
    """
    optimizer = MSDOptimizer()
    return optimizer.process(clusters)

0 comments on commit 74e7859

Please sign in to comment.