Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize Targets as Operators #1028

Merged
merged 16 commits on Jan 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 3 additions & 4 deletions .github/workflows/documentation.yml
Expand Up @@ -2,7 +2,7 @@ name: Documentation

on:
push:
branches:
branches:
- master # Push events on master branch

jobs:
Expand All @@ -18,18 +18,17 @@ jobs:
python-version: '3.x'
- name: Install Sphinx
run: |
python -m pip install --upgrade pip
python -m pip install --upgrade pip
pip install sphinx sphinx_rtd_theme
pip install -e .

- name: Generate documentation
working-directory: docs
run: make html

- name: Deploy
uses: peaceiris/actions-gh-pages@v2
env:
PERSONAL_TOKEN: ${{ secrets.PERSONAL_TOKEN }}
PUBLISH_BRANCH: gh-pages
PUBLISH_DIR: ./docs/_build/html

8 changes: 3 additions & 5 deletions devito/__init__.py
Expand Up @@ -21,8 +21,7 @@
from devito.compiler import compiler_registry
from devito.dse import dse_registry
from devito.logger import error, warning, info, logger_registry, set_log_level # noqa
from devito.operator import profiler_registry
from devito.targets import targets_registry
from devito.operator import profiler_registry, operator_registry


from ._version import get_versions # noqa
Expand Down Expand Up @@ -97,9 +96,8 @@ def _at_callback(val): # noqa
configuration.add('dse', 'advanced', list(dse_registry))

# Setup DLE
# Note: for backwards compatibility, this config option is still called 'dle',
# instead of 'target' for example
configuration.add('dle', 'advanced', list(targets_registry))
# Note: for backwards compatibility, this config option is still called 'dle'
configuration.add('dle', 'advanced', list(operator_registry._accepted))
configuration.add('dle-options', {})

# Setup Operator profiling
Expand Down
37 changes: 19 additions & 18 deletions devito/core/__init__.py
Expand Up @@ -5,34 +5,35 @@
"""

from devito.archinfo import Cpu64, Intel64, Arm, Power, Device
from devito.core.cpu import (CPU64NoopOperator, CPU64Operator, Intel64Operator,
ArmOperator, PowerOperator, CustomOperator)
from devito.core.gpu import DeviceOffloadingOperator
from devito.operator.registry import operator_registry
from devito.parameters import Parameters, add_sub_configuration
from devito.targets import (CPU64Target, Intel64Target, ArmTarget, PowerTarget,
CPU64NoopTarget, CustomTarget, DeviceOffloadingTarget,
targets)

core_configuration = Parameters('core')
env_vars_mapper = {}
add_sub_configuration(core_configuration, env_vars_mapper)

# Add core-specific Targets
targets.add(CPU64NoopTarget, Cpu64, 'noop')
targets.add(CPU64Target, Cpu64, 'advanced')
targets.add(CustomTarget, Cpu64, 'custom')
# Add core-specific Operators
operator_registry.add(CPU64NoopOperator, Cpu64, 'noop')
operator_registry.add(CPU64Operator, Cpu64, 'advanced')
operator_registry.add(CustomOperator, Cpu64, 'custom')

targets.add(CPU64NoopTarget, Intel64, 'noop')
targets.add(Intel64Target, Intel64, 'advanced')
targets.add(CustomTarget, Intel64, 'custom')
operator_registry.add(CPU64NoopOperator, Intel64, 'noop')
operator_registry.add(Intel64Operator, Intel64, 'advanced')
operator_registry.add(CustomOperator, Intel64, 'custom')

targets.add(CPU64NoopTarget, Arm, 'noop')
targets.add(ArmTarget, Arm, 'advanced')
targets.add(CustomTarget, Arm, 'custom')
operator_registry.add(CPU64NoopOperator, Arm, 'noop')
operator_registry.add(ArmOperator, Arm, 'advanced')
operator_registry.add(CustomOperator, Arm, 'custom')

targets.add(CPU64NoopTarget, Power, 'noop')
targets.add(PowerTarget, Power, 'advanced')
targets.add(CustomTarget, Power, 'custom')
operator_registry.add(CPU64NoopOperator, Power, 'noop')
operator_registry.add(PowerOperator, Power, 'advanced')
operator_registry.add(CustomOperator, Power, 'custom')

targets.add(CPU64NoopTarget, Device, 'noop')
targets.add(DeviceOffloadingTarget, Device, 'advanced')
operator_registry.add(CPU64NoopOperator, Device, 'noop')
operator_registry.add(DeviceOffloadingOperator, Device, 'advanced')

# The following used by backends.backendSelector
from devito.core.operator import OperatorCore as Operator # noqa
Expand Down
2 changes: 1 addition & 1 deletion devito/core/autotuning.py
Expand Up @@ -9,8 +9,8 @@
from devito.mpi.distributed import MPI, MPINeighborhood
from devito.mpi.routines import MPIMsgEnriched
from devito.parameters import configuration
from devito.passes import BlockDimension
from devito.symbolics import evaluate
from devito.targets import BlockDimension
from devito.tools import filter_ordered, flatten, prod

__all__ = ['autotune']
Expand Down
128 changes: 128 additions & 0 deletions devito/core/cpu.py
@@ -0,0 +1,128 @@
from functools import partial

from devito.core.operator import OperatorCore
from devito.exceptions import InvalidOperator
from devito.passes import (DataManager, Blocker, Ompizer, avoid_denormals,
optimize_halospots, mpiize, loop_wrapping,
minimize_remainders, hoist_prodders)
from devito.tools import as_tuple

__all__ = ['CPU64NoopOperator', 'CPU64Operator', 'Intel64Operator', 'PowerOperator',
'ArmOperator', 'CustomOperator']


class CPU64NoopOperator(OperatorCore):

    """
    An Operator that performs no IET-level optimization: the only lowering
    steps are the placement of symbol definitions and casts.
    """

    @classmethod
    def _specialize_iet(cls, graph, **kwargs):
        """Place symbol definitions and casts on `graph`, then return it."""
        manager = DataManager()
        manager.place_definitions(graph)
        manager.place_casts(graph)
        return graph


class CPU64Operator(CPU64NoopOperator):

    BLOCK_LEVELS = 1
    """
    Default loop blocking depth: 1 => "blocks"; 2 => "blocks" and "sub-blocks";
    3 => "blocks", "sub-blocks", and "sub-sub-blocks"; and so on.
    """

    @classmethod
    def _specialize_iet(cls, graph, **kwargs):
        """
        Run the generic CPU optimization pipeline over `graph`: denormals
        flushing, halo-spot optimization (plus MPI lowering if enabled),
        loop blocking, SIMD and (optionally) OpenMP parallelism, remainder
        minimization, prodder hoisting, and finally symbol definitions.
        """
        options = kwargs['options']
        platform = kwargs['platform']

        # Flush denormal numbers to zero
        avoid_denormals(graph)

        # Distributed-memory parallelism
        optimize_halospots(graph)
        if options['mpi']:
            mpiize(graph, mode=options['mpi'])

        # Loop blocking (tiling); user-supplied depth wins over the default
        levels = options['blocklevels'] or cls.BLOCK_LEVELS
        Blocker(options['blockinner'], levels).make_blocking(graph)

        # SIMD vectorization and, if requested, shared-memory parallelism
        parallelizer = Ompizer()
        parallelizer.make_simd(graph, simd_reg_size=platform.simd_reg_size)
        if options['openmp']:
            parallelizer.make_parallel(graph)

        # Miscellaneous optimizations
        minimize_remainders(graph, simd_items_per_reg=platform.simd_items_per_reg)
        hoist_prodders(graph)

        # Symbol definitions and casts
        manager = DataManager()
        manager.place_definitions(graph)
        manager.place_casts(graph)

        return graph


# Intel64, Power and Arm currently have no architecture-specific pipeline,
# so they reuse the generic CPU64 one
Intel64Operator = CPU64Operator
PowerOperator = CPU64Operator
ArmOperator = CPU64Operator


class CustomOperator(CPU64Operator):

@classmethod
def _make_passes_mapper(cls, **kwargs):
options = kwargs['options']
platform = kwargs['platform']

blocker = Blocker(options['blockinner'],
options['blocklevels'] or cls.BLOCK_LEVELS)

ompizer = Ompizer()

return {
'denormals': partial(avoid_denormals),
'optcomms': partial(optimize_halospots),
'wrapping': partial(loop_wrapping),
'blocking': partial(blocker.make_blocking),
'openmp': partial(ompizer.make_parallel),
'mpi': partial(mpiize, mode=options['mpi']),
'simd': partial(ompizer.make_simd, simd_reg_size=platform.simd_reg_size),
'minrem': partial(minimize_remainders,
simd_items_per_reg=platform.simd_items_per_reg),
'prodders': partial(hoist_prodders)
}

    @classmethod
    def _specialize_iet(cls, graph, **kwargs):
        """
        Apply the user-selected passes, in the given order, to `graph`.

        Raises
        ------
        InvalidOperator
            If one of the requested pass names is unknown.
        """
        options = kwargs['options']
        passes = as_tuple(kwargs['mode'])

        # Fetch passes to be called
        passes_mapper = cls._make_passes_mapper(**kwargs)

        # Call passes
        for i in passes:
            try:
                passes_mapper[i](graph)
            except KeyError:
                raise InvalidOperator("Unknown passes `%s`" % str(passes))

        # Force-call `mpi` if requested via global option
        if 'mpi' not in passes and options['mpi']:
            passes_mapper['mpi'](graph)

        # Force-call `openmp` if requested via global option
        # NOTE(review): mirrors the pre-existing behavior — whether `openmp`
        # should auto-apply when not listed among the custom passes was
        # debated in review
        if 'openmp' not in passes and options['openmp']:
            passes_mapper['openmp'](graph)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For mpi sure, but this one defaults to being used, so maybe it shouldn't be applied if not asked for in the custom options?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'm just replicating master's behavior?

we only end up here when dle != noop/advanced, e.g.

Operator(..., dle=('blocking', 'simd', X))

all the code is doing is that if 'openmp' doesn't appear among X above, then it depends on the value of DEVITO_OPENMP (which is encoded in options['openmp'])


# Symbol definitions
data_manager = DataManager()
data_manager.place_definitions(graph)
data_manager.place_casts(graph)

return graph
42 changes: 21 additions & 21 deletions devito/targets/gpu.py → devito/core/gpu.py
@@ -1,12 +1,12 @@
import cgen as c

from devito.core.operator import OperatorCore
from devito.data import FULL
from devito.ir.support import COLLAPSED
from devito.targets.basic import Target
from devito.targets.common import (DataManager, Ompizer, ParallelTree,
optimize_halospots, mpiize, hoist_prodders)
from devito.passes import (DataManager, Ompizer, ParallelTree, optimize_halospots,
mpiize, hoist_prodders)

__all__ = ['DeviceOffloadingTarget']
__all__ = ['DeviceOffloadingOperator']


class OffloadingOmpizer(Ompizer):
Expand Down Expand Up @@ -131,27 +131,27 @@ def _map_function_on_high_bw_mem(self, obj, storage):
storage._high_bw_mem[obj] = (decl, alloc, free)


class DeviceOffloadingTarget(Target):
class DeviceOffloadingOperator(OperatorCore):

def __init__(self, params, platform):
super(DeviceOffloadingTarget, self).__init__(params, platform)
@classmethod
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']

# Shared-memory parallelizer
self.ompizer = OffloadingOmpizer()
# Distributed-memory parallelism
optimize_halospots(graph)
if options['mpi']:
mpiize(graph, mode=options['mpi'])

# Data manager (declarations, definitions, movemented between
# host and device, ...)
self.data_manager = OffloadingDataManager()
# Shared-memory parallelism
if options['openmp']:
OffloadingOmpizer().make_parallel(graph)

def _pipeline(self, graph):
# Optimization and parallelism
optimize_halospots(graph)
if self.params['mpi']:
mpiize(graph, mode=self.params['mpi'])
if self.params['openmp']:
self.ompizer.make_parallel(graph)
# Misc optimizations
hoist_prodders(graph)

# Symbol definitions
self.data_manager.place_definitions(graph)
self.data_manager.place_casts(graph)
data_manager = OffloadingDataManager()
data_manager.place_definitions(graph)
data_manager.place_casts(graph)

return graph
2 changes: 1 addition & 1 deletion devito/core/operator.py
@@ -1,8 +1,8 @@
from devito.core.autotuning import autotune
from devito.ir.support import align_accesses
from devito.parameters import configuration
from devito.passes import NThreads
from devito.operator import Operator
from devito.targets import NThreads

__all__ = ['OperatorCore']

Expand Down
2 changes: 1 addition & 1 deletion devito/ir/support/properties.py
Expand Up @@ -32,7 +32,7 @@ def __init__(self, name, val=None):
"""
A modulo-N Dimension (i.e., cycling over i, i+1, i+2, ..., i+N-1) that could
safely be turned into a modulo-K Dimension, with K < N. For example:
u[t+1, ...] = f(u[t, ...], u[t-1, ...]) --> u[t-1, ...] = f(u[t, ...], u[t-1, ...]).
u[t+1, ...] = f(u[t, ...]) + u[t-1, ...] --> u[t+1, ...] = f(u[t, ...]) + u[t+1, ...].
"""

ROUNDABLE = Property('roundable')
Expand Down
1 change: 1 addition & 0 deletions devito/operator/__init__.py
@@ -1,2 +1,3 @@
from .operator import Operator # noqa
from .profiling import profiler_registry # noqa
from .registry import operator_registry # noqa