Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect kernel.arguments in non-executable targets' lowered code #650

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions loopy/target/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

from typing import (Any, Tuple, Generic, TypeVar, Sequence, ClassVar, Optional,
TYPE_CHECKING)
import abc

if TYPE_CHECKING:
from loopy.typing import ExpressionT
Expand Down Expand Up @@ -159,6 +160,17 @@ def get_kernel_executor(self, kernel, *args, **kwargs):
"""
raise NotImplementedError()

@property
@abc.abstractmethod
def single_subkernel_is_entrypoint(self) -> bool:
    r"""
    Whether the target's single subkernel doubles as the entrypoint.

    Returns *True* if *self* does NOT support generating code for
    linearized kernels with more than one
    :class:`~loopy.schedule.CallKernel`\ s. This guarantees that the
    :class:`~loopy.schedule.CallKernel` for which we generate code is the
    entrypoint kernel. This also allows the target to skip the invoker
    level code.
    """
    # NOTE: ``abc.abstractproperty`` has been deprecated since Python 3.3;
    # stacking ``property`` on ``abc.abstractmethod`` is the supported
    # spelling. Abstractness is only enforced at instantiation time when the
    # enclosing class uses ``abc.ABCMeta`` (e.g. derives from ``abc.ABC``).


class ASTBuilderBase(Generic[ASTType]):
"""An interface for generating (host or device) ASTs.
Expand Down
17 changes: 16 additions & 1 deletion loopy/target/c/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,8 +849,15 @@ def get_function_declaration(

# subkernel launches occur only as part of entrypoint kernels for now
from loopy.schedule.tools import get_subkernel_arg_info
from loopy.kernel.tools import get_subkernels
skai = get_subkernel_arg_info(kernel, subkernel_name)
passed_names = skai.passed_names
if (self.target.single_subkernel_is_entrypoint
and len(get_subkernels(kernel)) > 1):
raise LoopyError(f"Kernel '{kernel.name}' has more than one"
f" subkernel, not allowed in {self.target}.")
passed_names = (skai.passed_names
if not self.target.single_subkernel_is_entrypoint
else [arg.name for arg in kernel.args])
written_names = skai.written_names
else:
name = Value("static void", name)
Expand Down Expand Up @@ -1342,6 +1349,10 @@ def get_dtype_registry(self):
fill_registry_with_c99_complex_types(result)
return DTypeRegistryWrapper(result)

@property
def single_subkernel_is_entrypoint(self) -> bool:
    """Always *True* for this target.

    A *True* value signals that code is generated for at most one
    subkernel, which is then treated as the entrypoint itself.
    """
    return True


class CASTBuilder(CFamilyASTBuilder):
def preamble_generators(self):
Expand Down Expand Up @@ -1385,6 +1396,10 @@ def get_host_ast_builder(self):
# enable host code generation
return CFamilyASTBuilder(self)

@property
def single_subkernel_is_entrypoint(self) -> bool:
    """Always *False* for this target.

    A *False* value signals that the target does not restrict code
    generation to a single subkernel acting as the entrypoint.
    """
    return False

# }}}


Expand Down
4 changes: 4 additions & 0 deletions loopy/target/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,10 @@ def vector_dtype(self, base, count):

# }}}

@property
def single_subkernel_is_entrypoint(self) -> bool:
    """Always *False* for this target.

    A *False* value signals that the target does not restrict code
    generation to a single subkernel acting as the entrypoint.
    """
    return False

# }}}


Expand Down
11 changes: 9 additions & 2 deletions loopy/target/ispc.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ def get_dtype_registry(self):

# }}}

@property
def single_subkernel_is_entrypoint(self) -> bool:
    """Always *True* for this target.

    A *True* value signals that code is generated for at most one
    subkernel, which is then treated as the entrypoint itself.
    """
    return True


class ISPCASTBuilder(CFamilyASTBuilder):
# {{{ top-level codegen
Expand All @@ -222,7 +226,9 @@ def get_function_declaration(
# subkernel launches occur only as part of entrypoint kernels for now
from loopy.schedule.tools import get_subkernel_arg_info
skai = get_subkernel_arg_info(codegen_state.kernel, subkernel_name)
passed_names = skai.passed_names
passed_names = ([arg.name for arg in kernel.args]
if self.target.single_subkernel_is_entrypoint
else skai.passed_names)
written_names = skai.written_names
else:
passed_names = [arg.name for arg in kernel.args]
Expand Down Expand Up @@ -263,7 +269,8 @@ def get_kernel_call(self, codegen_state: CodeGenerationState,
"assert(programCount == (%s))"
% ecm(lsize[0], PREC_NONE)))

if codegen_state.is_entrypoint:
if (codegen_state.is_entrypoint and
self.target.single_subkernel_is_entrypoint):
# subkernel launches occur only as part of entrypoint kernels for now
from loopy.schedule.tools import get_subkernel_arg_info
skai = get_subkernel_arg_info(codegen_state.kernel, subkernel_name)
Expand Down
4 changes: 4 additions & 0 deletions loopy/target/opencl.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,10 @@ def is_vector_dtype(self, dtype):
def vector_dtype(self, base, count):
return NumpyType(vec.types[base.numpy_dtype, count])

@property
def single_subkernel_is_entrypoint(self) -> bool:
    """Always *False* for this target.

    A *False* value signals that the target does not restrict code
    generation to a single subkernel acting as the entrypoint.
    """
    return False

# }}}


Expand Down
4 changes: 4 additions & 0 deletions loopy/target/pyopencl.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,10 @@ def with_device(self, device):
"stop working in 2022.", DeprecationWarning, stacklevel=2)
return self

@property
def single_subkernel_is_entrypoint(self) -> bool:
    """Always *False* for this target.

    A *False* value signals that the target does not restrict code
    generation to a single subkernel acting as the entrypoint.
    """
    return False

# }}}


Expand Down
18 changes: 18 additions & 0 deletions test/test_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,24 @@ def test_passing_bajillions_of_svm_args(ctx_factory, with_gbarrier):
assert (res[f"c{iargset}"].get() == iargset * multiplier + iargset).all()


def test_non_executable_targets_respect_args():
    """Check that unused kernel arguments still show up in generated C code.

    The kernel only writes to ``a``, yet ``b`` through ``e`` are declared as
    arguments; each of them must appear in the emitted function signature.
    """
    # See https://github.com/inducer/loopy/issues/648
    t_unit = lp.make_kernel(
        "{ : }",
        """
        a[0] = 1729
        """,
        [lp.GlobalArg("a,b,c,d,e",
                      shape=(10,),
                      dtype="float64")],
        target=lp.CTarget()
    )
    code_str = lp.generate_code_v2(t_unit).device_code()

    for var in ["b", "c", "d", "e"]:
        # Substring membership is the idiomatic form of ``find(...) != -1``;
        # the message pinpoints which argument went missing on failure.
        assert f"double const *__restrict__ {var}" in code_str, (
            f"argument '{var}' missing from generated code")


if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
Expand Down