From 6745d1ab2b6868ddf4590098aa02ba28e0efd37e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 9 Jul 2024 11:31:55 -0500 Subject: [PATCH] Enable ruff.isort, apply fixes --- contrib/c-integer-semantics.py | 3 +- contrib/mem-pattern-explorer/pattern_vis.py | 5 +- doc/conf.py | 3 + examples/fortran/matmul-driver.py | 2 + examples/python/call-external.py | 3 +- examples/python/find-centers.py | 2 + examples/python/global_barrier_removal.py | 6 + examples/python/hello-loopy.py | 5 +- examples/python/ispc-stream-harness.py | 17 +- examples/python/rank-one.py | 2 + examples/python/sparse.py | 1 + examples/python/vector-types.py | 2 + loopy/__init__.py | 317 ++++++++++++-------- loopy/__main__.py | 2 + loopy/auto_test.py | 23 +- loopy/check.py | 137 +++++---- loopy/cli.py | 5 +- loopy/codegen/__init__.py | 60 ++-- loopy/codegen/bounds.py | 5 +- loopy/codegen/control.py | 41 ++- loopy/codegen/instruction.py | 23 +- loopy/codegen/loop.py | 24 +- loopy/codegen/result.py | 15 +- loopy/codegen/tools.py | 20 +- loopy/expression.py | 5 +- loopy/frontend/fortran/__init__.py | 23 +- loopy/frontend/fortran/expression.py | 27 +- loopy/frontend/fortran/translator.py | 36 ++- loopy/frontend/fortran/tree.py | 2 +- loopy/ipython_ext.py | 2 +- loopy/isl_helpers.py | 23 +- loopy/kernel/__init__.py | 79 +++-- loopy/kernel/array.py | 43 ++- loopy/kernel/creation.py | 75 +++-- loopy/kernel/data.py | 51 ++-- loopy/kernel/function_interface.py | 32 +- loopy/kernel/instruction.py | 31 +- loopy/kernel/tools.py | 62 ++-- loopy/library/function.py | 8 +- loopy/library/random123.py | 6 +- loopy/library/reduction.py | 11 +- loopy/loop.py | 1 + loopy/match.py | 9 +- loopy/options.py | 5 +- loopy/preprocess.py | 89 +++--- loopy/schedule/__init__.py | 47 ++- loopy/schedule/device_mapping.py | 9 +- loopy/schedule/tools.py | 34 ++- loopy/statistics.py | 71 +++-- loopy/symbolic.py | 89 +++--- loopy/target/__init__.py | 18 +- loopy/target/c/__init__.py | 72 +++-- loopy/target/c/c_execution.py | 32 +- loopy/target/c/codegen/expression.py | 41 +-- loopy/target/cuda.py | 31 +- loopy/target/execution.py | 46 +-- loopy/target/ispc.py | 42 +-- loopy/target/opencl.py | 40 +-- loopy/target/pyopencl.py | 74 +++-- loopy/target/pyopencl_execution.py | 15 +- loopy/target/python.py | 16 +- loopy/tools.py | 43 ++- loopy/transform/add_barrier.py | 3 +- loopy/transform/arithmetic.py | 22 +- loopy/transform/array_buffer_map.py | 13 +- loopy/transform/batch.py | 4 +- loopy/transform/buffer.py | 30 +- loopy/transform/callable.py | 40 ++- loopy/transform/concatenate.py | 10 +- loopy/transform/data.py | 33 +- loopy/transform/diff.py | 8 +- loopy/transform/fusion.py | 21 +- loopy/transform/iname.py | 36 +-- loopy/transform/instruction.py | 10 +- loopy/transform/pack_and_unpack_args.py | 15 +- loopy/transform/padding.py | 7 +- loopy/transform/parameter.py | 8 +- loopy/transform/precompute.py | 57 ++-- loopy/transform/privatize.py | 7 +- loopy/transform/realize_reduction.py | 49 +-- loopy/transform/save.py | 36 ++- loopy/transform/subst.py | 28 +- loopy/translation_unit.py | 33 +- loopy/type_inference.py | 39 ++- loopy/types.py | 5 +- loopy/typing.py | 3 +- loopy/version.py | 2 + proto-tests/test_fem_assembly.py | 8 +- proto-tests/test_sem.py | 7 +- proto-tests/test_sem_tim.py | 7 +- proto-tests/test_tim.py | 7 +- pyproject.toml | 9 +- setup.py | 8 +- test/library_for_test.py | 3 +- test/test_apps.py | 14 +- test/test_c_execution.py | 14 +- test/test_callables.py | 23 +- test/test_dg.py | 11 +- test/test_diff.py | 12 +- test/test_domain.py | 14 +- test/test_einsum.py | 11 +- test/test_expression.py | 20 +- test/test_fortran.py | 14 +- test/test_fusion.py | 8 +- test/test_isl.py | 3 +- test/test_linalg.py | 15 +- test/test_loopy.py | 36 ++- test/test_misc.py | 7 +- test/test_nbody.py | 12 +- test/test_numa_diff.py | 22 +- test/test_reduction.py | 14 +- test/test_scan.py | 14 +- test/test_sem_reagan.py | 8 +- test/test_split_iname_slabs.py | 11 +- test/test_statistics.py | 23 +- test/test_target.py | 26 +- test/test_transform.py | 18 +- test/testlib.py | 3 +- 118 files changed, 1812 insertions(+), 1172 deletions(-) diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py index 38b5b673e..8556430d0 100644 --- a/contrib/c-integer-semantics.py +++ b/contrib/c-integer-semantics.py @@ -1,7 +1,8 @@ #!/usr/bin/env python -from os import system import ctypes +from os import system + C_SRC = """ #include diff --git a/contrib/mem-pattern-explorer/pattern_vis.py b/contrib/mem-pattern-explorer/pattern_vis.py index 54609acd7..82a2b9602 100644 --- a/contrib/mem-pattern-explorer/pattern_vis.py +++ b/contrib/mem-pattern-explorer/pattern_vis.py @@ -1,5 +1,6 @@ import numpy as np + # Inspired by a visualization used in the Halide tutorial # https://www.youtube.com/watch?v=3uiEyEKji0M @@ -9,8 +10,8 @@ def div_ceil(nr, dr): def product(iterable): - from operator import mul from functools import reduce + from operator import mul return reduce(mul, iterable, 1) @@ -42,8 +43,8 @@ def nsubgroups(self): return div_ceil(product(self.lsize), self.subgroup_size) def animate(self, f, interval=200): - import matplotlib.pyplot as plt import matplotlib.animation as animation + import matplotlib.pyplot as plt fig = plt.figure() diff --git a/doc/conf.py b/doc/conf.py index d31748d6d..2369d8b59 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,6 +1,7 @@ import os from urllib.request import urlopen + _conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" # noqa with urlopen(_conf_url) as _inf: exec(compile(_inf.read(), _conf_url, "exec"), globals()) @@ -44,6 +45,8 @@ # this needs a setting of the same name across all packages involved, that's # why this name is as global-sounding as it is. import sys + + sys._BUILDING_SPHINX_DOCS = True nitpicky = True diff --git a/examples/fortran/matmul-driver.py b/examples/fortran/matmul-driver.py index 1f8a09d4b..a68488ef6 100644 --- a/examples/fortran/matmul-driver.py +++ b/examples/fortran/matmul-driver.py @@ -1,8 +1,10 @@ import numpy as np import numpy.linalg as la + import pyopencl as cl import pyopencl.array import pyopencl.clrandom + import loopy as lp diff --git a/examples/python/call-external.py b/examples/python/call-external.py index 6694daf5e..ad5615c7b 100644 --- a/examples/python/call-external.py +++ b/examples/python/call-external.py @@ -1,5 +1,6 @@ -import loopy as lp import numpy as np + +import loopy as lp from loopy.diagnostic import LoopyError from loopy.target.c import CTarget from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa: F401 diff --git a/examples/python/find-centers.py b/examples/python/find-centers.py index 1a7e72391..47ad46289 100644 --- a/examples/python/find-centers.py +++ b/examples/python/find-centers.py @@ -1,9 +1,11 @@ import numpy as np + import pyopencl as cl import loopy as lp from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa: F401 + cl_ctx = cl.create_some_context() knl = lp.make_kernel( diff --git a/examples/python/global_barrier_removal.py b/examples/python/global_barrier_removal.py index d2e3f0f82..a64be1f2e 100644 --- a/examples/python/global_barrier_removal.py +++ b/examples/python/global_barrier_removal.py @@ -1,7 +1,9 @@ import numpy as np + import loopy as lp from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa: F401 + knl = lp.make_kernel( "{ [i,k]: 0<=i None: @for_each_kernel def check_offsets_and_dim_tags(kernel: LoopKernel) -> None: + from pymbolic.primitives import Expression, Variable + from loopy.symbolic import DependencyMapper - from pymbolic.primitives import Variable, Expression arg_name_vars = {Variable(name) for name in kernel.arg_dict} dep_mapper = DependencyMapper() @@ -334,8 +345,7 @@ def check_for_integer_subscript_indices(t_unit): """ Checks if every array access is of type :class:`int`. """ - from loopy.kernel.function_interface import (CallableKernel, - ScalarCallable) + from loopy.kernel.function_interface import CallableKernel, ScalarCallable for clbl in t_unit.callables_table.values(): if isinstance(clbl, CallableKernel): _check_for_integer_subscript_indices_inner(clbl.subkernel, @@ -435,8 +445,15 @@ def check_multiple_tags_allowed(kernel): """ Checks if a multiple tags of an iname are compatible. """ - from loopy.kernel.data import (GroupInameTag, LocalInameTag, VectorizeTag, - UnrollTag, ForceSequentialTag, IlpBaseTag, filter_iname_tags_by_type) + from loopy.kernel.data import ( + ForceSequentialTag, + GroupInameTag, + IlpBaseTag, + LocalInameTag, + UnrollTag, + VectorizeTag, + filter_iname_tags_by_type, + ) illegal_combinations = [ (GroupInameTag, LocalInameTag, VectorizeTag, UnrollTag, ForceSequentialTag), (IlpBaseTag, ForceSequentialTag) @@ -449,7 +466,7 @@ def check_multiple_tags_allowed(kernel): def _check_for_double_use_of_hw_axes_inner(kernel, callables_table): - from loopy.kernel.data import UniqueInameTag, GroupInameTag, LocalInameTag + from loopy.kernel.data import GroupInameTag, LocalInameTag, UniqueInameTag from loopy.kernel.instruction import CallInstruction from loopy.symbolic import ResolvedFunction @@ -477,8 +494,7 @@ def check_for_double_use_of_hw_axes(t_unit): Check if any instruction of *kernel* is within multiple inames tagged with the same hw axis tag. """ - from loopy.kernel.function_interface import (CallableKernel, - ScalarCallable) + from loopy.kernel.function_interface import CallableKernel, ScalarCallable for clbl in t_unit.callables_table.values(): if isinstance(clbl, CallableKernel): _check_for_double_use_of_hw_axes_inner(clbl.subkernel, @@ -525,8 +541,13 @@ def check_for_unused_inames(kernel): def _is_racing_iname_tag(tv, tag): - from loopy.kernel.data import (AddressSpace, - LocalInameTagBase, GroupInameTag, ConcurrentTag, auto) + from loopy.kernel.data import ( + AddressSpace, + ConcurrentTag, + GroupInameTag, + LocalInameTagBase, + auto, + ) if tv.address_space == AddressSpace.PRIVATE: return ( @@ -646,9 +667,10 @@ def _align_and_intersect_with_caller_assumption(callee_assumptions, def _mark_variables_from_caller(expr): - from loopy.symbolic import SubstitutionMapper import pymbolic.primitives as prim + from loopy.symbolic import SubstitutionMapper + def subst_func(x): if isinstance(x, prim.Variable): return prim.Variable(f"_lp_caller_{x.name}") @@ -671,8 +693,7 @@ def _make_slab(self, space, iname, start, stop): @memoize_method def _get_access_range(self, domain, subscript): - from loopy.symbolic import (get_access_map, - UnableToDetermineAccessRangeError) + from loopy.symbolic import UnableToDetermineAccessRangeError, get_access_map try: return get_access_map(domain, subscript).range() except UnableToDetermineAccessRangeError: @@ -757,11 +778,13 @@ def map_call(self, expr, domain, insn_id): super().map_call(expr, domain, insn_id) import pymbolic.primitives as prim - from loopy.kernel.function_interface import (CallableKernel, - get_kw_pos_association) - from loopy.symbolic import (guarded_aff_from_expr, - get_dependencies) + from loopy.diagnostic import ExpressionToAffineConversionError + from loopy.kernel.function_interface import ( + CallableKernel, + get_kw_pos_association, + ) + from loopy.symbolic import get_dependencies, guarded_aff_from_expr if (isinstance(expr.function, ResolvedFunction) and isinstance(self.callables_table[expr.function.name], @@ -919,8 +942,10 @@ def check_write_destinations(kernel): @for_each_kernel def check_has_schedulable_iname_nesting(kernel): - from loopy.transform.iname import (has_schedulable_iname_nesting, - get_iname_duplication_options) + from loopy.transform.iname import ( + get_iname_duplication_options, + has_schedulable_iname_nesting, + ) if not has_schedulable_iname_nesting(kernel): import itertools as it opt = get_iname_duplication_options(kernel) @@ -964,7 +989,7 @@ def declares_nosync_with(kernel, var_address_space, dep_a, dep_b): def _get_address_space(kernel, var): - from loopy.kernel.data import ValueArg, AddressSpace, ArrayArg + from loopy.kernel.data import AddressSpace, ArrayArg, ValueArg if var in kernel.temporary_variables: address_space = kernel.temporary_variables[var].address_space else: @@ -989,6 +1014,7 @@ def _get_topological_order(kernel): :class:`loopy.diagnostic.DependencyCycleFound` exception. """ from pytools.graph import compute_sccs + from loopy.diagnostic import DependencyCycleFound dep_map = {insn.id: insn.depends_on for insn in kernel.instructions} @@ -1303,9 +1329,16 @@ def check_for_nested_base_storage(kernel: LoopKernel) -> None: def _check_for_unused_hw_axes_in_kernel_chunk(kernel, callables_table, sched_index=None): - from loopy.schedule import (CallKernel, RunInstruction, - Barrier, EnterLoop, LeaveLoop, ReturnFromKernel, - get_insn_ids_for_block_at, gather_schedule_block) + from loopy.schedule import ( + Barrier, + CallKernel, + EnterLoop, + LeaveLoop, + ReturnFromKernel, + RunInstruction, + gather_schedule_block, + get_insn_ids_for_block_at, + ) if sched_index is None: group_axes = set() @@ -1329,8 +1362,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, callables_table, # alternative: just disregard length-1 dimensions? - from loopy.kernel.data import (LocalInameTag, AutoLocalInameTagBase, - GroupInameTag) + from loopy.kernel.data import AutoLocalInameTagBase, GroupInameTag, LocalInameTag while i < loop_end_i: sched_item = kernel.linearization[i] @@ -1462,7 +1494,9 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): locally_defined_base_storage = set() from loopy.schedule.tools import ( - temporaries_written_in_subkernel, temporaries_read_in_subkernel) + temporaries_read_in_subkernel, + temporaries_written_in_subkernel, + ) for temporary in temporaries_written_in_subkernel(kernel, subkernel): tval = kernel.temporary_variables[temporary] @@ -1526,10 +1560,10 @@ def check_that_all_insns_are_scheduled(kernel): # {{{ check that shapes and strides are arguments def check_that_shapes_and_strides_are_arguments(kernel): - from loopy.kernel.data import ValueArg + import loopy as lp from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag + from loopy.kernel.data import ValueArg from loopy.symbolic import get_dependencies - import loopy as lp integer_arg_names = { arg.name @@ -1601,9 +1635,9 @@ def _are_sub_array_refs_equivalent(sar1, sar2, caller): != _get_sub_array_ref_swept_range(caller, sar2)): return False - from loopy.symbolic import SubstitutionMapper from pymbolic.mapper.substitutor import make_subst_func - from loopy.symbolic import simplify_via_aff + + from loopy.symbolic import SubstitutionMapper, simplify_via_aff subst_func = make_subst_func({iname1.name: iname2 for iname1, iname2 in zip(sar1.swept_inames, sar2.swept_inames) @@ -1621,8 +1655,8 @@ def _are_sub_array_refs_equivalent(sar1, sar2, caller): def _validate_kernel_call_insn(caller, call_insn, callee): assert call_insn.expression.function.name == callee.name - from loopy.symbolic import SubArrayRef from loopy.kernel.array import ArrayBase + from loopy.symbolic import SubArrayRef arg_id_to_arg = call_insn.arg_id_to_arg() @@ -1671,6 +1705,7 @@ def _validate_kernel_call_insn(caller, call_insn, callee): def _validate_kernel_call_sites_inner(kernel, callables): from pymbolic.primitives import Call + from loopy.kernel.function_interface import CallableKernel for insn in kernel.instructions: @@ -1764,9 +1799,7 @@ def pre_codegen_checks(t_unit): # {{{ sanity-check for implemented domains of each instruction def check_implemented_domains(kernel, implemented_domains, code=None): - from islpy import dim_type - - from islpy import align_two + from islpy import align_two, dim_type last_idomains = None last_insn_inames = None @@ -1799,8 +1832,8 @@ def check_implemented_domains(kernel, implemented_domains, code=None): (insn_impl_domain & assumptions) .project_out_except(insn_inames, [dim_type.set])) - from loopy.kernel.instruction import BarrierInstruction from loopy.kernel.data import LocalInameTag + from loopy.kernel.instruction import BarrierInstruction if isinstance(insn, BarrierInstruction): # project out local-id-mapped inames, solves #94 on gitlab non_lid_inames = frozenset(iname for iname in insn_inames diff --git a/loopy/cli.py b/loopy/cli.py index 4f2804f39..69c35fcea 100644 --- a/loopy/cli.py +++ b/loopy/cli.py @@ -1,8 +1,9 @@ import sys -import loopy as lp import numpy as np +import loopy as lp + def to_python_literal(value): try: @@ -123,8 +124,8 @@ def main(): if lang == "loopy": # {{{ path wrangling - from os.path import dirname, abspath from os import getcwd + from os.path import abspath, dirname infile_dirname = dirname(args.infile) if infile_dirname: diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 5977ade11..53c998233 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -20,46 +20,54 @@ THE SOFTWARE. """ +import logging import sys -from immutables import Map -from typing import (Set, Mapping, Sequence, Any, FrozenSet, Union, - Optional, Tuple, TYPE_CHECKING) from dataclasses import dataclass, replace -import logging +from typing import ( + TYPE_CHECKING, + Any, + FrozenSet, + Mapping, + Optional, + Sequence, + Set, + Tuple, + Union, +) + +from immutables import Map from loopy.codegen.result import CodeGenerationResult from loopy.translation_unit import CallablesTable, TranslationUnit + + logger = logging.getLogger(__name__) +from functools import reduce + import islpy as isl +from pytools import ProcessLogger, UniqueNameGenerator +from pytools.persistent_dict import WriteOncePersistentDict from loopy.diagnostic import LoopyError, warn -from pytools import UniqueNameGenerator - -from pytools.persistent_dict import WriteOncePersistentDict +from loopy.kernel import LoopKernel +from loopy.kernel.function_interface import CallableKernel +from loopy.symbolic import CombineMapper +from loopy.target import TargetBase from loopy.tools import LoopyKeyBuilder, caches -from loopy.version import DATA_MODEL_VERSION from loopy.types import LoopyType from loopy.typing import ExpressionT -from loopy.kernel import LoopKernel -from loopy.target import TargetBase - - -from loopy.symbolic import CombineMapper -from functools import reduce - -from loopy.kernel.function_interface import CallableKernel +from loopy.version import DATA_MODEL_VERSION -from pytools import ProcessLogger if TYPE_CHECKING: - from loopy.codegen.tools import CodegenOperationCacheManager from loopy.codegen.result import GeneratedProgram + from loopy.codegen.tools import CodegenOperationCacheManager if getattr(sys, "_BUILDING_SPHINX_DOCS", False): - from loopy.codegen.tools import CodegenOperationCacheManager # noqa: F811 from loopy.codegen.result import GeneratedProgram # noqa: F811 + from loopy.codegen.tools import CodegenOperationCacheManager # noqa: F811 __doc__ = """ @@ -476,9 +484,11 @@ def diverge_callee_entrypoints(program): If a :class:`loopy.kernel.function_interface.CallableKernel` is both an entrypoint and a callee, then rename the callee. """ - from loopy.translation_unit import (get_reachable_resolved_callable_ids, - rename_resolved_functions_in_a_single_kernel, - make_callable_name_generator) + from loopy.translation_unit import ( + get_reachable_resolved_callable_ids, + make_callable_name_generator, + rename_resolved_functions_in_a_single_kernel, + ) callable_ids = get_reachable_resolved_callable_ids(program.callables_table, program.entrypoints) @@ -568,12 +578,10 @@ def all_code(self): def generate_code_v2(t_unit: TranslationUnit) -> CodeGenerationResult: - from loopy.kernel import LoopKernel - from loopy.translation_unit import make_program - # {{{ cache retrieval - from loopy import CACHING_ENABLED + from loopy.kernel import LoopKernel + from loopy.translation_unit import make_program if CACHING_ENABLED: input_t_unit = t_unit diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index e043f8f12..a066d3425 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -22,10 +22,11 @@ from typing import FrozenSet + import islpy as isl from islpy import dim_type -from loopy.codegen.tools import CodegenOperationCacheManager +from loopy.codegen.tools import CodegenOperationCacheManager from loopy.kernel import LoopKernel @@ -81,8 +82,8 @@ def get_usable_inames_for_conditional( # - local indices may not be used in conditionals that cross barriers. # - ILP indices and vector lane indices are not available in loop # bounds, they only get defined at the innermost level of nesting. + from loopy.kernel.data import IlpBaseTag, LocalInameTagBase, VectorizeTag from loopy.schedule import find_used_inames_within - from loopy.kernel.data import VectorizeTag, LocalInameTagBase, IlpBaseTag usable_concurrent_inames_in_subkernel = frozenset( iname for iname in concurrent_inames_in_subkernel if (not (kernel.iname_tags_of_type(iname, LocalInameTagBase) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 29a7d6d72..bee09229f 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -23,14 +23,21 @@ THE SOFTWARE. """ -import islpy as isl from functools import partial +import islpy as isl + from loopy.codegen.result import merge_codegen_results, wrap_in_if -from loopy.schedule import ( - EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, - gather_schedule_block, generate_sub_sched_items) from loopy.diagnostic import LoopyError +from loopy.schedule import ( + Barrier, + CallKernel, + EnterLoop, + LeaveLoop, + RunInstruction, + gather_schedule_block, + generate_sub_sched_items, +) def generate_code_for_sched_index(codegen_state, sched_index): @@ -40,7 +47,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): if isinstance(sched_item, CallKernel): assert not codegen_state.is_generating_device_code - from loopy.schedule import (gather_schedule_block, get_insn_ids_for_block_at) + from loopy.schedule import gather_schedule_block, get_insn_ids_for_block_at _, past_end_i = gather_schedule_block(kernel.linearization, sched_index) assert past_end_i <= codegen_state.schedule_index_end @@ -71,18 +78,26 @@ def generate_code_for_sched_index(codegen_state, sched_index): return codegen_result elif isinstance(sched_item, EnterLoop): - from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, - ForceSequentialTag, LoopedIlpTag, VectorizeTag, - InameImplementationTag, UnrollHintTag, - InOrderSequentialSequentialTag, filter_iname_tags_by_type) + from loopy.kernel.data import ( + ForceSequentialTag, + InameImplementationTag, + InOrderSequentialSequentialTag, + LoopedIlpTag, + UnrolledIlpTag, + UnrollHintTag, + UnrollTag, + VectorizeTag, + filter_iname_tags_by_type, + ) tags = kernel.iname_tags_of_type(sched_item.iname, InameImplementationTag) tags = tuple(tag for tag in tags if tag) from loopy.codegen.loop import ( - generate_unroll_loop, - generate_vectorize_loop, - generate_sequential_loop_dim_code) + generate_sequential_loop_dim_code, + generate_unroll_loop, + generate_vectorize_loop, + ) if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop @@ -250,8 +265,8 @@ class ScheduleIndexInfo(ImmutableRecord): .. attribute:: used_inames_within """ - from loopy.schedule import find_used_inames_within from loopy.codegen.bounds import get_usable_inames_for_conditional + from loopy.schedule import find_used_inames_within sched_index_info_entries = [ ScheduleIndexInfo( diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 0d13f9c9d..1bc26733e 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -25,12 +25,15 @@ import islpy as isl + + dim_type = isl.dim_type -from loopy.codegen import UnvectorizableError -from loopy.codegen.result import CodeGenerationResult from pymbolic.mapper.stringifier import PREC_NONE from pytools import memoize_on_first_arg +from loopy.codegen import UnvectorizableError +from loopy.codegen.result import CodeGenerationResult + # These 'id' arguments are here because Set has a __hash__ supplied by isl, # which ignores names. This may lead to incorrect things being returned from @@ -76,8 +79,8 @@ def to_codegen_result( required_preds - codegen_state.implemented_predicates) if condition_exprs: - from pymbolic.primitives import LogicalAnd from pymbolic.mapper.stringifier import PREC_NONE + from pymbolic.primitives import LogicalAnd ast = codegen_state.ast_builder.emit_if( codegen_state.expression_to_code_mapper( LogicalAnd(tuple(condition_exprs)), PREC_NONE), @@ -91,7 +94,10 @@ def generate_instruction_code(codegen_state, insn): kernel = codegen_state.kernel from loopy.kernel.instruction import ( - Assignment, CallInstruction, CInstruction, NoOpInstruction + Assignment, + CallInstruction, + CInstruction, + NoOpInstruction, ) if isinstance(insn, Assignment): @@ -147,7 +153,8 @@ def generate_assignment_instruction_code(codegen_state, insn): # }}} - from pymbolic.primitives import Variable, Subscript, Lookup + from pymbolic.primitives import Lookup, Subscript, Variable + from loopy.symbolic import LinearSubscript lhs = insn.assignee @@ -272,10 +279,10 @@ def generate_c_instruction_code(codegen_state, insn): body = [] - from loopy.target.c import POD - from cgen import Initializer, Block, Line - + from cgen import Block, Initializer, Line from pymbolic.primitives import Variable + + from loopy.target.c import POD for name, iname_expr in insn.iname_exprs: if (isinstance(iname_expr, Variable) and name not in codegen_state.var_subst_map): diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index e9230c867..645a57e31 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -21,13 +21,14 @@ """ -from loopy.diagnostic import warn, LoopyError -from loopy.codegen.result import merge_codegen_results import islpy as isl from islpy import dim_type -from loopy.codegen.control import build_loop_nest from pymbolic.mapper.stringifier import PREC_NONE +from loopy.codegen.control import build_loop_nest +from loopy.codegen.result import merge_codegen_results +from loopy.diagnostic import LoopyError, warn + # {{{ conditional-reducing slab decomposition @@ -125,8 +126,7 @@ def generate_unroll_loop(codegen_state, sched_index): bounds = kernel.get_iname_bounds(iname, constants_only=True) - from loopy.isl_helpers import ( - static_max_of_pw_aff, static_value_of_pw_aff) + from loopy.isl_helpers import static_max_of_pw_aff, static_value_of_pw_aff from loopy.symbolic import pw_aff_to_expr length_aff = static_max_of_pw_aff(bounds.size, constants_only=True) @@ -167,8 +167,7 @@ def generate_vectorize_loop(codegen_state, sched_index): bounds = kernel.get_iname_bounds(iname, constants_only=True) - from loopy.isl_helpers import ( - static_max_of_pw_aff, static_value_of_pw_aff) + from loopy.isl_helpers import static_max_of_pw_aff, static_value_of_pw_aff from loopy.symbolic import pw_aff_to_expr length_aff = static_max_of_pw_aff(bounds.size, constants_only=True) @@ -232,9 +231,14 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, hw_inames_left=None): kernel = codegen_state.kernel - from loopy.kernel.data import (UniqueInameTag, HardwareConcurrentTag, - LocalInameTag, GroupInameTag, VectorizeTag, InameImplementationTag) - + from loopy.kernel.data import ( + GroupInameTag, + HardwareConcurrentTag, + InameImplementationTag, + LocalInameTag, + UniqueInameTag, + VectorizeTag, + ) from loopy.schedule import get_insn_ids_for_block_at insn_ids_for_block = get_insn_ids_for_block_at(kernel.linearization, schedule_index) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index bcd3d684a..7fcb4294a 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -20,9 +20,18 @@ THE SOFTWARE. """ -from typing import (Any, Sequence, Mapping, Tuple, Optional, TYPE_CHECKING, Union, - Dict, List) from dataclasses import dataclass, replace +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Mapping, + Optional, + Sequence, + Tuple, + Union, +) import islpy as isl @@ -301,8 +310,8 @@ def merge_codegen_results( def wrap_in_if(codegen_state, condition_exprs, inner): if condition_exprs: - from pymbolic.primitives import LogicalAnd from pymbolic.mapper.stringifier import PREC_NONE + from pymbolic.primitives import LogicalAnd cur_ast = inner.current_ast(codegen_state) return inner.with_new_ast( codegen_state, diff --git a/loopy/codegen/tools.py b/loopy/codegen/tools.py index 22156807a..cb6285b08 100644 --- a/loopy/codegen/tools.py +++ b/loopy/codegen/tools.py @@ -20,17 +20,25 @@ THE SOFTWARE. """ +from dataclasses import dataclass from functools import cached_property +from typing import Dict, FrozenSet, List + from pytools import memoize_method -from loopy.schedule import (EnterLoop, LeaveLoop, CallKernel, ReturnFromKernel, - Barrier, BeginBlockItem, gather_schedule_block, - ScheduleItem) -from dataclasses import dataclass -from typing import FrozenSet, List, Dict -from loopy.kernel.instruction import InstructionBase from loopy.kernel import LoopKernel from loopy.kernel.data import Iname +from loopy.kernel.instruction import InstructionBase +from loopy.schedule import ( + Barrier, + BeginBlockItem, + CallKernel, + EnterLoop, + LeaveLoop, + ReturnFromKernel, + ScheduleItem, + gather_schedule_block, +) __doc__ = """ diff --git a/loopy/expression.py b/loopy/expression.py index 57c7f2e61..224521b03 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -115,10 +115,11 @@ def map_subscript(self, expr): index = expr.index_tuple - from loopy.symbolic import get_dependencies - from loopy.kernel.array import VectorArrayDimTag from pymbolic.primitives import Variable + from loopy.kernel.array import VectorArrayDimTag + from loopy.symbolic import get_dependencies + possible = None for i in range(len(var.shape)): if ( diff --git a/loopy/frontend/fortran/__init__.py b/loopy/frontend/fortran/__init__.py index 5a5d628a5..5e6ff24d0 100644 --- a/loopy/frontend/fortran/__init__.py +++ b/loopy/frontend/fortran/__init__.py @@ -21,11 +21,14 @@ """ import logging + + logger = logging.getLogger(__name__) -from loopy.diagnostic import LoopyError from pytools import ProcessLogger +from loopy.diagnostic import LoopyError + def c_preprocess(source, defines=None, filename=None, include_paths=None): """ @@ -35,8 +38,8 @@ def c_preprocess(source, defines=None, filename=None, include_paths=None): :return: a string """ try: - import ply.lex as lex import ply.cpp as cpp + import ply.lex as lex except ImportError as err: raise LoopyError( "Using the C preprocessor requires PLY to be installed") from err @@ -199,17 +202,18 @@ def parse_transformed_fortran(source, free_form=True, strict=True, else: proc_dict = transform_code_context.copy() - import loopy as lp import numpy as np + import loopy as lp + proc_dict["lp"] = lp proc_dict["np"] = np proc_dict["SOURCE"] = source proc_dict["FILENAME"] = filename - from os.path import dirname, abspath from os import getcwd + from os.path import abspath, dirname infile_dirname = dirname(filename) if infile_dirname: @@ -255,11 +259,16 @@ def _add_assignees_to_calls(knl, all_kernels): """ new_insns = [] subroutine_dict = {kernel.name: kernel for kernel in all_kernels} - from loopy.kernel.instruction import (Assignment, CallInstruction, - CInstruction, _DataObliviousInstruction, - modify_assignee_for_array_call) from pymbolic.primitives import Call, Variable + from loopy.kernel.instruction import ( + Assignment, + CallInstruction, + CInstruction, + _DataObliviousInstruction, + modify_assignee_for_array_call, + ) + for insn in knl.instructions: if isinstance(insn, CallInstruction): if isinstance(insn.expression, Call) and ( diff --git a/loopy/frontend/fortran/expression.py b/loopy/frontend/fortran/expression.py index cc93e914d..bdd006657 100644 --- a/loopy/frontend/fortran/expression.py +++ b/loopy/frontend/fortran/expression.py @@ -20,14 +20,15 @@ THE SOFTWARE. """ -from pymbolic.parser import Parser as ExpressionParserBase -from loopy.frontend.fortran.diagnostic import TranslationError - +import re from sys import intern + import numpy as np import pytools.lex -import re +from pymbolic.parser import Parser as ExpressionParserBase + +from loopy.frontend.fortran.diagnostic import TranslationError _less_than = intern("less_than") @@ -85,9 +86,8 @@ def __init__(self, tree_walker): def parse_terminal(self, pstate): scope = self.tree_walker.scope_stack[-1] - from pymbolic.primitives import Subscript, Call, Variable - from pymbolic.parser import ( - _identifier, _openpar, _closepar, _float) + from pymbolic.parser import _closepar, _float, _identifier, _openpar + from pymbolic.primitives import Call, Subscript, Variable next_tag = pstate.next_tag() if next_tag is _float: @@ -151,8 +151,8 @@ def parse_terminal(self, pstate): } def parse_prefix(self, pstate, min_precedence=0): - from pymbolic.parser import _PREC_UNARY import pymbolic.primitives as primitives + from pymbolic.parser import _PREC_UNARY pstate.expect_not_end() @@ -165,10 +165,13 @@ def parse_prefix(self, pstate, min_precedence=0): def parse_postfix(self, pstate, min_precedence, left_exp): from pymbolic.parser import ( - _PREC_CALL, _PREC_COMPARISON, _openpar, - _PREC_LOGICAL_OR, _PREC_LOGICAL_AND) - from pymbolic.primitives import ( - Comparison, LogicalAnd, LogicalOr) + _PREC_CALL, + _PREC_COMPARISON, + _PREC_LOGICAL_AND, + _PREC_LOGICAL_OR, + _openpar, + ) + from pymbolic.primitives import Comparison, LogicalAnd, LogicalOr next_tag = pstate.next_tag() if next_tag is _openpar and _PREC_CALL > min_precedence: diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index 9340aa465..530e92678 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -21,24 +21,26 @@ """ import re - from sys import intern -from immutables import Map +from warnings import warn -import loopy as lp import numpy as np -from warnings import warn -from loopy.frontend.fortran.tree import FTreeWalkerBase -from loopy.diagnostic import warn_with_kernel -from loopy.frontend.fortran.diagnostic import ( - TranslationError, TranslatorWarning) +from immutables import Map + import islpy as isl from islpy import dim_type -from loopy.symbolic import (IdentityMapper, RuleAwareIdentityMapper, - SubstitutionRuleMappingContext) -from loopy.diagnostic import LoopyError +from pymbolic.primitives import Slice, Wildcard + +import loopy as lp +from loopy.diagnostic import LoopyError, warn_with_kernel +from loopy.frontend.fortran.diagnostic import TranslationError, TranslatorWarning +from loopy.frontend.fortran.tree import FTreeWalkerBase from loopy.kernel.instruction import LegacyStringInstructionTag -from pymbolic.primitives import (Wildcard, Slice) +from loopy.symbolic import ( + IdentityMapper, + RuleAwareIdentityMapper, + SubstitutionRuleMappingContext, +) # {{{ subscript base shifter @@ -228,6 +230,7 @@ def get_loopy_shape(self, name): def process_expression_for_loopy(self, expr): from pymbolic.mapper.substitutor import make_subst_func + from loopy.symbolic import SubstitutionMapper submap = SubstitutionMapper( @@ -258,6 +261,7 @@ def read_vars(self): class FortranDivisionToFloorDiv(IdentityMapper): def map_fortran_division(self, expr, *args): from warnings import warn + from loopy.diagnostic import LoopyWarning warn( "Integer division in Fortran do loop bound. " @@ -286,7 +290,7 @@ def map_fortran_division(self, expr, *args): except TypeInferenceFailure: return super().map_fortran_division(expr, *args) - from pymbolic.primitives import Quotient, FloorDiv + from pymbolic.primitives import FloorDiv, Quotient if num_dtype.kind in "iub" and den_dtype.kind in "iub": warn_with_kernel(self.kernel, "fortran_int_div", @@ -309,8 +313,8 @@ def _specialize_fortran_division_for_kernel(knl, callables): def specialize_fortran_division(t_unit): - from loopy.translation_unit import TranslationUnit, resolve_callables from loopy.kernel.function_interface import CallableKernel + from loopy.translation_unit import TranslationUnit, resolve_callables from loopy.type_inference import infer_unknown_types assert isinstance(t_unit, TranslationUnit) @@ -553,7 +557,7 @@ def map_Assignment(self, node): lhs = scope.process_expression_for_loopy( self.parse_expr(node, node.variable)) - from pymbolic.primitives import Subscript, Call + from pymbolic.primitives import Call, Subscript if isinstance(lhs, Call): raise TranslationError("function call (to '%s') on left hand side of" "assignment--check for misspelled variable name" % lhs) @@ -691,7 +695,7 @@ def construct_else_condition(self): self.conditions.pop() - from pymbolic.primitives import LogicalNot, LogicalAnd + from pymbolic.primitives import LogicalAnd, LogicalNot else_expr = LogicalNot(prev_cond) if context_cond is not None: else_expr = LogicalAnd((else_expr, context_cond)) diff --git a/loopy/frontend/fortran/tree.py b/loopy/frontend/fortran/tree.py index e5c3fa489..b2af66f08 100644 --- a/loopy/frontend/fortran/tree.py +++ b/loopy/frontend/fortran/tree.py @@ -23,7 +23,7 @@ import re from loopy.diagnostic import LoopyError -from loopy.symbolic import IdentityMapper, FortranDivision +from loopy.symbolic import FortranDivision, IdentityMapper class DivisionToFortranDivisionMapper(IdentityMapper): diff --git a/loopy/ipython_ext.py b/loopy/ipython_ext.py index a469b4648..ba734d8ba 100644 --- a/loopy/ipython_ext.py +++ b/loopy/ipython_ext.py @@ -1,4 +1,4 @@ -from IPython.core.magic import (magics_class, Magics, cell_magic) +from IPython.core.magic import Magics, cell_magic, magics_class import loopy as lp diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 59716edb8..160b6415b 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -23,11 +23,12 @@ """ -from loopy.diagnostic import StaticValueFindingError, LoopyError +from warnings import warn import islpy as isl from islpy import dim_type -from warnings import warn + +from loopy.diagnostic import LoopyError, StaticValueFindingError def pw_aff_to_aff(pw_aff): @@ -93,6 +94,7 @@ def make_slab(space, iname, start, stop, iname_multiplier=1): space = zero.get_domain_space() from pymbolic.primitives import Expression + from loopy.symbolic import aff_from_expr if isinstance(start, Expression): start = aff_from_expr(space, start) @@ -230,7 +232,7 @@ def static_extremum_of_pw_aff(pw_aff, constants_only, set_method, what, context) % (what, pw_aff)) return result - from pytools import memoize, flatten + from pytools import flatten, memoize @memoize def is_bounded(set): @@ -340,9 +342,10 @@ def _align_and_intersect(d1, d2): def is_nonnegative(expr, over_set): - from loopy.symbolic import aff_from_expr from pymbolic.primitives import Product + from loopy.symbolic import aff_from_expr + if isinstance(expr, Product) and all( is_nonnegative(child, over_set) for child in expr.children): return True @@ -684,7 +687,7 @@ def subst_into_pwqpolynomial(new_space, poly, subst_dict): poly, subst_domain, subst_dict = get_param_subst_domain( new_space, poly, subst_dict) - from loopy.symbolic import qpolynomial_to_expr, qpolynomial_from_expr + from loopy.symbolic import qpolynomial_from_expr, qpolynomial_to_expr new_pieces = [] for valid_set, qpoly in poly.get_pieces(): valid_set = valid_set & subst_domain @@ -692,8 +695,7 @@ def subst_into_pwqpolynomial(new_space, poly, subst_dict): continue valid_set = valid_set.project_out(dim_type.param, 0, i_begin_subst_space) - from pymbolic.mapper.substitutor import ( - SubstitutionMapper, make_subst_func) + from pymbolic.mapper.substitutor import SubstitutionMapper, make_subst_func sub_mapper = SubstitutionMapper(make_subst_func(subst_dict)) expr = sub_mapper(qpolynomial_to_expr(qpoly)) qpoly = qpolynomial_from_expr(valid_set.space, expr) @@ -724,11 +726,12 @@ def subst_into_pwaff(new_space, pwaff, subst_dict): parameters of *new_space*. The expression must be affine in the param dims of *new_space*. """ - from pymbolic.mapper.substitutor import ( - SubstitutionMapper, make_subst_func) - from loopy.symbolic import aff_from_expr, aff_to_expr from functools import reduce + from pymbolic.mapper.substitutor import SubstitutionMapper, make_subst_func + + from loopy.symbolic import aff_from_expr, aff_to_expr + i_begin_subst_space = pwaff.dim(dim_type.param) pwaff, subst_domain, subst_dict = get_param_subst_domain( new_space, pwaff, subst_dict) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index d3b6ec0ea..a9b3bb07e 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -22,43 +22,64 @@ THE SOFTWARE. """ -from functools import cached_property +from collections import defaultdict +from dataclasses import dataclass, field, fields, replace from enum import IntEnum +from functools import cached_property from sys import intern from typing import ( - Dict, Sequence, Tuple, Mapping, Optional, FrozenSet, Any, Union, - Callable, Iterator, List, Set, TYPE_CHECKING) -from dataclasses import dataclass, replace, field, fields + TYPE_CHECKING, + Any, + Callable, + Dict, + FrozenSet, + Iterator, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, + Union, +) from warnings import warn -from collections import defaultdict - import numpy as np -from pytools import (memoize_method, - UniqueNameGenerator, generate_unique_names, natsorted) -from pytools.tag import Taggable, Tag -import islpy as isl -from islpy import dim_type from immutables import Map -from loopy.diagnostic import CannotBranchDomainTree, LoopyError -from loopy.tools import update_persistent_hash -from loopy.diagnostic import StaticValueFindingError +import islpy as isl +from islpy import dim_type +from pytools import ( + UniqueNameGenerator, + generate_unique_names, + memoize_method, + natsorted, +) +from pytools.tag import Tag, Taggable + +from loopy.diagnostic import CannotBranchDomainTree, LoopyError, StaticValueFindingError from loopy.kernel.data import ( - _ArraySeparationInfo, - KernelArgument, - filter_iname_tags_by_type, Iname, - TemporaryVariable, ValueArg, ArrayArg, SubstitutionRule) + ArrayArg, + Iname, + KernelArgument, + SubstitutionRule, + TemporaryVariable, + ValueArg, + _ArraySeparationInfo, + filter_iname_tags_by_type, +) from loopy.kernel.instruction import InstructionBase -from loopy.types import LoopyType, NumpyType from loopy.options import Options from loopy.schedule import ScheduleItem -from loopy.typing import ExpressionT from loopy.target import TargetBase +from loopy.tools import update_persistent_hash +from loopy.types import LoopyType, NumpyType +from loopy.typing import ExpressionT + if TYPE_CHECKING: - from loopy.kernel.function_interface import InKernelCallable from loopy.codegen import PreambleInfo + from loopy.kernel.function_interface import InKernelCallable # {{{ loop kernel object @@ -785,9 +806,7 @@ def scalar_loop_args(self): @memoize_method def global_var_names(self): - from loopy.kernel.data import AddressSpace - - from loopy.kernel.data import ArrayArg + from loopy.kernel.data import AddressSpace, ArrayArg return ( { arg.name for arg in self.args @@ -909,8 +928,10 @@ def get_grid_sizes_for_insn_ids_as_dicts(self, insn_ids, # }}} from loopy.kernel.data import ( - GroupInameTag, LocalInameTag, - AutoLocalInameTagBase) + AutoLocalInameTagBase, + GroupInameTag, + LocalInameTag, + ) for iname in all_inames_by_insns: tags = self.iname_tags_of_type( @@ -1270,7 +1291,8 @@ def __getstate__(self): # cache retrieval for execution. from loopy.kernel.instruction import _get_insn_eq_key, _get_insn_hash_key from loopy.tools import ( - LazilyUnpicklingListWithEqAndPersistentHashing as LazyList) + LazilyUnpicklingListWithEqAndPersistentHashing as LazyList, + ) result["instructions"] = LazyList( self.instructions, @@ -1360,8 +1382,9 @@ def __setstate__(self, state): @memoize_method def __hash__(self): - from loopy.tools import LoopyKeyBuilder import hashlib + + from loopy.tools import LoopyKeyBuilder key_hash = hashlib.sha256() self.update_persistent_hash(key_hash, LoopyKeyBuilder()) return hash(key_hash.digest()) diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 48231abdb..43e1f86b5 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -2,6 +2,7 @@ from __future__ import annotations + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -24,28 +25,41 @@ THE SOFTWARE. """ +import re import sys -from typing import (cast, Optional, Tuple, Union, FrozenSet, Type, Sequence, - List, Callable, ClassVar, TypeVar, TYPE_CHECKING) from dataclasses import dataclass -import re +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + FrozenSet, + List, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, + cast, +) from warnings import warn -from pytools import ImmutableRecord -from pytools.tag import Taggable, Tag - import numpy as np # noqa +from pytools import ImmutableRecord +from pytools.tag import Tag, Taggable + from loopy.diagnostic import LoopyError from loopy.tools import is_integer -from loopy.typing import ExpressionT, ShapeType from loopy.types import LoopyType +from loopy.typing import ExpressionT, ShapeType + if TYPE_CHECKING: - from loopy.target import TargetBase - from loopy.kernel import LoopKernel - from loopy.kernel.data import auto, TemporaryVariable, ArrayArg from loopy.codegen import VectorizationInfo + from loopy.kernel import LoopKernel + from loopy.kernel.data import ArrayArg, TemporaryVariable, auto + from loopy.target import TargetBase if getattr(sys, "_BUILDING_SPHINX_DOCS", False): from loopy.target import TargetBase # noqa: F811 @@ -760,7 +774,6 @@ def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0, raise TypeError("invalid kwarg: %s" % kwarg_name) import loopy as lp - from loopy.types import to_loopy_type dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True, for_atomic=for_atomic) @@ -918,8 +931,9 @@ def __hash__(self): def __eq__(self, other): from loopy.symbolic import ( - is_tuple_of_expressions_equal as istoee, - is_expression_equal as isee) + is_expression_equal as isee, + is_tuple_of_expressions_equal as istoee, + ) return ( type(self) is type(other) and self.name == other.name @@ -1156,9 +1170,10 @@ def _apply_offset(sub: ExpressionT, ary: ArrayBase) -> ExpressionT: :arg ary: An instance of :class:`ArrayBase`. :arg array_name: Name to reference *ary* by. """ - import loopy as lp from pymbolic import var + import loopy as lp + if ary.offset: from loopy.kernel.data import TemporaryVariable if isinstance(ary, TemporaryVariable): diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 10aab71f1..c4cc880a0 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -23,29 +23,34 @@ THE SOFTWARE. """ +import logging +import re +from sys import intern + import numpy as np -from pymbolic.mapper import CSECachingMapperMixin -from pymbolic.primitives import Slice, Variable, Subscript, Call -from loopy.kernel.array import FixedStrideArrayDimTag -from loopy.tools import intern_frozenset_of_ids, Optional -from loopy.symbolic import ( - IdentityMapper, WalkMapper, SubArrayRef) -from loopy.kernel.data import ( - InstructionBase, - MultiAssignmentBase, Assignment, - SubstitutionRule, AddressSpace, ValueArg, auto) -from loopy.translation_unit import for_each_kernel -from loopy.diagnostic import LoopyError, warn_with_kernel import islpy as isl from islpy import dim_type +from pymbolic.mapper import CSECachingMapperMixin +from pymbolic.primitives import Call, Slice, Subscript, Variable from pytools import ProcessLogger -from sys import intern +from loopy.diagnostic import LoopyError, warn_with_kernel +from loopy.kernel.array import FixedStrideArrayDimTag +from loopy.kernel.data import ( + AddressSpace, + Assignment, + InstructionBase, + MultiAssignmentBase, + SubstitutionRule, + ValueArg, + auto, +) +from loopy.symbolic import IdentityMapper, SubArrayRef, WalkMapper +from loopy.tools import Optional, intern_frozenset_of_ids +from loopy.translation_unit import for_each_kernel -import re -import logging logger = logging.getLogger(__name__) @@ -80,7 +85,9 @@ def _normalize_string_tag(tag): from pytools.tag import Tag from loopy.kernel.instruction import ( - UseStreamingStoreTag, LegacyStringInstructionTag) + LegacyStringInstructionTag, + UseStreamingStoreTag, + ) if tag == "!streaming_store": return UseStreamingStoreTag() else: @@ -169,6 +176,7 @@ def expand_defines_in_expr(expr, defines): return expr from pymbolic.primitives import Variable + from loopy.symbolic import parse def subst_func(var): @@ -182,7 +190,7 @@ def subst_func(var): else: return None - from loopy.symbolic import SubstitutionMapper, PartialEvaluationMapper + from loopy.symbolic import PartialEvaluationMapper, SubstitutionMapper return PartialEvaluationMapper()( SubstitutionMapper(subst_func)(expr)) @@ -211,6 +219,7 @@ def get_default_insn_options_dict(): from collections import namedtuple + _NosyncParseResult = namedtuple("_NosyncParseResult", "expr, scope") @@ -501,7 +510,8 @@ def parse_insn(groups, insn_options): "the following error occurred:" % groups["rhs"]) raise - from pymbolic.primitives import Variable, Subscript, Lookup + from pymbolic.primitives import Lookup, Subscript, Variable + from loopy.symbolic import TypeAnnotation if not isinstance(lhs, tuple): @@ -584,7 +594,7 @@ def parse_subst_rule(groups): "the following error occurred:" % groups["rhs"]) raise - from pymbolic.primitives import Variable, Call + from pymbolic.primitives import Call, Variable if isinstance(lhs, Variable): subst_name = lhs.name arg_names = [] @@ -631,7 +641,7 @@ def parse_special_insn(groups, insn_options): else insn_id), **insn_options) - from loopy.kernel.instruction import NoOpInstruction, BarrierInstruction + from loopy.kernel.instruction import BarrierInstruction, NoOpInstruction special_insn_kind = groups["kind"] # check for bad options check_illegal_options(insn_options, special_insn_kind) @@ -1186,7 +1196,7 @@ def run_irf(expr): def make_new_arg(self, arg_name): arg_name = arg_name.strip() import loopy as lp - from loopy.kernel.data import ValueArg, ArrayArg + from loopy.kernel.data import ArrayArg, ValueArg if arg_name in self.all_params: return ValueArg(arg_name) @@ -1575,8 +1585,9 @@ def determine_shapes_of_temporaries(knl): if tv.shape is lp.auto or tv.base_indices is lp.auto: vars_needing_shape_inference.add(tv.name) - from loopy.kernel.instruction import Assignment from pymbolic.primitives import Variable + + from loopy.kernel.instruction import Assignment for insn in knl.instructions: # If there's an assignment to a var without a subscript # then assume that the variable is a scalar. @@ -1769,8 +1780,8 @@ def _is_wildcard(s): def _resolve_dependencies(what, knl, insn, deps): - from loopy.transform.instruction import find_instructions from loopy.match import MatchExpressionBase + from loopy.transform.instruction import find_instructions new_deps = [] @@ -1960,9 +1971,10 @@ def normalize_slice_params(slice, dimension_length): :arg slice: An instance of :class:`pymbolic.primitives.Slice`. :arg dimension_length: Length of the axis being sliced. """ - from pymbolic.primitives import Slice from numbers import Integral + from pymbolic.primitives import Slice + assert isinstance(slice, Slice) start, stop, step = slice.start, slice.stop, slice.step @@ -2318,9 +2330,8 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): # {{{ handle kernel language version - from loopy.version import LANGUAGE_VERSION_SYMBOLS - import loopy.version as v + from loopy.version import LANGUAGE_VERSION_SYMBOLS version_to_symbol = { getattr(v, lvs): lvs for lvs in LANGUAGE_VERSION_SYMBOLS} @@ -2348,10 +2359,12 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): if lang_version is None: from warnings import warn + from loopy.diagnostic import LoopyWarning from loopy.version import ( - MOST_RECENT_LANGUAGE_VERSION, - FALLBACK_LANGUAGE_VERSION) + FALLBACK_LANGUAGE_VERSION, + MOST_RECENT_LANGUAGE_VERSION, + ) warn("'lang_version' was not passed to make_function(). " "To avoid this warning, pass " "lang_version={ver} in this invocation. " @@ -2376,7 +2389,7 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): # {{{ separate temporary variables and arguments, take care of names with commas - from loopy.kernel.data import TemporaryVariable, ArrayBase + from loopy.kernel.data import ArrayBase, TemporaryVariable if isinstance(kernel_data, str): kernel_data = kernel_data.split(",") @@ -2456,8 +2469,8 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): # }}} - from loopy.kernel.data import Iname from loopy.kernel import _get_inames_from_domains + from loopy.kernel.data import Iname inames = {name: Iname(name, frozenset()) for name in _get_inames_from_domains(domains)} @@ -2477,7 +2490,7 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): kwargs["substitutions"] = substitutions - from pytools.tag import normalize_tags, check_tag_uniqueness + from pytools.tag import check_tag_uniqueness, normalize_tags tags = check_tag_uniqueness(normalize_tags(kwargs.pop("tags", frozenset()))) index_dtype = kwargs.pop("index_dtype", None) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index fdd0ad248..bdac071da 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -24,35 +24,45 @@ """ -from typing import (Type, Union, FrozenSet, Tuple, Optional, Sequence, Any, ClassVar, - cast) -from sys import intern from dataclasses import dataclass, replace from enum import IntEnum +from sys import intern +from typing import ( + Any, + ClassVar, + FrozenSet, + Optional, + Sequence, + Tuple, + Type, + Union, + cast, +) from warnings import warn -from immutables import Map import numpy as np # noqa +from immutables import Map + from pytools import ImmutableRecord -from pytools.tag import Taggable -from pytools.tag import UniqueTag as UniqueTagBase, Tag +from pytools.tag import Tag, Taggable, UniqueTag as UniqueTagBase -from loopy.kernel.array import ArrayBase, ArrayDimImplementationTag from loopy.diagnostic import LoopyError -from loopy.typing import ExpressionT, ShapeType -from loopy.types import LoopyType, auto +from loopy.kernel.array import ArrayBase, ArrayDimImplementationTag from loopy.kernel.instruction import ( # noqa - InstructionBase, - MemoryOrdering, - MemoryScope, - VarAtomicity, - AtomicInit, - AtomicUpdate, - MultiAssignmentBase, - Assignment, - CallInstruction, - make_assignment, - CInstruction) + Assignment, + AtomicInit, + AtomicUpdate, + CallInstruction, + CInstruction, + InstructionBase, + MemoryOrdering, + MemoryScope, + MultiAssignmentBase, + VarAtomicity, + make_assignment, +) +from loopy.types import LoopyType, auto +from loopy.typing import ExpressionT, ShapeType __doc__ = """ @@ -85,6 +95,7 @@ def _names_from_expr(expr: Union[None, ExpressionT, str]) -> FrozenSet[str]: from numbers import Number + from loopy.symbolic import DependencyMapper dep_mapper = DependencyMapper() diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index a920537f3..e3fcf108a 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -1,5 +1,6 @@ from __future__ import annotations + __copyright__ = "Copyright (C) 2018 Andreas Kloeckner, Kaushik Kulkarni" __license__ = """ @@ -22,16 +23,17 @@ THE SOFTWARE. """ -from typing import ClassVar, FrozenSet, Tuple, TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar, FrozenSet, Tuple from pytools import ImmutableRecord -from loopy.diagnostic import LoopyError -from loopy.tools import update_persistent_hash +from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel from loopy.kernel.array import ArrayBase -from loopy.kernel.data import ValueArg, ArrayArg +from loopy.kernel.data import ArrayArg, ValueArg from loopy.symbolic import DependencyMapper, WalkMapper +from loopy.tools import update_persistent_hash + if TYPE_CHECKING: from loopy.translation_unit import CallablesTable, FunctionIdT @@ -175,7 +177,7 @@ def map_subscript(self, expr): self.rec(child) def map_variable(self, expr): - from loopy.kernel.data import TemporaryVariable, ArrayArg, auto + from loopy.kernel.data import ArrayArg, TemporaryVariable, auto if expr.name in self.kernel.all_inames(): # inames are scalar return @@ -202,9 +204,8 @@ def get_arg_descriptor_for_expression(kernel, expr): describing the argument expression *expr* which occurs in a call in the code of *kernel*. """ - from loopy.symbolic import (SubArrayRef, pw_aff_to_expr, - SweptInameStrideCollector) - from loopy.kernel.data import TemporaryVariable, ArrayArg + from loopy.kernel.data import ArrayArg, TemporaryVariable + from loopy.symbolic import SubArrayRef, SweptInameStrideCollector, pw_aff_to_expr if isinstance(expr, SubArrayRef): name = expr.subscript.aggregate.name @@ -598,10 +599,11 @@ def emit_call_insn(self, insn, target, expression_to_code_mapper): if not isinstance(target, CFamilyTarget): raise NotImplementedError() - from loopy.kernel.instruction import CallInstruction - from loopy.expression import dtype_to_type_context - from pymbolic.mapper.stringifier import PREC_NONE from pymbolic import var + from pymbolic.mapper.stringifier import PREC_NONE + + from loopy.expression import dtype_to_type_context + from loopy.kernel.instruction import CallInstruction assert isinstance(insn, CallInstruction) assert self.is_ready_for_codegen() @@ -727,8 +729,7 @@ def with_types(self, arg_id_to_dtype, callables_table): else: new_args.append(arg) - from loopy.type_inference import ( - infer_unknown_types_for_a_single_kernel) + from loopy.type_inference import infer_unknown_types_for_a_single_kernel pre_specialized_subkernel = self.subkernel.copy( args=new_args) @@ -941,9 +942,10 @@ def emit_call_insn(self, insn, target, expression_to_code_mapper): assignee_write_count -= 1 # no type casting in array calls - from loopy.expression import dtype_to_type_context - from pymbolic.mapper.stringifier import PREC_NONE from pymbolic import var + from pymbolic.mapper.stringifier import PREC_NONE + + from loopy.expression import dtype_to_type_context tgt_parameters = [ecm(par, PREC_NONE, dtype_to_type_context(target, par_dtype), diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index b6d2e5212..68099ce2e 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -20,18 +20,18 @@ THE SOFTWARE. """ -from sys import intern +from collections.abc import Set as abc_Set from functools import cached_property +from sys import intern from typing import FrozenSet - from warnings import warn + import islpy as isl from pytools import ImmutableRecord, memoize_method -from pytools.tag import Tag, tag_dataclass, Taggable +from pytools.tag import Tag, Taggable, tag_dataclass from loopy.diagnostic import LoopyError from loopy.tools import Optional -from collections.abc import Set as abc_Set # {{{ instruction tags @@ -469,7 +469,8 @@ def __setstate__(self, val): def _get_assignee_var_name(expr): - from pymbolic.primitives import Variable, Subscript, Lookup + from pymbolic.primitives import Lookup, Subscript, Variable + from loopy.symbolic import LinearSubscript, SubArrayRef if isinstance(expr, Lookup): @@ -501,8 +502,9 @@ def _get_assignee_var_name(expr): def _get_assignee_subscript_deps(expr): - from pymbolic.primitives import Variable, Subscript, Lookup - from loopy.symbolic import LinearSubscript, get_dependencies, SubArrayRef + from pymbolic.primitives import Lookup, Subscript, Variable + + from loopy.symbolic import LinearSubscript, SubArrayRef, get_dependencies if isinstance(expr, Lookup): expr = expr.aggregate @@ -830,7 +832,8 @@ def __init__(self, if isinstance(expression, str): expression = parse(expression) - from pymbolic.primitives import Variable, Subscript, Lookup + from pymbolic.primitives import Lookup, Subscript, Variable + from loopy.symbolic import LinearSubscript if not isinstance(assignee, (Variable, Subscript, LinearSubscript, Lookup)): raise LoopyError("invalid lvalue '%s'" % assignee) @@ -970,6 +973,7 @@ def __init__(self, tags=tags) from pymbolic.primitives import Call + from loopy.symbolic import Reduction if not isinstance(expression, (Call, Reduction)) and ( expression is not None): @@ -987,7 +991,8 @@ def __init__(self, if isinstance(expression, str): expression = parse(expression) - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Subscript, Variable + from loopy.symbolic import LinearSubscript, SubArrayRef for assignee in assignees: if not isinstance(assignee, (Variable, Subscript, LinearSubscript, @@ -1086,7 +1091,7 @@ def subscript_contains_slice(subscript): """Return *True* if the *subscript* contains an instance of :class:`pymbolic.primitives.Slice` as of its indices. """ - from pymbolic.primitives import Subscript, Slice + from pymbolic.primitives import Slice, Subscript assert isinstance(subscript, Subscript) return any(isinstance(index, Slice) for index in subscript.index_tuple) @@ -1100,6 +1105,7 @@ def is_array_call(assignees, expression): :meth:`is_array_call` will return *True*. """ from pymbolic.primitives import Call, Subscript + from loopy.symbolic import SubArrayRef if not isinstance(expression, Call): @@ -1121,6 +1127,7 @@ def modify_assignee_for_array_call(assignee): Converts the assignee subscript or variable as a SubArrayRef. """ from pymbolic.primitives import Subscript, Variable + from loopy.symbolic import SubArrayRef if isinstance(assignee, SubArrayRef): return assignee @@ -1150,6 +1157,7 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): "left-hand side not supported") from pymbolic.primitives import Call + from loopy.symbolic import Reduction if not isinstance(expression, (Call, Reduction)): raise LoopyError("right-hand side in multiple assignment must be " @@ -1174,8 +1182,9 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): **kwargs) else: def _is_array(expr): + from pymbolic.primitives import Slice, Subscript + from loopy.symbolic import SubArrayRef - from pymbolic.primitives import (Subscript, Slice) if isinstance(expr, SubArrayRef): return True if isinstance(expr, Subscript): diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 01e0fcec6..5ed9b2ad3 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -23,26 +23,30 @@ THE SOFTWARE. """ -from typing import Sequence, Mapping, FrozenSet, Dict, AbstractSet, Set, List - +import logging import sys - +from functools import reduce from sys import intern +from typing import AbstractSet, Dict, FrozenSet, List, Mapping, Sequence, Set import numpy as np + import islpy as isl from islpy import dim_type -from loopy.diagnostic import LoopyError, warn_with_kernel from pytools import memoize_on_first_arg, natsorted + +from loopy.diagnostic import LoopyError, warn_with_kernel from loopy.kernel import LoopKernel -from loopy.translation_unit import (TranslationUnit, - for_each_kernel) from loopy.kernel.function_interface import CallableKernel from loopy.kernel.instruction import ( - MultiAssignmentBase, CInstruction, _DataObliviousInstruction) + CInstruction, + MultiAssignmentBase, + _DataObliviousInstruction, +) from loopy.symbolic import CombineMapper -from functools import reduce -import logging +from loopy.translation_unit import TranslationUnit, for_each_kernel + + logger = logging.getLogger(__name__) @@ -394,10 +398,11 @@ def base_index_and_length(self, set_, iname, context=None, from loopy.diagnostic import StaticValueFindingError from loopy.isl_helpers import ( - static_max_of_pw_aff, - static_min_of_pw_aff, - static_value_of_pw_aff, - find_max_of_pwaff_with_params) + find_max_of_pwaff_with_params, + static_max_of_pw_aff, + static_min_of_pw_aff, + static_value_of_pw_aff, + ) from loopy.symbolic import pw_aff_to_expr # {{{ first: try to find static lower bound value @@ -525,7 +530,7 @@ def get_dot_dependency_graph(kernel, callables_table, iname_cluster=True, dep_graph = {} lines = [] - from loopy.kernel.data import MultiAssignmentBase, CInstruction + from loopy.kernel.data import CInstruction, MultiAssignmentBase for insn in kernel.instructions: if isinstance(insn, MultiAssignmentBase): @@ -585,8 +590,13 @@ def get_dot_dependency_graph(kernel, callables_table, iname_cluster=True, if iname_cluster: from loopy.schedule import ( - EnterLoop, LeaveLoop, RunInstruction, Barrier, - CallKernel, ReturnFromKernel) + Barrier, + CallKernel, + EnterLoop, + LeaveLoop, + ReturnFromKernel, + RunInstruction, + ) for sched_item in kernel.linearization: if isinstance(sched_item, EnterLoop): @@ -735,9 +745,10 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): # maps inames to "aggregate stride" aggregate_strides = {} - from loopy.symbolic import CoefficientCollector from pymbolic.primitives import Variable + from loopy.symbolic import CoefficientCollector + for aae in global_ary_acc_exprs: index_expr = aae.index if not isinstance(index_expr, tuple): @@ -802,8 +813,11 @@ def assign_automatic_axes(kernel, callables_table, axis=0, local_size=None): # TODO: do the tag removal rigorously, might be easier after switching # to set() from tuple() - from loopy.kernel.data import (AutoLocalInameTagBase, LocalInameTag, - filter_iname_tags_by_type) + from loopy.kernel.data import ( + AutoLocalInameTagBase, + LocalInameTag, + filter_iname_tags_by_type, + ) # Realize that at this point in time, axis lengths are already # fixed. So we compute them once and pass them to our recursive @@ -1026,7 +1040,7 @@ def with_changed_array(self, new_array): # {{{ guess_var_shape def guess_var_shape(kernel, var_names): - from loopy.symbolic import SubstitutionRuleExpander, BatchedAccessMapMapper + from loopy.symbolic import BatchedAccessMapMapper, SubstitutionRuleExpander armap = BatchedAccessMapMapper(kernel, var_names) @@ -1813,8 +1827,7 @@ def get_subkernel_to_insn_id_map(kernel: LoopKernel) -> Mapping[str, FrozenSet[s assert kernel.linearization is not None - from loopy.schedule import ( - sched_item_to_insn_id, CallKernel, ReturnFromKernel) + from loopy.schedule import CallKernel, ReturnFromKernel, sched_item_to_insn_id subkernel = None result: Dict[str, Set[str]] = {} @@ -1957,7 +1970,7 @@ def infer_args_are_input_output(kernel): then the array is inferred as an input argument if it is either read at some point in the kernel or it is neither read nor written. """ - from loopy.kernel.data import ArrayArg, ValueArg, ConstantArg, ImageArg + from loopy.kernel.data import ArrayArg, ConstantArg, ImageArg, ValueArg new_args = [] for arg in kernel.args: @@ -2065,6 +2078,7 @@ def get_call_graph(t_unit, only_kernel_callables=False): :arg t_unit: An instance of :class:`TranslationUnit`. """ from pyrsistent import pmap + from loopy.kernel import KernelState if t_unit.state < KernelState.CALLS_RESOLVED: @@ -2121,8 +2135,8 @@ def get_hw_axis_base_for_codegen(kernel: LoopKernel, iname: str) -> isl.Aff: offsetting expression during the hardware ina """ - from loopy.kernel.data import HardwareConcurrentTag from loopy.isl_helpers import static_min_of_pw_aff + from loopy.kernel.data import HardwareConcurrentTag assert kernel.iname_tags_of_type(iname, HardwareConcurrentTag) bounds = kernel.get_iname_bounds(iname) diff --git a/loopy/library/function.py b/loopy/library/function.py index a42359c03..7d274e492 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -20,11 +20,12 @@ THE SOFTWARE. """ -from loopy.kernel.function_interface import ScalarCallable +import numpy as np + from loopy.diagnostic import LoopyError +from loopy.kernel.function_interface import ScalarCallable from loopy.translation_unit import CallablesTable from loopy.types import NumpyType -import numpy as np class MakeTupleCallable(ScalarCallable): @@ -69,8 +70,9 @@ def emit_call(self, expression_to_code_mapper, expression, target): ary = expression_to_code_mapper.find_array(arg) - from loopy.kernel.array import get_access_info from pymbolic import evaluate + + from loopy.kernel.array import get_access_info access_info = get_access_info(expression_to_code_mapper.kernel, ary, arg.index, lambda expr: evaluate(expr, expression_to_code_mapper.codegen_state.var_subst_map), diff --git a/loopy/library/random123.py b/loopy/library/random123.py index 8978f4419..0afb0abb9 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -24,10 +24,12 @@ """ -from pytools import ImmutableRecord +import numpy as np from mako.template import Template + +from pytools import ImmutableRecord + from loopy.kernel.function_interface import ScalarCallable -import numpy as np # {{{ rng metadata diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 648b7a197..f3e214a12 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -23,15 +23,16 @@ from typing import ClassVar, Tuple -from pymbolic import var import numpy as np -from loopy.symbolic import ResolvedFunction -from loopy.kernel.function_interface import ScalarCallable -from loopy.symbolic import FunctionIdentifier +from pymbolic import var + from loopy.diagnostic import LoopyError -from loopy.types import NumpyType +from loopy.kernel.function_interface import ScalarCallable +from loopy.symbolic import FunctionIdentifier, ResolvedFunction from loopy.tools import update_persistent_hash +from loopy.types import NumpyType + __doc__ = """ .. currentmodule:: loopy.library.reduction diff --git a/loopy/loop.py b/loopy/loop.py index af61b7db5..001cd80a8 100644 --- a/loopy/loop.py +++ b/loopy/loop.py @@ -22,6 +22,7 @@ import islpy as isl + from loopy.translation_unit import for_each_kernel diff --git a/loopy/match.py b/loopy/match.py index e2a81f2b7..889f4e74f 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -24,10 +24,10 @@ THE SOFTWARE. """ -from abc import abstractmethod, ABC +from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import FrozenSet, List, Sequence, Tuple, Union, Protocol from sys import intern +from typing import FrozenSet, List, Protocol, Sequence, Tuple, Union from loopy.kernel import LoopKernel from loopy.kernel.instruction import InstructionBase @@ -35,8 +35,9 @@ NoneType = type(None) -from pytools.lex import RE import pytools.tag +from pytools.lex import RE + __doc__ = """ .. autoclass:: Matchable @@ -393,7 +394,7 @@ def inner_parse(pstate, min_precedence=0): if isinstance(expr, MatchExpressionBase): return expr - from pytools.lex import LexIterator, lex, InvalidTokenError + from pytools.lex import InvalidTokenError, LexIterator, lex try: pstate = LexIterator( [(tag, s, idx, matchobj) diff --git a/loopy/options.py b/loopy/options.py index 073145bb1..9c4fa0fb4 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -21,11 +21,12 @@ """ -from pytools import ImmutableRecord -import re import os +import re from warnings import warn +from pytools import ImmutableRecord + ALLOW_TERMINAL_COLORS = True diff --git a/loopy/preprocess.py b/loopy/preprocess.py index c787cedbf..d24e14cc2 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -20,37 +20,52 @@ THE SOFTWARE. """ -from typing import Tuple, TypeVar, Iterable, Optional, List, FrozenSet, cast import logging +from typing import FrozenSet, Iterable, List, Optional, Tuple, TypeVar, cast + + logger = logging.getLogger(__name__) -from immutables import Map +from functools import partial + import numpy as np +from immutables import Map + +from pytools import ProcessLogger from loopy.diagnostic import ( - LoopyError, WriteRaceConditionWarning, warn_with_kernel, - LoopyAdvisory) + LoopyAdvisory, + LoopyError, + WriteRaceConditionWarning, + warn_with_kernel, +) +from loopy.kernel import LoopKernel +from loopy.kernel.array import ArrayDimImplementationTag +from loopy.kernel.data import ( + ArrayArg, + KernelArgument, + ValueArg, + _ArraySeparationInfo, + auto, + filter_iname_tags_by_type, +) +from loopy.kernel.function_interface import CallableKernel, ScalarCallable +# from loopy.transform.iname import remove_any_newly_unused_inames +from loopy.kernel.instruction import ( + CallInstruction, + CInstruction, + MultiAssignmentBase, + _DataObliviousInstruction, +) +from loopy.symbolic import RuleAwareIdentityMapper from loopy.tools import memoize_on_disk -from loopy.kernel.data import filter_iname_tags_by_type, ArrayArg, auto, ValueArg +from loopy.translation_unit import TranslationUnit, for_each_kernel -from loopy.kernel import LoopKernel # for the benefit of loopy.statistics, for now from loopy.type_inference import infer_unknown_types -from loopy.symbolic import RuleAwareIdentityMapper -# from loopy.transform.iname import remove_any_newly_unused_inames - -from loopy.kernel.instruction import (MultiAssignmentBase, CInstruction, - CallInstruction, _DataObliviousInstruction) -from loopy.kernel.function_interface import CallableKernel, ScalarCallable -from loopy.kernel.array import ArrayDimImplementationTag -from loopy.kernel.data import _ArraySeparationInfo, KernelArgument -from loopy.translation_unit import TranslationUnit, for_each_kernel from loopy.typing import ExpressionT -from pytools import ProcessLogger -from functools import partial - # {{{ check for writes to predicates @@ -206,6 +221,7 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: vng = kernel.get_var_name_generator() from pymbolic.primitives import Expression, Variable + from loopy.kernel.array import FixedStrideArrayDimTag # {{{ process arguments @@ -320,9 +336,8 @@ def find_temporary_address_space(kernel): logger.debug("%s: find temporary address space" % kernel.name) new_temp_vars = {} - from loopy.kernel.data import (LocalInameTagBase, GroupInameTag, - AddressSpace) import loopy as lp + from loopy.kernel.data import AddressSpace, GroupInameTag, LocalInameTagBase writers = kernel.writer_map() @@ -430,8 +445,7 @@ def find_temporary_address_space(kernel): def realize_ilp(kernel): logger.debug("%s: add axes to temporaries for ilp" % kernel.name) - from loopy.kernel.data import (IlpBaseTag, VectorizeTag, - filter_iname_tags_by_type) + from loopy.kernel.data import IlpBaseTag, VectorizeTag, filter_iname_tags_by_type privatizing_inames = frozenset( name for name, iname in kernel.inames.items() @@ -455,9 +469,9 @@ def check_atomic_loads(kernel): """ logger.debug("%s: check atomic loads" % kernel.name) - from loopy.types import AtomicType from loopy.kernel.array import ArrayBase from loopy.kernel.instruction import Assignment, AtomicLoad + from loopy.types import AtomicType # find atomic variables atomicity_candidates = ( @@ -504,14 +518,16 @@ def __init__(self, rule_mapping_context, caller_kernel, clbl_inf_ctx): self.clbl_inf_ctx = clbl_inf_ctx def map_call(self, expr, expn_state, assignees=None): + from pymbolic.mapper.substitutor import make_subst_func from pymbolic.primitives import Call, Variable - from loopy.kernel.function_interface import ValueArgDescriptor - from loopy.symbolic import ResolvedFunction + from loopy.kernel.array import ArrayBase from loopy.kernel.data import ValueArg - from pymbolic.mapper.substitutor import make_subst_func - from loopy.symbolic import SubstitutionMapper - from loopy.kernel.function_interface import get_arg_descriptor_for_expression + from loopy.kernel.function_interface import ( + ValueArgDescriptor, + get_arg_descriptor_for_expression, + ) + from loopy.symbolic import ResolvedFunction, SubstitutionMapper if not isinstance(expr.function, ResolvedFunction): # ignore if the call is not to a ResolvedFunction @@ -579,9 +595,10 @@ def map_call_with_kwargs(self, expr): raise NotImplementedError def __call__(self, expr, kernel, insn, assignees=None): - from loopy.kernel.data import InstructionBase - from loopy.symbolic import UncachedIdentityMapper, ExpansionState import immutables + + from loopy.kernel.data import InstructionBase + from loopy.symbolic import ExpansionState, UncachedIdentityMapper assert insn is None or isinstance(insn, InstructionBase) return UncachedIdentityMapper.__call__(self, expr, @@ -644,11 +661,10 @@ def infer_arg_descr(program): :attr:`loopy.InKernelCallable.arg_id_to_descr` inferred for all the callables. """ - from loopy.translation_unit import make_clbl_inf_ctx, resolve_callables + from loopy import ValueArg, auto from loopy.kernel.array import ArrayBase - from loopy.kernel.function_interface import (ArrayArgDescriptor, - ValueArgDescriptor) - from loopy import auto, ValueArg + from loopy.kernel.function_interface import ArrayArgDescriptor, ValueArgDescriptor + from loopy.translation_unit import make_clbl_inf_ctx, resolve_callables program = resolve_callables(program) @@ -688,10 +704,11 @@ def _tuple_or_none(s): # {{{ inline_kernels_with_gbarriers def inline_kernels_with_gbarriers(program): + from pytools.graph import compute_topological_order + from loopy.kernel.instruction import BarrierInstruction - from loopy.transform.callable import inline_callable_kernel from loopy.kernel.tools import get_call_graph - from pytools.graph import compute_topological_order + from loopy.transform.callable import inline_callable_kernel def has_gbarrier(knl): return any((isinstance(insn, BarrierInstruction) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 136f0e601..ca45521e3 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -25,21 +26,34 @@ import logging import sys from dataclasses import dataclass, replace -from typing import (FrozenSet, Hashable, Sequence, AbstractSet, Any, Set, TypeVar, - Mapping, Dict, Tuple, Iterator, Optional, TYPE_CHECKING) +from typing import ( + TYPE_CHECKING, + AbstractSet, + Any, + Dict, + FrozenSet, + Hashable, + Iterator, + Mapping, + Optional, + Sequence, + Set, + Tuple, + TypeVar, +) from immutables import Map -from pytools import ImmutableRecord -import islpy as isl -from loopy.diagnostic import LoopyError, ScheduleDebugInputError, warn_with_kernel - -from pytools import MinRecursionLimit, ProcessLogger +import islpy as isl +from pytools import ImmutableRecord, MinRecursionLimit, ProcessLogger from pytools.persistent_dict import WriteOncePersistentDict + +from loopy.diagnostic import LoopyError, ScheduleDebugInputError, warn_with_kernel from loopy.kernel.instruction import InstructionBase from loopy.tools import LoopyKeyBuilder, caches from loopy.version import DATA_MODEL_VERSION + if TYPE_CHECKING: from loopy.kernel import LoopKernel from loopy.translation_unit import CallablesTable, TranslationUnit @@ -446,7 +460,10 @@ def format_insn(kernel, insn_id): Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa from loopy.kernel.instruction import ( - MultiAssignmentBase, NoOpInstruction, BarrierInstruction) + BarrierInstruction, + MultiAssignmentBase, + NoOpInstruction, + ) if isinstance(insn, MultiAssignmentBase): return "{}{}{} = {}{}{} {{id={}}""}".format( Fore.CYAN, ", ".join(str(a) for a in insn.assignees), Style.RESET_ALL, @@ -2043,8 +2060,12 @@ def _generate_loop_schedules_inner( for item in preschedule for insn_id in sched_item_to_insn_id(item)} - from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, - filter_iname_tags_by_type) + from loopy.kernel.data import ( + ConcurrentTag, + IlpBaseTag, + VectorizeTag, + filter_iname_tags_by_type, + ) ilp_inames = { name for name, iname in kernel.inames.items() @@ -2157,8 +2178,7 @@ def print_longest_dead_end(): linearization=gen_sched, state=KernelState.LINEARIZED) - from loopy.schedule.device_mapping import \ - map_schedule_onto_host_or_device + from loopy.schedule.device_mapping import map_schedule_onto_host_or_device if kernel.state != KernelState.LINEARIZED: # Device mapper only gets run once. new_kernel = map_schedule_onto_host_or_device(new_kernel) @@ -2259,9 +2279,8 @@ def get_one_scheduled_kernel(kernel, callables_table): def linearize(t_unit: TranslationUnit) -> TranslationUnit: - from loopy.kernel.function_interface import (CallableKernel, - ScalarCallable) from loopy.check import pre_schedule_checks + from loopy.kernel.function_interface import CallableKernel, ScalarCallable pre_schedule_checks(t_unit) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 38d03a296..a0345049d 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -21,8 +21,13 @@ """ from loopy.diagnostic import LoopyError -from loopy.schedule import (Barrier, CallKernel, EnterLoop, - ReturnFromKernel, RunInstruction) +from loopy.schedule import ( + Barrier, + CallKernel, + EnterLoop, + ReturnFromKernel, + RunInstruction, +) from loopy.schedule.tools import get_block_boundaries diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index fd7d46876..b2a44c499 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -20,16 +20,16 @@ THE SOFTWARE. """ -from functools import cached_property import enum -from typing import Sequence, FrozenSet, Tuple, List, Set, Dict from dataclasses import dataclass +from functools import cached_property +from typing import Dict, FrozenSet, List, Sequence, Set, Tuple -from pytools import memoize_method import islpy as isl +from pytools import memoize_method -from loopy.kernel.data import AddressSpace, TemporaryVariable, ArrayArg from loopy.kernel import LoopKernel +from loopy.kernel.data import AddressSpace, ArrayArg, TemporaryVariable # {{{ block boundary finder @@ -40,7 +40,7 @@ def get_block_boundaries(schedule): :class:`loopy.schedule.BlockBeginItem`s to :class:`loopy.schedule.BlockEndItem`s and vice versa. """ - from loopy.schedule import (BeginBlockItem, EndBlockItem) + from loopy.schedule import BeginBlockItem, EndBlockItem block_bounds = {} active_blocks = [] for idx, sched_item in enumerate(schedule): @@ -308,8 +308,14 @@ def get_return_from_kernel_mapping(kernel): of the active sub-kernel at 'S'. """ from loopy.kernel import LoopKernel - from loopy.schedule import (RunInstruction, EnterLoop, LeaveLoop, - CallKernel, ReturnFromKernel, Barrier) + from loopy.schedule import ( + Barrier, + CallKernel, + EnterLoop, + LeaveLoop, + ReturnFromKernel, + RunInstruction, + ) assert isinstance(kernel, LoopKernel) assert isinstance(kernel.linearization, list) return_from_kernel_idxs = {} @@ -350,11 +356,14 @@ def _check_for_access_races(map_a, insn_a, map_b, insn_b, knl, callables_table, *unequal* global ids that access the same address. """ import pymbolic.primitives as p - from loopy.symbolic import isl_set_from_expr, aff_from_expr, aff_to_expr - from loopy.kernel.data import (filter_iname_tags_by_type, - HardwareConcurrentTag, - AddressSpace) + + from loopy.kernel.data import ( + AddressSpace, + HardwareConcurrentTag, + filter_iname_tags_by_type, + ) from loopy.kernel.tools import get_hw_axis_base_for_codegen + from loopy.symbolic import aff_from_expr, aff_to_expr, isl_set_from_expr assert address_space in [AddressSpace.LOCAL, AddressSpace.GLOBAL] @@ -538,9 +547,10 @@ def vars(self): @memoize_method def _get_access_maps(self, insn_id, access_dir): - from loopy.symbolic import BatchedAccessMapMapper from collections import defaultdict + from loopy.symbolic import BatchedAccessMapMapper + insn = self.kernel.id_to_insn[insn_id] exprs = list(insn.assignees) diff --git a/loopy/statistics.py b/loopy/statistics.py index 76d95454b..c9cf9d938 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -25,19 +25,18 @@ THE SOFTWARE. """ -from functools import partial, cached_property +from functools import cached_property, partial -from islpy import dim_type import islpy as isl +from islpy import dim_type from pymbolic.mapper import CombineMapper +from pytools import ImmutableRecord, memoize_method import loopy as lp -from loopy.kernel.data import ( - MultiAssignmentBase, TemporaryVariable, AddressSpace) -from loopy.diagnostic import warn_with_kernel, LoopyError -from loopy.symbolic import CoefficientCollector -from pytools import ImmutableRecord, memoize_method +from loopy.diagnostic import LoopyError, warn_with_kernel +from loopy.kernel.data import AddressSpace, MultiAssignmentBase, TemporaryVariable from loopy.kernel.function_interface import CallableKernel +from loopy.symbolic import CoefficientCollector from loopy.translation_unit import TranslationUnit @@ -535,7 +534,7 @@ def eval_and_sum(self, params=None): # {{{ subst_into_to_count_map def subst_into_guarded_pwqpolynomial(new_space, guarded_poly, subst_dict): - from loopy.isl_helpers import subst_into_pwqpolynomial, get_param_subst_domain + from loopy.isl_helpers import get_param_subst_domain, subst_into_pwqpolynomial poly = subst_into_pwqpolynomial( new_space, guarded_poly.pwqpolynomial, subst_dict) @@ -836,9 +835,11 @@ def map_call(self, expr): assert isinstance(expr.function, ResolvedFunction) clbl = self.callables_table[expr.function.name] - from loopy.kernel.function_interface import (CallableKernel, - get_kw_pos_association) from loopy.kernel.data import ValueArg + from loopy.kernel.function_interface import ( + CallableKernel, + get_kw_pos_association, + ) if isinstance(clbl, CallableKernel): sub_result = self.kernel_rec(clbl.subkernel) _, pos_to_kw = get_kw_pos_association(clbl.subkernel) @@ -1089,8 +1090,11 @@ def _get_lid_and_gid_strides(knl, array, index): from loopy.symbolic import get_dependencies my_inames = get_dependencies(index) & knl.all_inames() - from loopy.kernel.data import (LocalInameTag, GroupInameTag, - filter_iname_tags_by_type) + from loopy.kernel.data import ( + GroupInameTag, + LocalInameTag, + filter_iname_tags_by_type, + ) lid_to_iname = {} gid_to_iname = {} for iname in my_inames: @@ -1111,10 +1115,11 @@ def _get_lid_and_gid_strides(knl, array, index): # where l0, l1, l2, g0, g1, and g2 come from flattened index # [... + g2*gid2 + g1*gid1 + g0*gid0 + ... + l2*lid2 + l1*lid1 + l0*lid0] - from loopy.kernel.array import FixedStrideArrayDimTag from pymbolic.primitives import Variable - from loopy.symbolic import simplify_using_aff + from loopy.diagnostic import ExpressionNotAffineError + from loopy.kernel.array import FixedStrideArrayDimTag + from loopy.symbolic import simplify_using_aff def get_iname_strides(tag_to_iname_dict): tag_to_stride_dict = {} @@ -1520,7 +1525,7 @@ def get_unused_hw_axes_factor(knl, callables_table, insn, disregard_local_axes): g_used = set() l_used = set() - from loopy.kernel.data import LocalInameTag, GroupInameTag + from loopy.kernel.data import GroupInameTag, LocalInameTag for iname in insn.within_inames: tags = knl.iname_tags_of_type(iname, (LocalInameTag, GroupInameTag), max_num=1) @@ -1671,8 +1676,12 @@ def _get_op_map_for_single_kernel(knl, callables_table, op_map = op_counter.new_zero_poly_map() from loopy.kernel.instruction import ( - CallInstruction, CInstruction, Assignment, - NoOpInstruction, BarrierInstruction) + Assignment, + BarrierInstruction, + CallInstruction, + CInstruction, + NoOpInstruction, + ) for insn in knl.instructions: if within(knl, insn): @@ -1763,7 +1772,7 @@ def get_op_map(program, count_redundant_work=False, assert entrypoint in program.entrypoints - from loopy.preprocess import preprocess_program, infer_unknown_types + from loopy.preprocess import infer_unknown_types, preprocess_program program = preprocess_program(program) from loopy.match import parse_match @@ -1858,8 +1867,12 @@ def _get_mem_access_map_for_single_kernel(knl, callables_table, access_map = access_counter_g.new_zero_poly_map() from loopy.kernel.instruction import ( - CallInstruction, CInstruction, Assignment, - NoOpInstruction, BarrierInstruction) + Assignment, + BarrierInstruction, + CallInstruction, + CInstruction, + NoOpInstruction, + ) for insn in knl.instructions: if within(knl, insn): @@ -1986,7 +1999,7 @@ def get_mem_access_map(program, count_redundant_work=False, assert entrypoint in program.entrypoints - from loopy.preprocess import preprocess_program, infer_unknown_types + from loopy.preprocess import infer_unknown_types, preprocess_program program = preprocess_program(program) @@ -2013,8 +2026,14 @@ def _get_synchronization_map_for_single_kernel(knl, callables_table, knl = lp.get_one_linearized_kernel(knl, callables_table) - from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, - CallKernel, ReturnFromKernel, RunInstruction) + from loopy.schedule import ( + Barrier, + CallKernel, + EnterLoop, + LeaveLoop, + ReturnFromKernel, + RunInstruction, + ) kernel_rec = partial(_get_synchronization_map_for_single_kernel, callables_table=callables_table, @@ -2100,7 +2119,7 @@ def get_synchronization_map(program, subgroup_size=None, entrypoint=None): entrypoint = list(program.entrypoints)[0] assert entrypoint in program.entrypoints - from loopy.preprocess import preprocess_program, infer_unknown_types + from loopy.preprocess import infer_unknown_types, preprocess_program program = preprocess_program(program) # Ordering restriction: preprocess might insert arguments to @@ -2167,7 +2186,7 @@ def gather_access_footprints(program, ignore_uncountable=False, entrypoint=None) raise NotImplementedError("Currently only supported for program with " "only one CallableKernel.") - from loopy.preprocess import preprocess_program, infer_unknown_types + from loopy.preprocess import infer_unknown_types, preprocess_program program = preprocess_program(program) # Ordering restriction: preprocess might insert arguments to @@ -2205,7 +2224,7 @@ def gather_access_footprint_bytes(program, ignore_uncountable=False): nonlinear indices) """ - from loopy.preprocess import preprocess_program, infer_unknown_types + from loopy.preprocess import infer_unknown_types, preprocess_program kernel = infer_unknown_types(program, expect_completion=True) from loopy.kernel import KernelState diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8c1c8dce6..ebc784b8e 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -24,51 +24,47 @@ """ -from typing import AbstractSet, ClassVar, Mapping, Sequence, Tuple -from functools import reduce, cached_property -from sys import intern import re +from functools import cached_property, reduce +from sys import intern +from typing import AbstractSet, ClassVar, Mapping, Sequence, Tuple +import immutables import numpy as np -from pytools import (memoize, memoize_method, memoize_on_first_arg, - ImmutableRecord) -import pytools.lex -from pytools.tag import Taggable import islpy as isl -from islpy import dim_type - import pymbolic.primitives as p - +import pytools.lex +from islpy import dim_type from pymbolic.mapper import ( - CachedCombineMapper as CombineMapperBase, - CachedIdentityMapper as IdentityMapperBase, - IdentityMapper as UncachedIdentityMapperBase, - CachedWalkMapper as WalkMapperBase, - WalkMapper as UncachedWalkMapperBase, - CallbackMapper as CallbackMapperBase, - CSECachingMapperMixin, - ) -import immutables -from pymbolic.mapper.evaluator import \ - CachedEvaluationMapper as EvaluationMapperBase -from pymbolic.mapper.substitutor import \ - CachedSubstitutionMapper as SubstitutionMapperBase -from pymbolic.mapper.stringifier import \ - StringifyMapper as StringifyMapperBase -from pymbolic.mapper.dependency import \ - CachedDependencyMapper as DependencyMapperBase -from pymbolic.mapper.coefficient import \ - CoefficientCollector as CoefficientCollectorBase -from pymbolic.mapper.unifier import UnidirectionalUnifier \ - as UnidirectionalUnifierBase -from pymbolic.mapper.constant_folder import \ - ConstantFoldingMapper as ConstantFoldingMapperBase - + CachedCombineMapper as CombineMapperBase, + CachedIdentityMapper as IdentityMapperBase, + CachedWalkMapper as WalkMapperBase, + CallbackMapper as CallbackMapperBase, + CSECachingMapperMixin, + IdentityMapper as UncachedIdentityMapperBase, + WalkMapper as UncachedWalkMapperBase, +) +from pymbolic.mapper.coefficient import CoefficientCollector as CoefficientCollectorBase +from pymbolic.mapper.constant_folder import ( + ConstantFoldingMapper as ConstantFoldingMapperBase, +) +from pymbolic.mapper.dependency import CachedDependencyMapper as DependencyMapperBase +from pymbolic.mapper.evaluator import CachedEvaluationMapper as EvaluationMapperBase +from pymbolic.mapper.stringifier import StringifyMapper as StringifyMapperBase +from pymbolic.mapper.substitutor import ( + CachedSubstitutionMapper as SubstitutionMapperBase, +) +from pymbolic.mapper.unifier import UnidirectionalUnifier as UnidirectionalUnifierBase from pymbolic.parser import Parser as ParserBase -from loopy.diagnostic import LoopyError -from loopy.diagnostic import (ExpressionToAffineConversionError, - UnableToDetermineAccessRangeError) +from pytools import ImmutableRecord, memoize, memoize_method, memoize_on_first_arg +from pytools.tag import Taggable + +from loopy.diagnostic import ( + ExpressionToAffineConversionError, + LoopyError, + UnableToDetermineAccessRangeError, +) from loopy.typing import ExpressionT @@ -648,7 +644,7 @@ class TypeCast(LoopyExpressionBase): def __init__(self, type, child): super().__init__() - from loopy.types import to_loopy_type, NumpyType + from loopy.types import NumpyType, to_loopy_type type = to_loopy_type(type) if (not isinstance(type, NumpyType) @@ -1615,7 +1611,7 @@ def map_call(self, expr): elif name in ["minimum", "maximum"]: if len(expr.parameters) == 2: - from pymbolic.primitives import Min, Max + from pymbolic.primitives import Max, Min return { "minimum": Min, "maximum": Max @@ -1673,8 +1669,16 @@ def parse_float(self, s): return float(val) # generic float def parse_prefix(self, pstate): - from pymbolic.parser import (_PREC_UNARY, _less, _greater, _identifier, - _openbracket, _closebracket, _colon) + from pymbolic.parser import ( + _PREC_UNARY, + _closebracket, + _colon, + _greater, + _identifier, + _less, + _openbracket, + ) + import loopy as lp if pstate.is_next(_less): @@ -1942,6 +1946,7 @@ def pwaff_from_expr(space, expr, vars_to_zero=None): def with_aff_conversion_guard(f, space, expr, *args): import islpy as isl from pymbolic.mapper.evaluator import UnknownVariableError + from loopy.diagnostic import ExpressionNotAffineError err = None @@ -2042,8 +2047,8 @@ def qpolynomial_from_expr(space, expr): # {{{ simplify using aff def simplify_via_aff(expr): - from loopy.symbolic import aff_to_expr, guarded_aff_from_expr, get_dependencies from loopy.diagnostic import ExpressionToAffineConversionError + from loopy.symbolic import aff_to_expr, get_dependencies, guarded_aff_from_expr deps = sorted(get_dependencies(expr)) try: diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 9e8211a03..48ab04f89 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -23,6 +23,7 @@ from __future__ import annotations + __copyright__ = "Copyright (C) 2015 Andreas Kloeckner" __license__ = """ @@ -46,15 +47,24 @@ """ -from typing import (Any, Tuple, Generic, TypeVar, Sequence, ClassVar, Optional, - TYPE_CHECKING) +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Generic, + Optional, + Sequence, + Tuple, + TypeVar, +) + if TYPE_CHECKING: - from loopy.typing import ExpressionT from loopy.codegen import CodeGenerationState from loopy.codegen.result import CodeGenerationResult from loopy.target.execution import ExecutorBase - from loopy.translation_unit import TranslationUnit, FunctionIdT + from loopy.translation_unit import FunctionIdT, TranslationUnit + from loopy.typing import ExpressionT ASTType = TypeVar("ASTType") diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 9adfcbdb9..b9671303f 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -23,34 +23,47 @@ THE SOFTWARE. """ -from typing import cast, Tuple, Optional, Sequence, Any import re +from typing import Any, Optional, Sequence, Tuple, cast import numpy as np # noqa -from cgen import (Collection, Pointer, NestedDeclarator, Block, Generable, - Declarator, Const) +import pymbolic.primitives as p +from cgen import ( + Block, + Collection, + Const, + Declarator, + Generable, + NestedDeclarator, + Pointer, +) from cgen.mapper import IdentityMapper as CASTIdentityMapperBase from pymbolic.mapper.stringifier import PREC_NONE -import pymbolic.primitives as p from pytools import memoize_method -from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder +from loopy.codegen import CodeGenerationState +from loopy.codegen.result import CodeGenerationResult from loopy.diagnostic import LoopyError, LoopyTypeError -from loopy.symbolic import IdentityMapper -from loopy.target.execution import ExecutorBase -from loopy.translation_unit import FunctionIdT, TranslationUnit -from loopy.types import NumpyType, LoopyType, to_loopy_type -from loopy.typing import ExpressionT from loopy.kernel import LoopKernel from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag -from loopy.kernel.data import (TemporaryVariable, AddressSpace, ArrayArg, - ConstantArg, ImageArg, ValueArg) +from loopy.kernel.data import ( + AddressSpace, + ArrayArg, + ConstantArg, + ImageArg, + TemporaryVariable, + ValueArg, +) from loopy.kernel.function_interface import ScalarCallable from loopy.schedule import CallKernel +from loopy.symbolic import IdentityMapper +from loopy.target import ASTBuilderBase, DummyHostASTBuilder, TargetBase +from loopy.target.execution import ExecutorBase from loopy.tools import remove_common_indentation -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult +from loopy.translation_unit import FunctionIdT, TranslationUnit +from loopy.types import LoopyType, NumpyType, to_loopy_type +from loopy.typing import ExpressionT __doc__ = """ @@ -421,7 +434,9 @@ def get_device_ast_builder(self): @memoize_method def get_dtype_registry(self): from loopy.target.c.compyte.dtypes import ( - DTypeRegistry, fill_registry_with_c_types) + DTypeRegistry, + fill_registry_with_c_types, + ) result = DTypeRegistry() fill_registry_with_c_types(result, respect_windows=False, include_bool=True) @@ -782,10 +797,7 @@ def get_function_definition( kernel = codegen_state.kernel assert kernel.linearization is not None - from cgen import ( - FunctionBody, - Initializer, - Line) + from cgen import FunctionBody, Initializer, Line result = [] @@ -880,12 +892,14 @@ def get_temporary_decls(self, codegen_state, schedule_index): # {{{ declare temporaries from cgen import Initializer, Line + # Getting the temporary variables that are needed for the current # sub-kernel. from loopy.schedule.tools import ( - temporaries_read_in_subkernel, - temporaries_written_in_subkernel, - supporting_temporary_names) + supporting_temporary_names, + temporaries_read_in_subkernel, + temporaries_written_in_subkernel, + ) subkernel_name = kernel.linearization[schedule_index].kernel_name sub_knl_temps = ( temporaries_read_in_subkernel(kernel, subkernel_name) @@ -1135,8 +1149,8 @@ def emit_assignment(self, codegen_state, insn): else: lhs_atomicity = None - from loopy.kernel.data import AtomicInit, AtomicUpdate from loopy.expression import dtype_to_type_context + from loopy.kernel.data import AtomicInit, AtomicUpdate lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None) rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype) @@ -1230,10 +1244,10 @@ def emit_sequential_loop(self, codegen_state, iname, iname_dtype, lbound, ubound, inner, hints): ecm = codegen_state.expression_to_code_mapper + from cgen import For, InlineInitializer from pymbolic import var - from pymbolic.primitives import Comparison from pymbolic.mapper.stringifier import PREC_NONE - from cgen import For, InlineInitializer + from pymbolic.primitives import Comparison loop = For( InlineInitializer( @@ -1263,7 +1277,7 @@ def emit_unroll_hint(self, value): def emit_initializer(self, codegen_state, dtype, name, val_str, is_const): decl = POD(self, dtype, name) - from cgen import Initializer, Const + from cgen import Const, Initializer if is_const: decl = Const(decl) @@ -1352,8 +1366,10 @@ def get_device_ast_builder(self): @memoize_method def get_dtype_registry(self): from loopy.target.c.compyte.dtypes import ( - DTypeRegistry, fill_registry_with_c99_stdint_types, - fill_registry_with_c99_complex_types) + DTypeRegistry, + fill_registry_with_c99_complex_types, + fill_registry_with_c99_stdint_types, + ) result = DTypeRegistry() fill_registry_with_c99_stdint_types(result) fill_registry_with_c99_complex_types(result) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index ebf2fd611..52937766b 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -20,33 +20,37 @@ THE SOFTWARE. """ -from typing import Callable, Any, Union, Tuple, Sequence, Optional -import tempfile -import os import ctypes +import logging +import os +import tempfile from dataclasses import dataclass +from typing import Any, Callable, Optional, Sequence, Tuple, Union +import numpy as np +from codepy.jit import compile_from_string +from codepy.toolchain import GCCToolchain, ToolchainGuessError, guess_toolchain from immutables import Map + from pytools import memoize_method -from pytools.codegen import Indentation, CodeGenerator +from pytools.codegen import CodeGenerator, Indentation from pytools.prefork import ExecError -from codepy.toolchain import guess_toolchain, ToolchainGuessError, GCCToolchain -from codepy.jit import compile_from_string -import numpy as np - -from loopy.typing import ExpressionT -from loopy.types import LoopyType +from loopy.codegen.result import GeneratedProgram from loopy.kernel import LoopKernel from loopy.kernel.array import ArrayBase from loopy.kernel.data import ArrayArg from loopy.schedule.tools import KernelArgInfo -from loopy.codegen.result import GeneratedProgram +from loopy.target.execution import ( + ExecutionWrapperGeneratorBase, + ExecutorBase, + get_highlighted_code, +) from loopy.translation_unit import TranslationUnit -from loopy.target.execution import (ExecutorBase, - ExecutionWrapperGeneratorBase, get_highlighted_code) +from loopy.types import LoopyType +from loopy.typing import ExpressionT + -import logging logger = logging.getLogger(__name__) DEF_EVEN_DIV_FUNCTION = """ diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 8f0213bf1..276407fc1 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -22,28 +22,34 @@ from typing import Optional -import numpy as np -from pymbolic.mapper import RecursiveMapper, IdentityMapper -from pymbolic.mapper.stringifier import (PREC_NONE, PREC_CALL, PREC_PRODUCT, - PREC_SHIFT, - PREC_UNARY, PREC_LOGICAL_OR, PREC_LOGICAL_AND, - PREC_BITWISE_AND, PREC_BITWISE_OR, PREC_BITWISE_XOR) +import numpy as np import islpy as isl import pymbolic.primitives as p from pymbolic import var - - -from loopy.expression import dtype_to_type_context -from loopy.type_inference import TypeReader +from pymbolic.mapper import IdentityMapper, RecursiveMapper +from pymbolic.mapper.stringifier import ( + PREC_BITWISE_AND, + PREC_BITWISE_OR, + PREC_BITWISE_XOR, + PREC_CALL, + PREC_LOGICAL_AND, + PREC_LOGICAL_OR, + PREC_NONE, + PREC_PRODUCT, + PREC_SHIFT, + PREC_UNARY, +) from loopy.diagnostic import LoopyError +from loopy.expression import dtype_to_type_context +from loopy.symbolic import TypeCast +from loopy.target.c import CExpression from loopy.tools import is_integer +from loopy.type_inference import TypeReader from loopy.types import LoopyType -from loopy.target.c import CExpression from loopy.typing import ExpressionT -from loopy.symbolic import TypeCast __doc__ = """ @@ -140,7 +146,7 @@ def __call__(self, expr, prec=None, type_context=None, needed_dtype=None): # }}} def map_variable(self, expr, type_context): - from loopy.kernel.data import ValueArg, AddressSpace + from loopy.kernel.data import AddressSpace, ValueArg def postproc(x): return x @@ -218,9 +224,9 @@ def make_var(name): ary = self.find_array(expr) - from loopy.kernel.array import get_access_info from pymbolic import evaluate + from loopy.kernel.array import get_access_info from loopy.symbolic import simplify_using_aff index_tuple = tuple( simplify_using_aff(self.kernel, idx) for idx in expr.index_tuple) @@ -229,8 +235,7 @@ def make_var(name): lambda expr: evaluate(expr, self.codegen_state.var_subst_map), self.codegen_state.vectorization_info) - from loopy.kernel.data import ( - ImageArg, ArrayArg, TemporaryVariable, ConstantArg) + from loopy.kernel.data import ArrayArg, ConstantArg, ImageArg, TemporaryVariable if isinstance(ary, ImageArg): extra_axes = 0 @@ -647,8 +652,8 @@ def map_constant(self, expr, prec): return repr(expr) def map_call(self, expr, enclosing_prec): + from pymbolic.mapper.stringifier import PREC_CALL, PREC_NONE from pymbolic.primitives import Variable - from pymbolic.mapper.stringifier import PREC_NONE, PREC_CALL if isinstance(expr.function, Variable): func = expr.function.name else: @@ -698,7 +703,7 @@ def map_min(self, expr, enclosing_prec): map_max = map_min def map_if(self, expr, enclosing_prec): - from pymbolic.mapper.stringifier import PREC_NONE, PREC_CALL + from pymbolic.mapper.stringifier import PREC_CALL, PREC_NONE return "({} ? {} : {})".format( # Force parentheses around the condition to prevent compiler # warnings regarding precedence (e.g. with POCL 1.8/LLVM 12): diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 1c5e601d4..3155b3bff 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -23,22 +23,23 @@ THE SOFTWARE. """ -from typing import Tuple, Sequence +from typing import Sequence, Tuple import numpy as np + +from cgen import Const, Declarator, Generable from pymbolic import var from pytools import memoize_method -from cgen import Declarator, Const, Generable -from loopy.target.c import CFamilyTarget, CFamilyASTBuilder -from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper +from loopy.codegen import CodeGenerationState +from loopy.codegen.result import CodeGenerationResult from loopy.diagnostic import LoopyError, LoopyTypeError -from loopy.types import NumpyType from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag, VectorArrayDimTag -from loopy.kernel.data import AddressSpace, ImageArg, ConstantArg, ArrayArg +from loopy.kernel.data import AddressSpace, ArrayArg, ConstantArg, ImageArg from loopy.kernel.function_interface import ScalarCallable -from loopy.codegen.result import CodeGenerationResult -from loopy.codegen import CodeGenerationState +from loopy.target.c import CFamilyASTBuilder, CFamilyTarget +from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper +from loopy.types import NumpyType # {{{ vector types @@ -234,8 +235,10 @@ def get_device_ast_builder(self): @memoize_method def get_dtype_registry(self): - from loopy.target.c.compyte.dtypes import (DTypeRegistry, - fill_registry_with_c_types) + from loopy.target.c.compyte.dtypes import ( + DTypeRegistry, + fill_registry_with_c_types, + ) result = DTypeRegistry() fill_registry_with_c_types(result, respect_windows=True) @@ -466,9 +469,9 @@ def get_image_arg_declarator( def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var, lhs_expr, rhs_expr, lhs_dtype, rhs_type_context): - from pymbolic.primitives import Sum from cgen import Statement from pymbolic.mapper.stringifier import PREC_NONE + from pymbolic.primitives import Sum if isinstance(lhs_dtype, NumpyType) and lhs_dtype.numpy_dtype in [ np.int32, np.int64, np.float32, np.float64]: @@ -484,7 +487,8 @@ def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var, return Statement("atomicAdd(&{}, {})".format( lhs_expr_code, rhs_expr_code)) else: - from cgen import Block, DoWhile, Assign + from cgen import Assign, Block, DoWhile + from loopy.target.c import POD old_val_var = codegen_state.var_name_generator("loopy_old_val") new_val_var = codegen_state.var_name_generator("loopy_new_val") @@ -498,8 +502,9 @@ def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var, lhs_expr_code = ecm(lhs_expr, prec=PREC_NONE, type_context=None) - from pymbolic.mapper.substitutor import make_subst_func from pymbolic import var + from pymbolic.mapper.substitutor import make_subst_func + from loopy.symbolic import SubstitutionMapper subst = SubstitutionMapper( diff --git a/loopy/target/execution.py b/loopy/target/execution.py index fe0c7442b..cb081a3e5 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -21,30 +21,43 @@ """ -from typing import (Callable, Mapping, Tuple, Union, Set, FrozenSet, List, Dict, - Optional, Sequence, Any) +import logging +from abc import ABC, abstractmethod from dataclasses import dataclass +from typing import ( + Any, + Callable, + Dict, + FrozenSet, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, + Union, +) from immutables import Map -from abc import ABC, abstractmethod -from loopy.diagnostic import LoopyError +from pymbolic import var +from pytools.codegen import CodeGenerator, Indentation from pytools.py_codegen import PythonFunctionGenerator -from pytools.codegen import Indentation, CodeGenerator -from pymbolic import var +from loopy.diagnostic import LoopyError + -import logging logger = logging.getLogger(__name__) from pytools.persistent_dict import WriteOncePersistentDict -from loopy.tools import LoopyKeyBuilder, caches -from loopy.typing import ExpressionT -from loopy.types import LoopyType, NumpyType + from loopy.kernel import KernelState, LoopKernel -from loopy.kernel.data import _ArraySeparationInfo, ArrayArg, auto -from loopy.translation_unit import TranslationUnit +from loopy.kernel.data import ArrayArg, _ArraySeparationInfo, auto from loopy.schedule.tools import KernelArgInfo +from loopy.tools import LoopyKeyBuilder, caches +from loopy.translation_unit import TranslationUnit +from loopy.types import LoopyType, NumpyType +from loopy.typing import ExpressionT from loopy.version import DATA_MODEL_VERSION @@ -154,10 +167,10 @@ def python_dtype_str(self, gen: CodeGenerator, numpy_dtype): def generate_integer_arg_finding_from_array_data( self, gen: CodeGenerator, kernel: LoopKernel, kai: KernelArgInfo ) -> None: - from loopy.kernel.data import ArrayArg + from loopy.diagnostic import ParameterFinderWarning from loopy.kernel.array import get_strides + from loopy.kernel.data import ArrayArg from loopy.symbolic import DependencyMapper, StringifyMapper - from loopy.diagnostic import ParameterFinderWarning dep_map = DependencyMapper() # {{{ find equations @@ -415,9 +428,8 @@ def generate_arg_setup( ) -> Sequence[str]: options = kernel.options import loopy as lp - - from loopy.kernel.data import ImageArg from loopy.kernel.array import ArrayBase + from loopy.kernel.data import ImageArg from loopy.symbolic import StringifyMapper from loopy.types import NumpyType @@ -941,8 +953,8 @@ def get_highlighted_code(text, python=False): except ImportError: return text else: - from pygments.lexers import CLexer, PythonLexer from pygments.formatters import TerminalFormatter + from pygments.lexers import CLexer, PythonLexer return highlight(text, CLexer() if not python else PythonLexer(), TerminalFormatter()) diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 217f7a795..ce2a150b0 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -24,25 +24,26 @@ """ -from typing import cast, Tuple, Sequence +from typing import Sequence, Tuple, cast import numpy as np # noqa + import pymbolic.primitives as p +from cgen import Collection, Const, Declarator, Generable from pymbolic import var from pymbolic.mapper.stringifier import PREC_NONE from pytools import memoize_method -from cgen import Generable, Declarator, Const, Collection -from loopy.target.c import CFamilyTarget, CFamilyASTBuilder -from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper +from loopy.codegen import CodeGenerationState +from loopy.codegen.result import CodeGenerationResult from loopy.diagnostic import LoopyError -from loopy.symbolic import Literal +from loopy.kernel.data import AddressSpace, ArrayArg, TemporaryVariable from loopy.schedule import CallKernel -from loopy.typing import ExpressionT +from loopy.symbolic import Literal +from loopy.target.c import CFamilyASTBuilder, CFamilyTarget +from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from loopy.types import LoopyType -from loopy.kernel.data import AddressSpace, TemporaryVariable, ArrayArg -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult +from loopy.typing import ExpressionT # {{{ expression mapper @@ -116,9 +117,10 @@ def map_subscript(self, expr, type_context): gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs() if lsize: lsize, = lsize - from loopy.kernel.array import get_access_info from pymbolic import evaluate + from loopy.kernel.array import get_access_info + access_info = get_access_info(self.kernel, ary, expr.index, lambda expr: evaluate(expr, self.codegen_state.var_subst_map), self.codegen_state.vectorization_info) @@ -216,7 +218,7 @@ def get_function_declaration( codegen_state.kernel.linearization[schedule_index] ).kernel_name - from cgen import (FunctionDeclaration, Value) + from cgen import FunctionDeclaration, Value from cgen.ispc import ISPCExport, ISPCTask if codegen_state.is_entrypoint: @@ -257,7 +259,7 @@ def get_kernel_call(self, codegen_state: CodeGenerationState, from pymbolic.mapper.stringifier import PREC_NONE result = [] - from cgen import Statement as S, Block + from cgen import Block, Statement as S if lsize: result.append( S( @@ -320,7 +322,7 @@ def get_value_arg_declaraotor( def get_array_arg_declarator( self, arg: ArrayArg, is_written: bool) -> Declarator: # FIXME restrict? - from cgen.ispc import ISPCUniformPointer, ISPCUniform + from cgen.ispc import ISPCUniform, ISPCUniformPointer decl = ISPCUniform( ISPCUniformPointer(self.get_array_base_declarator(arg))) @@ -371,9 +373,10 @@ def emit_assignment(self, codegen_state, insn): if insn.atomicity: raise NotImplementedError("atomic ops in ISPC") - from loopy.expression import dtype_to_type_context from pymbolic.mapper.stringifier import PREC_NONE + from loopy.expression import dtype_to_type_context + rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype) rhs_code = ecm(insn.expression, prec=PREC_NONE, type_context=rhs_type_context, @@ -387,9 +390,9 @@ def emit_assignment(self, codegen_state, insn): if UseStreamingStoreTag() in insn.tags: ary = ecm.find_array(lhs) - from loopy.kernel.array import get_access_info from pymbolic import evaluate + from loopy.kernel.array import get_access_info from loopy.symbolic import simplify_using_aff index_tuple = tuple( simplify_using_aff(kernel, idx) for idx in lhs.index_tuple) @@ -408,7 +411,7 @@ def emit_assignment(self, codegen_state, insn): raise LoopyError("streaming stores must have a subscript") subscript, = access_info.subscripts - from pymbolic.primitives import Sum, flattened_sum, Variable + from pymbolic.primitives import Sum, Variable, flattened_sum if isinstance(subscript, Sum): terms = subscript.children else: @@ -479,12 +482,11 @@ def emit_sequential_loop(self, codegen_state, iname, iname_dtype, lbound, ubound, inner, hints): ecm = codegen_state.expression_to_code_mapper - from loopy.target.c import POD - - from pymbolic.mapper.stringifier import PREC_NONE from cgen import For, InlineInitializer - from cgen.ispc import ISPCUniform + from pymbolic.mapper.stringifier import PREC_NONE + + from loopy.target.c import POD loop = For( InlineInitializer( diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 384263230..e2f3ecda2 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -23,23 +23,23 @@ THE SOFTWARE. """ -from typing import Tuple, Sequence +from typing import Sequence, Tuple import numpy as np + +from cgen import Declarator, Generable from pymbolic import var from pytools import memoize_method -from cgen import Declarator, Generable -from loopy.target.c import CFamilyTarget, CFamilyASTBuilder -from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper -from loopy.diagnostic import LoopyError, LoopyTypeError -from loopy.types import NumpyType -from loopy.target.c import DTypeRegistryWrapper -from loopy.kernel.array import VectorArrayDimTag, FixedStrideArrayDimTag, ArrayBase -from loopy.kernel.data import AddressSpace, ImageArg, ConstantArg -from loopy.kernel.function_interface import ScalarCallable from loopy.codegen import CodeGenerationState from loopy.codegen.result import CodeGenerationResult +from loopy.diagnostic import LoopyError, LoopyTypeError +from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag, VectorArrayDimTag +from loopy.kernel.data import AddressSpace, ConstantArg, ImageArg +from loopy.kernel.function_interface import ScalarCallable +from loopy.target.c import CFamilyASTBuilder, CFamilyTarget, DTypeRegistryWrapper +from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper +from loopy.types import NumpyType # {{{ dtype registry wrappers @@ -573,8 +573,10 @@ def get_device_ast_builder(self): @memoize_method def get_dtype_registry(self): - from loopy.target.c.compyte.dtypes import (DTypeRegistry, - fill_registry_with_opencl_c_types) + from loopy.target.c.compyte.dtypes import ( + DTypeRegistry, + fill_registry_with_opencl_c_types, + ) result = DTypeRegistry() fill_registry_with_opencl_c_types(result) @@ -672,7 +674,7 @@ def _wrap_kernel_decl( def generate_top_of_body(self, codegen_state): from loopy.kernel.data import ImageArg if any(isinstance(arg, ImageArg) for arg in codegen_state.kernel.args): - from cgen import Value, Const, Initializer + from cgen import Const, Initializer, Value return [ Initializer(Const(Value("sampler_t", "loopy_sampler")), "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP " @@ -724,7 +726,7 @@ def wrap_decl_for_address_space( % address_space) def wrap_global_constant(self, decl: Declarator) -> Declarator: - from cgen.opencl import CLGlobal, CLConstant + from cgen.opencl import CLConstant, CLGlobal assert isinstance(decl, CLGlobal) decl = decl.subdecl @@ -799,12 +801,13 @@ def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var, if isinstance(lhs_dtype, NumpyType) and lhs_dtype.numpy_dtype in [ np.int32, np.int64, np.float32, np.float64]: - from cgen import Block, DoWhile, Assign + from cgen import Assign, Block, DoWhile + from loopy.target.c import POD old_val_var = codegen_state.var_name_generator("loopy_old_val") new_val_var = codegen_state.var_name_generator("loopy_new_val") - from loopy.kernel.data import TemporaryVariable, AddressSpace + from loopy.kernel.data import AddressSpace, TemporaryVariable ecm = codegen_state.expression_to_code_mapper.with_assignments( { old_val_var: TemporaryVariable(old_val_var, lhs_dtype, @@ -815,8 +818,9 @@ def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var, lhs_expr_code = ecm(lhs_expr, prec=PREC_NONE, type_context=None) - from pymbolic.mapper.substitutor import make_subst_func from pymbolic import var + from pymbolic.mapper.substitutor import make_subst_func + from loopy.symbolic import SubstitutionMapper subst = SubstitutionMapper( @@ -844,7 +848,7 @@ def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var, else: raise AssertionError() - from loopy.kernel.data import (TemporaryVariable, ArrayArg) + from loopy.kernel.data import ArrayArg, TemporaryVariable if ( isinstance(lhs_var, ArrayArg) and diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index edf3c9d93..a8a68fba0 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -1,5 +1,6 @@ from __future__ import annotations + """OpenCL target integrated with PyOpenCL.""" __copyright__ = "Copyright (C) 2015 Andreas Kloeckner" @@ -24,33 +25,51 @@ THE SOFTWARE. """ +import logging +from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple, Union, cast from warnings import warn -from typing import Sequence, Tuple, List, Union, Optional, cast, Any, TYPE_CHECKING import numpy as np -import pymbolic.primitives as p + import genpy -from cgen import (Generable, Pointer, Const, FunctionBody, Collection, Initializer, - Line, Block) +import pymbolic.primitives as p +from cgen import ( + Block, + Collection, + Const, + FunctionBody, + Generable, + Initializer, + Line, + Pointer, +) from cgen.opencl import CLGlobal -from loopy.target.opencl import (OpenCLTarget, OpenCLCASTBuilder, - ExpressionToOpenCLCExpressionMapper) +from loopy.codegen import CodeGenerationState +from loopy.codegen.result import CodeGenerationResult +from loopy.diagnostic import LoopyError, LoopyTypeError +from loopy.kernel import LoopKernel +from loopy.kernel.data import ( + ArrayArg, + ConstantArg, + ImageArg, + TemporaryVariable, + ValueArg, +) +from loopy.kernel.function_interface import ScalarCallable +from loopy.schedule import CallKernel +from loopy.target.opencl import ( + ExpressionToOpenCLCExpressionMapper, + OpenCLCASTBuilder, + OpenCLTarget, +) from loopy.target.pyopencl_execution import PyOpenCLExecutor from loopy.target.python import PythonASTBuilderBase -from loopy.kernel import LoopKernel from loopy.translation_unit import FunctionIdT, TranslationUnit from loopy.types import NumpyType from loopy.typing import ExpressionT -from loopy.diagnostic import LoopyError, LoopyTypeError -from loopy.kernel.function_interface import ScalarCallable -from loopy.kernel.data import ( - TemporaryVariable, ValueArg, ArrayArg, ImageArg, ConstantArg) -from loopy.schedule import CallKernel -from loopy.codegen import CodeGenerationState -from loopy.codegen.result import CodeGenerationResult -import logging + logger = logging.getLogger(__name__) if TYPE_CHECKING: @@ -550,8 +569,9 @@ def get_dtype_registry(self): result = TYPE_REGISTRY from loopy.target.opencl import ( - DTypeRegistryWrapperWithCL1Atomics, - DTypeRegistryWrapperWithInt8ForBool) + DTypeRegistryWrapperWithCL1Atomics, + DTypeRegistryWrapperWithInt8ForBool, + ) result = DTypeRegistryWrapperWithInt8ForBool(result) if self.atomics_flavor == "cl1": @@ -621,11 +641,11 @@ def generate_value_arg_setup( ) -> genpy.Suite: options = kernel.options + from genpy import If, Raise, Statement as S, Suite + import loopy as lp from loopy.kernel.array import ArrayBase - from genpy import If, Raise, Statement as S, Suite - result: List[genpy.Generable] = [] gen = result.append @@ -707,9 +727,10 @@ def add_buf_arg(arg_idx, typechar, expr_str): def generate_array_arg_setup( kernel: LoopKernel, passed_names: Sequence[str], ) -> genpy.Generable: - from loopy.kernel.array import ArrayBase from genpy import Statement as S, Suite + from loopy.kernel.array import ArrayBase + result: List[genpy.Generable] = [] gen = result.append @@ -755,7 +776,7 @@ def get_function_definition( + list(kai.passed_arg_names) + ["wait_for=None", "allocator=None"]) - from genpy import (For, Function, Suite, Return, Line, Statement as S) + from genpy import For, Function, Line, Return, Statement as S, Suite return Function( codegen_result.current_program(codegen_state).name, args, @@ -794,7 +815,6 @@ def _get_global_temporaries(self, codegen_state): def get_temporary_decls(self, codegen_state, schedule_index): from genpy import Assign, Comment, Line - from pymbolic.mapper.stringifier import PREC_NONE ecm = self.get_expression_to_code_mapper(codegen_state) @@ -835,7 +855,7 @@ def get_kernel_call( subkernel_name: str, gsize: Tuple[ExpressionT, ...], lsize: Tuple[ExpressionT, ...] ) -> genpy.Suite: - from genpy import Suite, Assign, Assert, Line, Comment + from genpy import Assert, Assign, Comment, Line, Suite kernel = codegen_state.kernel @@ -909,9 +929,8 @@ def get_kernel_call( cl_arg_count = len(skai.passed_names) overflow_args_code = Suite([]) - from pymbolic.mapper.stringifier import PREC_NONE - import pyopencl.version as cl_ver + from pymbolic.mapper.stringifier import PREC_NONE if cl_ver.VERSION < (2020, 2): from warnings import warn warn("Your kernel invocation will likely fail because your " @@ -1089,7 +1108,7 @@ def get_function_declaration( codegen_state.kernel.linearization[schedule_index] ).kernel_name - from cgen import FunctionDeclaration, Value, Struct + from cgen import FunctionDeclaration, Struct, Value name = codegen_result.current_program(codegen_state).name if self.target.fortran_abi: @@ -1190,8 +1209,7 @@ def get_expression_to_c_expression_mapper(self, codegen_state): class VolatileMemPyOpenCLCASTBuilder(PyOpenCLCASTBuilder): def get_expression_to_c_expression_mapper(self, codegen_state): - from loopy.target.opencl import \ - VolatileMemExpressionToOpenCLCExpressionMapper + from loopy.target.opencl import VolatileMemExpressionToOpenCLCExpressionMapper return VolatileMemExpressionToOpenCLCExpressionMapper(codegen_state) diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 02dbcfc89..6c9dc1dae 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -21,23 +21,24 @@ """ -from typing import Sequence, Tuple, Union, Callable, Any, Optional, TYPE_CHECKING +import logging from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, Tuple, Union import numpy as np from immutables import Map from pytools import memoize_method -from pytools.codegen import Indentation, CodeGenerator +from pytools.codegen import CodeGenerator, Indentation -from loopy.types import LoopyType -from loopy.typing import ExpressionT from loopy.kernel import LoopKernel from loopy.kernel.data import ArrayArg from loopy.schedule.tools import KernelArgInfo -from loopy.target.execution import ( - ExecutorBase, ExecutionWrapperGeneratorBase) -import logging +from loopy.target.execution import ExecutionWrapperGeneratorBase, ExecutorBase +from loopy.types import LoopyType +from loopy.typing import ExpressionT + + logger = logging.getLogger(__name__) diff --git a/loopy/target/python.py b/loopy/target/python.py index 5760b56d5..291a92868 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -27,16 +27,16 @@ import numpy as np +from genpy import Collection, Generable, Suite from pymbolic.mapper import Mapper from pymbolic.mapper.stringifier import StringifyMapper -from genpy import Generable, Suite, Collection -from loopy.type_inference import TypeReader -from loopy.kernel.data import ValueArg -from loopy.diagnostic import LoopyError # noqa -from loopy.target import ASTBuilderBase from loopy.codegen import CodeGenerationState from loopy.codegen.result import CodeGenerationResult +from loopy.diagnostic import LoopyError # noqa +from loopy.kernel.data import ValueArg +from loopy.target import ASTBuilderBase +from loopy.type_inference import TypeReader # {{{ expression to code @@ -195,8 +195,8 @@ def get_temporary_decls(self, codegen_state, schedule_index): result = [] - from pymbolic.mapper.stringifier import PREC_NONE from genpy import Assign + from pymbolic.mapper.stringifier import PREC_NONE for tv in sorted( kernel.temporary_variables.values(), @@ -234,8 +234,8 @@ def emit_sequential_loop(self, codegen_state, iname, iname_dtype, lbound, ubound, inner, hints): ecm = codegen_state.expression_to_code_mapper - from pymbolic.mapper.stringifier import PREC_NONE, PREC_SUM from genpy import For + from pymbolic.mapper.stringifier import PREC_NONE, PREC_SUM if hints: raise ValueError("hints for python loops not supported") @@ -279,8 +279,8 @@ def emit_assignment(self, codegen_state, insn): if insn.atomicity: raise NotImplementedError("atomic ops in Python") - from pymbolic.mapper.stringifier import PREC_NONE from genpy import Assign + from pymbolic.mapper.stringifier import PREC_NONE return Assign( ecm(insn.assignee, prec=PREC_NONE, type_context=None), diff --git a/loopy/tools.py b/loopy/tools.py index 08f36794b..50a523ee8 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -20,23 +20,31 @@ THE SOFTWARE. """ -from typing import List import collections.abc as abc +import logging from functools import cached_property +from sys import intern +from typing import List +import numpy as np from immutables import Map + import islpy as isl -import numpy as np -from pytools import memoize_method, ProcessLogger -from pytools.persistent_dict import ( - KeyBuilder as KeyBuilderBase, WriteOncePersistentDict) -from loopy.symbolic import (UncachedWalkMapper as LoopyWalkMapper, - RuleAwareIdentityMapper) from pymbolic.mapper.persistent_hash import ( - PersistentHashWalkMapper as PersistentHashWalkMapperBase) -from sys import intern + PersistentHashWalkMapper as PersistentHashWalkMapperBase, +) +from pytools import ProcessLogger, memoize_method +from pytools.persistent_dict import ( + KeyBuilder as KeyBuilderBase, + WriteOncePersistentDict, +) + +from loopy.symbolic import ( + RuleAwareIdentityMapper, + UncachedWalkMapper as LoopyWalkMapper, +) + -import logging logger = logging.getLogger(__name__) @@ -639,7 +647,7 @@ def __hash__(self): def unpickles_equally(obj): - from pickle import loads, dumps + from pickle import dumps, loads return loads(dumps(obj)) == obj @@ -696,8 +704,8 @@ def map_call(self, expr, expn_state): def _unresolve_callables(kernel, callables_table): - from loopy.symbolic import SubstitutionRuleMappingContext from loopy.kernel import KernelState + from loopy.symbolic import SubstitutionRuleMappingContext vng = kernel.get_var_name_generator() rule_mapping_context = SubstitutionRuleMappingContext(kernel.substitutions, @@ -713,7 +721,8 @@ def _unresolve_callables(kernel, callables_table): def _kernel_to_python(kernel, is_entrypoint=False, var_name="kernel"): from mako.template import Template - from loopy.kernel.instruction import MultiAssignmentBase, BarrierInstruction + + from loopy.kernel.instruction import BarrierInstruction, MultiAssignmentBase options = {} # options: mapping from insn_id to str of options @@ -898,12 +907,14 @@ def clear_in_mem_caches() -> None: # {{{ memoize_on_disk def memoize_on_disk(func, key_builder_t=LoopyKeyBuilder): - from loopy.version import DATA_MODEL_VERSION from functools import wraps + + import pymbolic.primitives as prim from pytools.persistent_dict import WriteOncePersistentDict - from loopy.translation_unit import TranslationUnit + from loopy.kernel import LoopKernel - import pymbolic.primitives as prim + from loopy.translation_unit import TranslationUnit + from loopy.version import DATA_MODEL_VERSION transform_cache = WriteOncePersistentDict( ("loopy-memoize-cache-" diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 7a220418f..73bd5cdde 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -21,11 +21,12 @@ """ +from loopy.kernel import LoopKernel from loopy.kernel.instruction import BarrierInstruction from loopy.match import parse_match from loopy.transform.instruction import add_dependency from loopy.translation_unit import for_each_kernel -from loopy.kernel import LoopKernel + __doc__ = """ .. currentmodule:: loopy diff --git a/loopy/transform/arithmetic.py b/loopy/transform/arithmetic.py index 65576bad2..b527c087b 100644 --- a/loopy/transform/arithmetic.py +++ b/loopy/transform/arithmetic.py @@ -22,9 +22,8 @@ from loopy.diagnostic import LoopyError - -from loopy.translation_unit import for_each_kernel from loopy.kernel import LoopKernel +from loopy.translation_unit import for_each_kernel # {{{ fold constants @@ -104,10 +103,21 @@ def map_ax_name_to_index(ax): # }}} from pymbolic.mapper.substitutor import make_subst_func - from pymbolic.primitives import (Sum, Product, is_zero, - flattened_sum, flattened_product, Subscript, Variable) - from loopy.symbolic import (get_dependencies, SubstitutionMapper, - UnidirectionalUnifier) + from pymbolic.primitives import ( + Product, + Subscript, + Sum, + Variable, + flattened_product, + flattened_sum, + is_zero, + ) + + from loopy.symbolic import ( + SubstitutionMapper, + UnidirectionalUnifier, + get_dependencies, + ) # {{{ common factor key list maintenance diff --git a/loopy/transform/array_buffer_map.py b/loopy/transform/array_buffer_map.py index f04719c77..ec3737233 100644 --- a/loopy/transform/array_buffer_map.py +++ b/loopy/transform/array_buffer_map.py @@ -21,18 +21,19 @@ """ -from dataclasses import dataclass, replace from abc import ABC, abstractmethod -from typing import Optional, Callable, Sequence, Tuple, Any +from dataclasses import dataclass, replace +from typing import Any, Callable, Optional, Sequence, Tuple + from typing_extensions import Self + import islpy as isl from islpy import dim_type -from loopy.symbolic import (get_dependencies, SubstitutionMapper) +from pymbolic import var from pymbolic.mapper.substitutor import make_subst_func - from pytools import memoize_method -from pymbolic import var +from loopy.symbolic import SubstitutionMapper, get_dependencies from loopy.typing import ExpressionT @@ -354,7 +355,7 @@ def augment_domain_with_sweep(self, domain, new_non1_storage_axis_names, domain = domain & renamed_aug_domain - from loopy.isl_helpers import convexify, boxify + from loopy.isl_helpers import boxify, convexify if boxify_sweep: return boxify(self.kernel.cache_manager, domain, new_non1_storage_axis_names, self.kernel.assumptions) diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index 536a7a826..04c5ea385 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -21,10 +21,10 @@ """ -from loopy.symbolic import (RuleAwareIdentityMapper, SubstitutionRuleMappingContext) -from loopy.kernel.data import ValueArg, ArrayArg import islpy as isl +from loopy.kernel.data import ArrayArg, ValueArg +from loopy.symbolic import RuleAwareIdentityMapper, SubstitutionRuleMappingContext from loopy.translation_unit import for_each_kernel diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index c40eefcfd..c8339f550 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -20,22 +20,31 @@ THE SOFTWARE. """ +import logging + from immutables import Map -from loopy.transform.array_buffer_map import (ArrayToBufferMap, NoOpArrayToBufferMap, - AccessDescriptor) -from loopy.symbolic import (get_dependencies, - RuleAwareIdentityMapper, SubstitutionRuleMappingContext, - SubstitutionMapper) + +from pymbolic import var from pymbolic.mapper.substitutor import make_subst_func -from loopy.tools import memoize_on_disk + from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel -from loopy.translation_unit import TranslationUnit from loopy.kernel.function_interface import CallableKernel, ScalarCallable +from loopy.symbolic import ( + RuleAwareIdentityMapper, + SubstitutionMapper, + SubstitutionRuleMappingContext, + get_dependencies, +) +from loopy.tools import memoize_on_disk +from loopy.transform.array_buffer_map import ( + AccessDescriptor, + ArrayToBufferMap, + NoOpArrayToBufferMap, +) +from loopy.translation_unit import TranslationUnit -from pymbolic import var -import logging logger = logging.getLogger(__name__) @@ -228,7 +237,8 @@ def buffer_array_for_single_kernel(kernel, callables_table, var_name, if not within(kernel, insn, ()): continue - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Subscript, Variable + from loopy.symbolic import LinearSubscript for assignee in insn.assignees: diff --git a/loopy/transform/callable.py b/loopy/transform/callable.py index 33196ca67..1fe40a370 100644 --- a/loopy/transform/callable.py +++ b/loopy/transform/callable.py @@ -20,22 +20,28 @@ THE SOFTWARE. """ -import islpy as isl from immutables import Map +import islpy as isl from pytools import UniqueNameGenerator -from loopy.kernel import LoopKernel from loopy.diagnostic import LoopyError -from loopy.kernel.instruction import (CallInstruction, MultiAssignmentBase, - Assignment, CInstruction, _DataObliviousInstruction) +from loopy.kernel import LoopKernel +from loopy.kernel.function_interface import CallableKernel, ScalarCallable +from loopy.kernel.instruction import ( + Assignment, + CallInstruction, + CInstruction, + MultiAssignmentBase, + _DataObliviousInstruction, +) from loopy.symbolic import ( - RuleAwareIdentityMapper, - RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) -from loopy.kernel.function_interface import ( - CallableKernel, ScalarCallable) -from loopy.translation_unit import (TranslationUnit, - for_each_kernel) + RuleAwareIdentityMapper, + RuleAwareSubstitutionMapper, + SubstitutionRuleMappingContext, +) +from loopy.translation_unit import TranslationUnit, for_each_kernel + __doc__ = """ .. currentmodule:: loopy @@ -130,8 +136,8 @@ def __init__(self, rule_mapping_context, caller_knl, def map_subscript(self, expr, expn_state): if expr.aggregate.name in self.callee_knl.arg_dict: - from pymbolic.primitives import Subscript, Variable from pymbolic import substitute + from pymbolic.primitives import Subscript, Variable sar = self.callee_arg_to_call_param[expr.aggregate.name] # SubArrayRef @@ -179,6 +185,7 @@ def substitute_into_domain(domain, param_name, expr, allowed_param_dims): :arg allowed_deps: A :class:`list` of :class:`str` that are """ import pymbolic.primitives as prim + from loopy.symbolic import get_dependencies, isl_set_from_expr if param_name not in domain.get_var_dict(): # param_name not in domain => domain will be unchanged @@ -241,6 +248,7 @@ def _inline_call_instruction(caller_knl, callee_knl, call_insn): """ import pymbolic.primitives as prim from pymbolic.mapper.substitutor import make_subst_func + from loopy.kernel.data import ValueArg # {{{ sanity checks @@ -505,7 +513,7 @@ def inline_callable_kernel(translation_unit, function_name): Returns a copy of *translation_unit* with the callable kernel named *function_name* inlined at all call-sites. """ - from loopy.preprocess import infer_arg_descr, filter_reachable_callables + from loopy.preprocess import filter_reachable_callables, infer_arg_descr from loopy.translation_unit import resolve_callables # {{{ must have argument shape information at call sites to inline @@ -533,11 +541,13 @@ def rename_callable(program, old_name, new_name=None, existing_ok=False): :arg new_name: New name for the callable to be renamed :arg existing_ok: An instance of :class:`bool` """ - from loopy.symbolic import ( - RuleAwareSubstitutionMapper, - SubstitutionRuleMappingContext) from pymbolic import var + from loopy.symbolic import ( + RuleAwareSubstitutionMapper, + SubstitutionRuleMappingContext, + ) + assert isinstance(program, TranslationUnit) assert isinstance(old_name, str) diff --git a/loopy/transform/concatenate.py b/loopy/transform/concatenate.py index cbff95df5..8a4bb28fb 100644 --- a/loopy/transform/concatenate.py +++ b/loopy/transform/concatenate.py @@ -25,16 +25,16 @@ .. autofunction:: concatenate_arrays """ -from typing import Sequence, Optional, List +from typing import List, Optional, Sequence +import pymbolic.primitives as prim +from pytools import all_equal + +from loopy.kernel import LoopKernel from loopy.kernel.data import ArrayArg, KernelArgument, TemporaryVariable, auto from loopy.symbolic import SubstitutionRuleMappingContext -from loopy.kernel import LoopKernel from loopy.translation_unit import for_each_kernel -import pymbolic.primitives as prim -from pytools import all_equal - @for_each_kernel def concatenate_arrays( diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 87cb7befb..088d89643 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -20,26 +20,23 @@ THE SOFTWARE. """ -from warnings import warn - from dataclasses import dataclass, replace - -from typing import Optional, Tuple, Dict, cast +from typing import Dict, Optional, Tuple, cast +from warnings import warn import numpy as np from immutables import Map -from islpy import dim_type +from islpy import dim_type from pytools import MovedFunctionDeprecationWrapper from loopy.diagnostic import LoopyError -from loopy.kernel.data import AddressSpace, ImageArg, auto, TemporaryVariable - -from loopy.types import LoopyType -from loopy.typing import ExpressionT -from loopy.translation_unit import TranslationUnit, for_each_kernel from loopy.kernel import LoopKernel +from loopy.kernel.data import AddressSpace, ImageArg, TemporaryVariable, auto from loopy.kernel.function_interface import CallableKernel, ScalarCallable +from loopy.translation_unit import TranslationUnit, for_each_kernel +from loopy.types import LoopyType +from loopy.typing import ExpressionT # {{{ convenience: add_prefetch @@ -106,8 +103,9 @@ def _process_footprint_subscripts(kernel, rule_name, sweep_inames, % str(fsub)) for subst_map in kernel.applied_iname_rewrites: - from loopy.symbolic import SubstitutionMapper from pymbolic.mapper.substitutor import make_subst_func + + from loopy.symbolic import SubstitutionMapper fsub = SubstitutionMapper(make_subst_func(subst_map))(fsub) from loopy.symbolic import get_dependencies @@ -170,7 +168,7 @@ def add_prefetch_for_single_kernel(kernel, callables_table, var_name, from loopy.symbolic import parse parsed_var_name = parse(var_name) - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Subscript, Variable if isinstance(parsed_var_name, Variable): # nothing to see pass @@ -525,9 +523,10 @@ def remove_unused_arguments(kernel): for insn in exp_kernel.instructions: refd_vars.update(insn.dependency_names()) + from itertools import chain + from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag from loopy.symbolic import get_dependencies - from itertools import chain def tolerant_get_deps(expr): if expr is None or expr is lp.auto: @@ -705,10 +704,12 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): from pymbolic import var subst_dict = {old_name: var(new_name)} - from loopy.symbolic import ( - RuleAwareSubstitutionMapper, - SubstitutionRuleMappingContext) from pymbolic.mapper.substitutor import make_subst_func + + from loopy.symbolic import ( + RuleAwareSubstitutionMapper, + SubstitutionRuleMappingContext, + ) rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, var_name_gen) smap = RuleAwareSubstitutionMapper(rule_mapping_context, diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index 6a5a3a710..bb828221f 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -21,17 +21,17 @@ """ import islpy as isl - +import pymbolic.primitives as p from pymbolic.mapper.differentiator import DifferentiationMapper -import pymbolic.primitives as p + var = p.Variable import loopy as lp -from loopy.symbolic import RuleAwareIdentityMapper, SubstitutionRuleMappingContext -from loopy.isl_helpers import make_slab from loopy.diagnostic import LoopyError +from loopy.isl_helpers import make_slab from loopy.kernel import LoopKernel +from loopy.symbolic import RuleAwareIdentityMapper, SubstitutionRuleMappingContext # {{{ diff mapper diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index fe0bddcf3..8e047c036 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -21,24 +21,26 @@ """ -import islpy as isl -from islpy import dim_type from immutables import Map -from loopy.diagnostic import LoopyError +import islpy as isl +from islpy import dim_type from pymbolic import var +from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel -from loopy.translation_unit import TranslationUnit from loopy.kernel.function_interface import CallableKernel +from loopy.translation_unit import TranslationUnit def _apply_renames_in_exprs(kernel, var_renames): - from loopy.symbolic import ( - SubstitutionRuleMappingContext, - RuleAwareSubstitutionMapper) from pymbolic.mapper.substitutor import make_subst_func + from loopy.match import parse_stack_match + from loopy.symbolic import ( + RuleAwareSubstitutionMapper, + SubstitutionRuleMappingContext, + ) srmc = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) @@ -209,8 +211,7 @@ def _fuse_two_kernels(kernela, kernelb): kernelb = _apply_renames_in_exprs(kernelb, b_var_renames) - from pymbolic.imperative.transform import \ - fuse_statement_streams_with_unique_ids + from pymbolic.imperative.transform import fuse_statement_streams_with_unique_ids new_instructions, old_b_id_to_new_b_id = \ fuse_statement_streams_with_unique_ids( kernela.instructions, kernelb.instructions) @@ -386,7 +387,7 @@ def fuse_kernels(kernels, suffixes=None, data_flow=None): kernel.all_variable_names() for kernel in kernels] - from functools import reduce, partial + from functools import partial, reduce from operator import or_ merge_sets = partial(reduce, or_) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c44720f91..18df3dae4 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -21,21 +21,21 @@ """ +from typing import FrozenSet, Optional + import islpy as isl from islpy import dim_type -from loopy.symbolic import ( - RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, - SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError -from typing import FrozenSet - -from loopy.translation_unit import (TranslationUnit, - for_each_kernel) from loopy.kernel import LoopKernel from loopy.kernel.function_interface import CallableKernel +from loopy.symbolic import ( + RuleAwareIdentityMapper, + RuleAwareSubstitutionMapper, + SubstitutionRuleMappingContext, +) +from loopy.translation_unit import TranslationUnit, for_each_kernel -from typing import Optional __doc__ = """ .. currentmodule:: loopy @@ -405,9 +405,10 @@ def chunk_iname(kernel, split_iname, num_chunks, chunk_diff = chunk_ceil - chunk_floor chunk_mod = size.mod_val(num_chunks) - from loopy.symbolic import pw_aff_to_expr from pymbolic.primitives import Min + from loopy.symbolic import pw_aff_to_expr + def make_new_loop_index(inner, outer): # These two expressions are equivalent. Benchmarking between the # two was inconclusive, although one is shorter. @@ -571,8 +572,7 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): bounds = kernel.get_iname_bounds(iname, constants_only=True) - from loopy.isl_helpers import ( - static_max_of_pw_aff, static_value_of_pw_aff) + from loopy.isl_helpers import static_max_of_pw_aff, static_value_of_pw_aff from loopy.symbolic import pw_aff_to_expr length = int(pw_aff_to_expr( @@ -1383,6 +1383,7 @@ def parse_equation(eqn): var_name_gen = kernel.get_var_name_generator() from pymbolic.mapper.substitutor import make_subst_func + from loopy.match import parse_stack_match rule_mapping_context = SubstitutionRuleMappingContext( @@ -1627,10 +1628,9 @@ def map_reduction(self, expr, expn_state): self.old_to_new.append((iname, new_iname)) new_inames.append(new_iname) - from loopy.symbolic import SubstitutionMapper from pymbolic.mapper.substitutor import make_subst_func - from loopy.symbolic import Reduction + from loopy.symbolic import Reduction, SubstitutionMapper return Reduction(expr.operation, tuple(new_inames), self.rec( SubstitutionMapper(make_subst_func(subst_dict))( @@ -1962,8 +1962,7 @@ def _apply_identity_for_missing_map_dims(mapping, desired_dims): # dependency maps, which may contain variable names consisting of an iname # suffixed with a single apostrophe.) - from loopy.isl_helpers import ( - add_and_name_dims, add_eq_constraint_from_names) + from loopy.isl_helpers import add_and_name_dims, add_eq_constraint_from_names # {{{ Find any missing vars and add them to the input and output space @@ -2071,8 +2070,9 @@ def map_domain(kernel, transform_map): var_substitutions = {} applied_iname_rewrites = kernel.applied_iname_rewrites - from loopy.symbolic import aff_to_expr from pymbolic import var + + from loopy.symbolic import aff_to_expr for iname in transform_map_in_dims: subst_from_map = aff_to_expr( _find_aff_subst_from_map(iname, transform_map)) @@ -2271,8 +2271,7 @@ def add_inames_for_unused_hw_axes(kernel, within=None): :arg within: An instruction match as understood by :func:`loopy.match.parse_match`. """ - from loopy.kernel.data import (LocalInameTag, GroupInameTag, - AutoFitLocalInameTag) + from loopy.kernel.data import AutoFitLocalInameTag, GroupInameTag, LocalInameTag n_local_axes = max([tag.axis for iname in kernel.inames.values() @@ -2520,6 +2519,7 @@ def rename_iname(kernel, old_iname, new_iname, existing_ok=False, \mathcal{D}_{i_1} \neq \mathcal{D}_{i_2}`. """ from itertools import product + from loopy import tag_inames tags = kernel.inames[old_iname].tags diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 9a975b749..629916628 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -20,13 +20,14 @@ THE SOFTWARE. """ -from typing import Sequence, Mapping, List, Tuple +from typing import List, Mapping, Sequence, Tuple + from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel -from loopy.kernel.function_interface import (ScalarCallable, CallableKernel) +from loopy.kernel.function_interface import CallableKernel, ScalarCallable from loopy.kernel.instruction import InstructionBase -from loopy.translation_unit import TranslationUnit, for_each_kernel from loopy.symbolic import RuleAwareIdentityMapper +from loopy.translation_unit import TranslationUnit, for_each_kernel # {{{ find_instructions @@ -489,9 +490,10 @@ def __init__(self, rule_mapping_context, kernel): self.kernel = kernel def map_subscript(self, expr, expn_state): - from loopy.symbolic import simplify_using_aff from pymbolic.primitives import Subscript + from loopy.symbolic import simplify_using_aff + new_indices = tuple(simplify_using_aff(self.kernel, self.rec(idx, expn_state)) for idx in expr.index_tuple) diff --git a/loopy/transform/pack_and_unpack_args.py b/loopy/transform/pack_and_unpack_args.py index 6a39986a3..1c1b48c59 100644 --- a/loopy/transform/pack_and_unpack_args.py +++ b/loopy/transform/pack_and_unpack_args.py @@ -21,12 +21,14 @@ """ from immutables import Map + from loopy.diagnostic import LoopyError -from loopy.kernel.instruction import CallInstruction -from loopy.translation_unit import TranslationUnit from loopy.kernel import LoopKernel from loopy.kernel.function_interface import CallableKernel, ScalarCallable +from loopy.kernel.instruction import CallInstruction from loopy.symbolic import SubArrayRef +from loopy.translation_unit import TranslationUnit + __doc__ = """ .. currentmodule:: loopy @@ -115,10 +117,11 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, # {{{ handling ilp tags - from loopy.kernel.data import IlpBaseTag, VectorizeTag import islpy as isl from pymbolic import var + from loopy.kernel.data import IlpBaseTag, VectorizeTag + dim_type = isl.dim_type.set ilp_inames = {iname for iname in insn.within_inames if all(isinstance(tag, (IlpBaseTag, VectorizeTag)) @@ -141,6 +144,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, # }}} from pymbolic.mapper.substitutor import make_subst_func + from loopy.symbolic import SubstitutionMapper # dict to store the new assignees and parameters, the mapping pattern @@ -177,8 +181,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, arg = p.subscript.aggregate.name pack_name = vng(arg + "_pack") - from loopy.kernel.data import (TemporaryVariable, - AddressSpace) + from loopy.kernel.data import AddressSpace, TemporaryVariable if arg in kernel.arg_dict: arg_in_caller = kernel.arg_dict[arg] @@ -203,8 +206,8 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, # {{{ getting the lhs for packing and rhs for unpacking - from loopy.symbolic import simplify_via_aff from loopy.isl_helpers import make_slab + from loopy.symbolic import simplify_via_aff flatten_index = simplify_via_aff( sum(dim_tag.stride*idx for dim_tag, idx in diff --git a/loopy/transform/padding.py b/loopy/transform/padding.py index 8583e8fda..76deccc44 100644 --- a/loopy/transform/padding.py +++ b/loopy/transform/padding.py @@ -22,13 +22,12 @@ from pytools import MovedFunctionDeprecationWrapper -from loopy.symbolic import RuleAwareIdentityMapper, SubstitutionRuleMappingContext -from loopy.translation_unit import (for_each_kernel, - TranslationUnit) +from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel from loopy.kernel.function_interface import CallableKernel -from loopy.diagnostic import LoopyError +from loopy.symbolic import RuleAwareIdentityMapper, SubstitutionRuleMappingContext +from loopy.translation_unit import TranslationUnit, for_each_kernel class SubscriptRewriter(RuleAwareIdentityMapper): diff --git a/loopy/transform/parameter.py b/loopy/transform/parameter.py index 5cffdcf23..fb1bc0b71 100644 --- a/loopy/transform/parameter.py +++ b/loopy/transform/parameter.py @@ -21,12 +21,12 @@ """ -from loopy.symbolic import (RuleAwareSubstitutionMapper, - SubstitutionRuleMappingContext) import islpy as isl -from loopy.translation_unit import for_each_kernel from loopy.kernel import LoopKernel +from loopy.symbolic import RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext +from loopy.translation_unit import for_each_kernel + __doc__ = """ @@ -94,7 +94,7 @@ def process_set(s): from pymbolic.mapper.substitutor import make_subst_func subst_func = make_subst_func({name: value}) - from loopy.symbolic import SubstitutionMapper, PartialEvaluationMapper + from loopy.symbolic import PartialEvaluationMapper, SubstitutionMapper subst_map = SubstitutionMapper(subst_func) ev_map = PartialEvaluationMapper() diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 617df3b24..d04fa5b2d 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -23,32 +23,41 @@ from dataclasses import dataclass from typing import FrozenSet, List, Optional, Sequence, Type, Union + from immutables import Map + import islpy as isl +from pymbolic import var +from pymbolic.mapper.substitutor import make_subst_func +from pytools import memoize_on_first_arg from pytools.tag import Tag + +from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel -from loopy.typing import ExpressionT, auto, not_none +from loopy.kernel.data import AddressSpace +from loopy.kernel.function_interface import CallableKernel, ScalarCallable +from loopy.kernel.instruction import InstructionBase, MultiAssignmentBase +from loopy.kernel.tools import ( + find_most_recent_global_barrier, + kernel_has_global_barriers, +) from loopy.match import ToStackMatchCovertible -from loopy.symbolic import (get_dependencies, - RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, - SubstitutionRuleMappingContext, CombineMapper) -from loopy.diagnostic import LoopyError -from pymbolic.mapper.substitutor import make_subst_func +from loopy.symbolic import ( + CombineMapper, + RuleAwareIdentityMapper, + RuleAwareSubstitutionMapper, + SubstitutionRuleMappingContext, + get_dependencies, +) +from loopy.transform.array_buffer_map import ( + AccessDescriptor, + ArrayToBufferMap, + ArrayToBufferMapBase, + NoOpArrayToBufferMap, +) from loopy.translation_unit import CallablesTable, TranslationUnit -from loopy.kernel.instruction import InstructionBase, MultiAssignmentBase -from loopy.kernel.function_interface import CallableKernel, ScalarCallable -from loopy.kernel.tools import (kernel_has_global_barriers, - find_most_recent_global_barrier) -from loopy.kernel.data import AddressSpace from loopy.types import LoopyType, ToLoopyTypeConvertible, to_loopy_type - -from pymbolic import var -from pytools import memoize_on_first_arg - -from loopy.transform.array_buffer_map import (ArrayToBufferMap, - ArrayToBufferMapBase, - NoOpArrayToBufferMap, - AccessDescriptor) +from loopy.typing import ExpressionT, auto, not_none # {{{ contains_subst_rule_invocation @@ -82,8 +91,9 @@ def _get_calls_in_expr(expr): @memoize_on_first_arg def _get_called_names(insn): assert isinstance(insn, MultiAssignmentBase) - from pymbolic.primitives import Expression from functools import reduce + + from pymbolic.primitives import Expression return ((_get_calls_in_expr(insn.expression) if isinstance(insn.expression, Expression) else frozenset()) @@ -498,8 +508,9 @@ def precompute_for_single_kernel( subst_name: Optional[str] = None subst_tag = None - from pymbolic.primitives import Variable, Call - from loopy.symbolic import parse, TaggedVariable + from pymbolic.primitives import Call, Variable + + from loopy.symbolic import TaggedVariable, parse for use in subst_use: if isinstance(use, str): @@ -551,7 +562,7 @@ def precompute_for_single_kernel( # {{{ process invocations in footprint generators, start access_descriptors if footprint_generators: - from pymbolic.primitives import Variable, Call + from pymbolic.primitives import Call, Variable access_descriptors = [] diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index 3851bbdeb..e9b2b8c53 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -21,11 +21,14 @@ """ +import logging + +import pymbolic + from loopy.diagnostic import LoopyError from loopy.translation_unit import for_each_kernel -import pymbolic -import logging + logger = logging.getLogger(__name__) diff --git a/loopy/transform/realize_reduction.py b/loopy/transform/realize_reduction.py index 3aca4634e..5161efba6 100644 --- a/loopy/transform/realize_reduction.py +++ b/loopy/transform/realize_reduction.py @@ -24,31 +24,28 @@ """ +import logging from dataclasses import dataclass, replace -from typing import (Tuple, Dict, Callable, List, Optional, Set, Sequence, - FrozenSet) +from typing import Callable, Dict, FrozenSet, List, Optional, Sequence, Set, Tuple + -import logging logger = logging.getLogger(__name__) -from pytools import memoize_on_first_arg -from pytools.tag import Tag +from immutables import Map + import islpy as isl from pymbolic.primitives import Expression +from pytools import memoize_on_first_arg +from pytools.tag import Tag -from immutables import Map - -from loopy.kernel.data import make_assignment -from loopy.symbolic import ReductionCallbackMapper -from loopy.translation_unit import ConcreteCallablesTable, TranslationUnit -from loopy.kernel.function_interface import CallableKernel -from loopy.kernel.data import TemporaryVariable, AddressSpace -from loopy.kernel.instruction import ( - InstructionBase, MultiAssignmentBase, Assignment) +from loopy.diagnostic import LoopyError, ReductionIsNotTriangularError, warn_with_kernel from loopy.kernel import LoopKernel -from loopy.diagnostic import ( - LoopyError, warn_with_kernel, ReductionIsNotTriangularError) +from loopy.kernel.data import AddressSpace, TemporaryVariable, make_assignment +from loopy.kernel.function_interface import CallableKernel +from loopy.kernel.instruction import Assignment, InstructionBase, MultiAssignmentBase +from loopy.symbolic import ReductionCallbackMapper from loopy.transform.instruction import replace_instruction_ids_in_insn +from loopy.translation_unit import ConcreteCallablesTable, TranslationUnit # {{{ reduction realization context @@ -185,8 +182,12 @@ def _classify_reduction_inames(red_realize_ctx, inames): nonlocal_par = [] from loopy.kernel.data import ( - LocalInameTagBase, UnrolledIlpTag, UnrollTag, - ConcurrentTag, filter_iname_tags_by_type) + ConcurrentTag, + LocalInameTagBase, + UnrolledIlpTag, + UnrollTag, + filter_iname_tags_by_type, + ) for iname in inames: try: @@ -923,6 +924,7 @@ def expand_inner_reduction( red_realize_ctx, id, expr, nresults, depends_on, within_inames, predicates): # FIXME: use _make_temporaries from pymbolic.primitives import Call + from loopy.symbolic import Reduction assert isinstance(expr, (Call, Reduction)) @@ -1338,7 +1340,9 @@ def replace_var_within_expr(kernel, var_name_gen, expr, from_var, to_var): from pymbolic.mapper.substitutor import make_subst_func from loopy.symbolic import ( - SubstitutionRuleMappingContext, RuleAwareSubstitutionMapper) + RuleAwareSubstitutionMapper, + SubstitutionRuleMappingContext, + ) # FIXME: This is broken. SubstitutionRuleMappingContext produces a new # kernel (via finish_kernel) with new subst rules. These get dropped on the @@ -1755,7 +1759,8 @@ def map_reduction(expr, *, red_realize_ctx, nresults): domains=red_realize_ctx.domains) from loopy.type_inference import ( - infer_arg_and_reduction_dtypes_for_reduction_expression) + infer_arg_and_reduction_dtypes_for_reduction_expression, + ) arg_dtypes, reduction_dtypes = ( infer_arg_and_reduction_dtypes_for_reduction_expression( kernel_with_updated_domains, expr, @@ -2068,7 +2073,9 @@ def realize_reduction_for_single_kernel(kernel, callables_table, # extra work compared to not checking. from loopy.kernel.tools import ( - kernel_has_global_barriers, find_most_recent_global_barrier) + find_most_recent_global_barrier, + kernel_has_global_barriers, + ) if kernel_has_global_barriers(orig_kernel): global_barrier = find_most_recent_global_barrier(kernel, insn.id) diff --git a/loopy/transform/save.py b/loopy/transform/save.py index c5931e897..4487f1e65 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -20,24 +20,27 @@ THE SOFTWARE. """ +import logging from functools import cached_property from immutables import Map -from loopy.diagnostic import LoopyError -import loopy as lp +from pytools import Record, memoize_method -from loopy.kernel.data import auto, AddressSpace -from pytools import memoize_method, Record -from loopy.kernel.data import Iname +import loopy as lp +from loopy.diagnostic import LoopyError +from loopy.kernel.data import AddressSpace, Iname, auto from loopy.schedule import ( - EnterLoop, LeaveLoop, RunInstruction, - CallKernel, ReturnFromKernel, Barrier) - + Barrier, + CallKernel, + EnterLoop, + LeaveLoop, + ReturnFromKernel, + RunInstruction, +) from loopy.schedule.tools import get_block_boundaries -import logging logger = logging.getLogger(__name__) @@ -258,8 +261,9 @@ def __init__(self, kernel, callables_table): # representative chosen for saves/reloads self.base_storage_to_representative = {} - from loopy.kernel.data import ValueArg import islpy as isl + + from loopy.kernel.data import ValueArg self.new_subdomain = ( isl.BasicSet.universe( isl.Space.create_from_names( @@ -401,8 +405,12 @@ def _sortedtags(tags): if not tags: continue - from loopy.kernel.data import (GroupInameTag, LocalInameTag, - ConcurrentTag, filter_iname_tags_by_type) + from loopy.kernel.data import ( + ConcurrentTag, + GroupInameTag, + LocalInameTag, + filter_iname_tags_by_type, + ) if filter_iname_tags_by_type(tags, GroupInameTag): tag, = filter_iname_tags_by_type(tags, GroupInameTag, 1) @@ -766,7 +774,9 @@ def save_and_reload_temporaries(program, entrypoint=None): saver = TemporarySaver(knl, program.callables_table) from loopy.schedule.tools import ( - temporaries_read_in_subkernel, temporaries_written_in_subkernel) + temporaries_read_in_subkernel, + temporaries_written_in_subkernel, + ) for sched_idx, sched_item in enumerate(knl.linearization): diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 16997255c..b5c7aa7a1 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -20,19 +20,18 @@ THE SOFTWARE. """ -from loopy.symbolic import ( - RuleAwareIdentityMapper, SubstitutionRuleMappingContext) -from loopy.diagnostic import LoopyError -from loopy.transform.iname import remove_any_newly_unused_inames +import logging -from pytools import ImmutableRecord from pymbolic import var +from pytools import ImmutableRecord -from loopy.translation_unit import (for_each_kernel, - TranslationUnit) +from loopy.diagnostic import LoopyError from loopy.kernel.function_interface import CallableKernel, ScalarCallable +from loopy.symbolic import RuleAwareIdentityMapper, SubstitutionRuleMappingContext +from loopy.transform.iname import remove_any_newly_unused_inames +from loopy.translation_unit import TranslationUnit, for_each_kernel + -import logging logger = logging.getLogger(__name__) @@ -122,8 +121,10 @@ def gather_exprs(expr, mapper): # can't nest, don't recurse from loopy.symbolic import ( - CallbackMapper, UncachedWalkMapper as WalkMapper, - IdentityMapper) + CallbackMapper, + IdentityMapper, + UncachedWalkMapper as WalkMapper, + ) dfmapper = CallbackMapper(gather_exprs, WalkMapper()) from loopy.kernel.instruction import MultiAssignmentBase @@ -384,9 +385,10 @@ def get_relevant_definition_insn_id(usage_insn_id): raise LoopyError("no assignments to variable '%s' found" % lhs_name) - from loopy.symbolic import SubstitutionMapper from pymbolic.mapper.substitutor import make_subst_func + from loopy.match import parse_stack_match + from loopy.symbolic import SubstitutionMapper within = parse_stack_match(within) vng = kernel.get_var_name_generator() @@ -416,7 +418,7 @@ def _accesses_lhs(kernel, insn, *args): from loopy.kernel.data import Assignment assert isinstance(def_insn, Assignment) - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Subscript, Variable if isinstance(def_insn.assignee, Subscript): indices = def_insn.assignee.index_tuple elif isinstance(def_insn.assignee, Variable): @@ -520,8 +522,8 @@ def expand_subst(kernel, within=None): logger.debug("%s: expand subst" % kernel.name) - from loopy.symbolic import RuleAwareSubstitutionRuleExpander from loopy.match import parse_stack_match + from loopy.symbolic import RuleAwareSubstitutionRuleExpander rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) submap = RuleAwareSubstitutionRuleExpander( diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index db3a9788c..c0d1b0b05 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -1,5 +1,6 @@ from __future__ import annotations + __copyright__ = "Copyright (C) 2018 Kaushik Kulkarni" __license__ = """ @@ -24,24 +25,30 @@ import collections from collections.abc import Set as abc_Set -from dataclasses import field, dataclass, replace -from typing import FrozenSet, Optional, TYPE_CHECKING, Mapping, Callable, Union, Any +from dataclasses import dataclass, field, replace +from functools import wraps +from typing import TYPE_CHECKING, Any, Callable, FrozenSet, Mapping, Optional, Union from warnings import warn -from pymbolic.primitives import Variable -from functools import wraps +from immutables import Map -from loopy.symbolic import (RuleAwareIdentityMapper, ResolvedFunction, - SubstitutionRuleMappingContext) -from loopy.kernel.function_interface import ( - CallableKernel, InKernelCallable, ScalarCallable) -from loopy.diagnostic import LoopyError, DirectCallUncachedWarning -from loopy.library.reduction import ReductionOpFunction +from pymbolic.primitives import Call, Variable +from loopy.diagnostic import DirectCallUncachedWarning, LoopyError from loopy.kernel import LoopKernel +from loopy.kernel.function_interface import ( + CallableKernel, + InKernelCallable, + ScalarCallable, +) +from loopy.library.reduction import ReductionOpFunction +from loopy.symbolic import ( + ResolvedFunction, + RuleAwareIdentityMapper, + SubstitutionRuleMappingContext, +) from loopy.target import TargetBase -from pymbolic.primitives import Call -from immutables import Map + if TYPE_CHECKING: from loopy.target.execution import ExecutorBase @@ -812,9 +819,9 @@ def resolve_callables(t_unit: TranslationUnit) -> TranslationUnit: Returns a :class:`TranslationUnit` with known :class:`pymbolic.primitives.Call` expression nodes converted to :class:`loopy.symbolic.ResolvedFunction`. """ - from loopy.library.function import get_loopy_callables from loopy.check import validate_kernel_call_sites from loopy.kernel import KernelState + from loopy.library.function import get_loopy_callables if t_unit.state >= KernelState.CALLS_RESOLVED: # program's callables have been resolved diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 53ee91a75..b997cee25 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -20,25 +20,33 @@ THE SOFTWARE. """ -from loopy.symbolic import CombineMapper +import logging + import numpy as np -from loopy.tools import is_integer -from loopy.types import NumpyType +from pymbolic.primitives import Lookup, Subscript, Variable from loopy.diagnostic import ( - LoopyError, - TypeInferenceFailure, DependencyTypeInferenceFailure) + DependencyTypeInferenceFailure, + LoopyError, + TypeInferenceFailure, +) from loopy.kernel.instruction import _DataObliviousInstruction - from loopy.symbolic import ( - LinearSubscript, parse_tagged_name, RuleAwareIdentityMapper, - SubstitutionRuleExpander, ResolvedFunction, - SubstitutionRuleMappingContext, SubArrayRef) -from pymbolic.primitives import Variable, Subscript, Lookup + CombineMapper, + LinearSubscript, + ResolvedFunction, + RuleAwareIdentityMapper, + SubArrayRef, + SubstitutionRuleExpander, + SubstitutionRuleMappingContext, + parse_tagged_name, +) +from loopy.tools import is_integer from loopy.translation_unit import CallablesInferenceContext, make_clbl_inf_ctx +from loopy.types import NumpyType + -import logging logger = logging.getLogger(__name__) @@ -482,8 +490,8 @@ def map_variable(self, expr): raise TypeInferenceFailure("name not known in type inference: %s" % expr.name) - from loopy.kernel.data import TemporaryVariable, KernelArgument import loopy as lp + from loopy.kernel.data import KernelArgument, TemporaryVariable if isinstance(obj, (KernelArgument, TemporaryVariable)): assert obj.dtype is not lp.auto result = [obj.dtype] @@ -550,9 +558,10 @@ def map_reduction(self, expr, return_tuple=False): :arg return_tuple: If *True*, treat the reduction as having tuple type. Otherwise, if *False*, the reduction must have scalar type. """ - from loopy.symbolic import Reduction from pymbolic.primitives import Call + from loopy.symbolic import Reduction + if not return_tuple and expr.is_tuple_typed: raise LoopyError("reductions with more or fewer than one " "return value may only be used in direct " @@ -665,8 +674,8 @@ def map_variable(self, expr): raise TypeInferenceFailure("name not known in type inference: %s" % expr.name) - from loopy.kernel.data import TemporaryVariable, KernelArgument import loopy as lp + from loopy.kernel.data import KernelArgument, TemporaryVariable if isinstance(obj, (KernelArgument, TemporaryVariable)): assert obj.dtype is not lp.auto result = [obj.dtype] @@ -843,7 +852,7 @@ def infer_unknown_types_for_a_single_kernel(kernel, clbl_inf_ctx): # {{{ work on type inference queue - from loopy.kernel.data import TemporaryVariable, KernelArgument + from loopy.kernel.data import KernelArgument, TemporaryVariable old_calls_to_new_calls = {} touched_variable_names = set() diff --git a/loopy/types.py b/loopy/types.py index acc941037..143715a39 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -1,5 +1,6 @@ from __future__ import annotations + __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" __license__ = """ @@ -23,10 +24,12 @@ """ from typing import Any, Mapping, Type, Union + import numpy as np -from loopy.typing import auto from loopy.diagnostic import LoopyError +from loopy.typing import auto + __doc__ = """ .. currentmodule:: loopy.types diff --git a/loopy/typing.py b/loopy/typing.py index 5a20d2e0b..948616578 100644 --- a/loopy/typing.py +++ b/loopy/typing.py @@ -21,12 +21,13 @@ """ -from typing import Union, Tuple, TypeVar, Optional +from typing import Optional, Tuple, TypeVar, Union import numpy as np from pymbolic.primitives import Expression + IntegralT = Union[int, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64] FloatT = Union[float, complex, np.float32, np.float64, np.complex64, diff --git a/loopy/version.py b/loopy/version.py index f66c24dee..09d8442a2 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -24,6 +24,8 @@ # {{{ find install- or run-time git revision import os + + if os.environ.get("AKPYTHON_EXEC_IMPORT_UNAVAILABLE") is not None: # We're just being exec'd by setup.py. We can't import anything. _git_rev = None diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index 0f4e95b47..9103c42cc 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -1,9 +1,11 @@ import numpy as np + import pyopencl as cl # noqa -import loopy as lp +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests # noqa +import loopy as lp def test_laplacian_stiffness(ctx_factory): diff --git a/proto-tests/test_sem.py b/proto-tests/test_sem.py index 5155a0011..d87126cfb 100644 --- a/proto-tests/test_sem.py +++ b/proto-tests/test_sem.py @@ -1,9 +1,12 @@ import numpy as np + import pyopencl as cl # noqa +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) + import loopy as lp -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests # noqa 1/0 # not ready diff --git a/proto-tests/test_sem_tim.py b/proto-tests/test_sem_tim.py index 4bf448fc1..2949b39d3 100644 --- a/proto-tests/test_sem_tim.py +++ b/proto-tests/test_sem_tim.py @@ -1,9 +1,12 @@ import numpy as np + import pyopencl as cl # noqa +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) + import loopy as lp -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests # noqa 1/0 # inspect me diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index 04b11e317..7ee30313c 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -1,9 +1,12 @@ import numpy as np + import pyopencl as cl # noqa +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) + import loopy as lp -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests # noqa 1/0 # see sem_reagan? diff --git a/pyproject.toml b/pyproject.toml index fc8a67b4f..30cbab9ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,8 +9,7 @@ extend-select = [ "E", # pycodestyle "F", # pyflakes - # TODO - # "I", # flake8-isort + "I", # flake8-isort "N", # pep8-naming "NPY", # numpy @@ -51,9 +50,13 @@ combine-as-imports = true known-first-party = [ "pytools", "pymbolic", + "islpy", + "pyopencl", + "cgen", + "genpy", ] known-local-folder = [ - "modepy", + "loopy", ] lines-after-imports = 2 diff --git a/setup.py b/setup.py index b0853937f..0cf58f83a 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,9 @@ #!/usr/bin/env python import os -from setuptools import setup, find_packages + +from setuptools import find_packages, setup + ver_dic = {} version_file = open("loopy/version.py") @@ -21,13 +23,13 @@ def find_git_revision(tree_root): # Keep this routine self-contained so that it can be copy-pasted into # setup.py. - from os.path import join, exists, abspath + from os.path import abspath, exists, join tree_root = abspath(tree_root) if not exists(join(tree_root, ".git")): return None - from subprocess import Popen, PIPE, STDOUT + from subprocess import PIPE, STDOUT, Popen p = Popen(["git", "rev-parse", "HEAD"], shell=False, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True, cwd=tree_root) diff --git a/test/library_for_test.py b/test/library_for_test.py index fe67ace2f..47bca082a 100644 --- a/test/library_for_test.py +++ b/test/library_for_test.py @@ -1,6 +1,7 @@ -import loopy as lp import numpy as np +import loopy as lp + class NoRetFunction(lp.ScalarCallable): def with_types(self, arg_id_to_dtype, callables): diff --git a/test/test_apps.py b/test/test_apps.py index cbe3e76e8..9952f9fdc 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -20,15 +20,19 @@ THE SOFTWARE. """ +import logging import sys + import numpy as np -import loopy as lp +import pytest + import pyopencl as cl import pyopencl.clmath # noqa import pyopencl.clrandom # noqa -import pytest -import logging +import loopy as lp + + logger = logging.getLogger(__name__) try: @@ -38,11 +42,11 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests from loopy.diagnostic import LoopyError + __all__ = [ "pytest_generate_tests", "cl" # "cl.create_some_context" diff --git a/test/test_c_execution.py b/test/test_c_execution.py index 11e336e0d..e703d9415 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -20,13 +20,16 @@ THE SOFTWARE. """ -import numpy as np -import loopy as lp +import logging import sys + +import numpy as np import pytest + +import loopy as lp from loopy import CACHING_ENABLED -import logging + logger = logging.getLogger(__name__) try: @@ -295,10 +298,11 @@ def test_c_execution_with_global_temporaries(): def test_missing_compilers(): - from loopy.target.c import ExecutableCTarget, CTarget - from loopy.target.c.c_execution import CCompiler from codepy.toolchain import GCCToolchain + from loopy.target.c import CTarget, ExecutableCTarget + from loopy.target.c.c_execution import CCompiler + def __test(evalfunc, target, **targetargs): n = 10 diff --git a/test/test_callables.py b/test/test_callables.py index 177c12622..d58247a75 100644 --- a/test/test_callables.py +++ b/test/test_callables.py @@ -20,19 +20,19 @@ THE SOFTWARE. """ -import numpy as np -import pyopencl as cl -import pyopencl.clrandom # noqa: F401 -import loopy as lp -import pytest import sys -from pytools import ImmutableRecord +import numpy as np +import pytest +import pyopencl as cl +import pyopencl.clrandom # noqa: F401 from pyopencl.tools import ( # noqa: F401 - pytest_generate_tests_for_pyopencl - as pytest_generate_tests) + pytest_generate_tests_for_pyopencl as pytest_generate_tests, +) +from pytools import ImmutableRecord +import loopy as lp from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa: F401 @@ -657,9 +657,9 @@ def test_inlining_with_callee_domain_param(ctx_factory): def test_double_resolving(): - from loopy.translation_unit import resolve_callables from loopy.kernel import KernelState from loopy.symbolic import ResolvedFunction + from loopy.translation_unit import resolve_callables knl = lp.make_kernel( "{[i]: 0<=i<10}", @@ -737,8 +737,7 @@ def test_passing_scalar_as_indexed_subcript_in_clbl_knl(ctx_factory, inline): def test_symbol_mangler_in_call(ctx_factory): - from library_for_test import (symbol_x, - preamble_for_x) + from library_for_test import preamble_for_x, symbol_x ctx = cl.create_some_context() cq = cl.CommandQueue(ctx) @@ -760,6 +759,7 @@ def test_symbol_mangler_in_call(ctx_factory): @pytest.mark.parametrize("which", ["max", "min"]) def test_int_max_min_c_target(ctx_factory, which): from numpy.random import default_rng + from pymbolic import parse rng = default_rng() @@ -935,6 +935,7 @@ def test_non1_step_slices(ctx_factory, start, inline): def test_check_bounds_with_caller_assumptions(ctx_factory): import islpy as isl + from loopy.diagnostic import LoopyIndexError arange = lp.make_function( diff --git a/test/test_dg.py b/test/test_dg.py index 125e5b9a6..bc134d9cb 100644 --- a/test/test_dg.py +++ b/test/test_dg.py @@ -21,17 +21,16 @@ """ +import logging # noqa + import numpy as np import pyopencl as cl import pyopencl.array # noqa -import loopy as lp - -import logging # noqa - from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl as pytest_generate_tests) - + pytest_generate_tests_for_pyopencl as pytest_generate_tests, +) +import loopy as lp from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa diff --git a/test/test_diff.py b/test/test_diff.py index f06618b9f..fb60a527d 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -20,14 +20,18 @@ THE SOFTWARE. """ +import logging import sys + import numpy as np # noqa import numpy.linalg as la -import loopy as lp + import pyopencl as cl import pyopencl.clrandom # noqa -import logging +import loopy as lp + + logger = logging.getLogger(__name__) try: @@ -37,8 +41,8 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", diff --git a/test/test_domain.py b/test/test_domain.py index 9f0bb4158..155cdb109 100644 --- a/test/test_domain.py +++ b/test/test_domain.py @@ -20,15 +20,19 @@ THE SOFTWARE. """ +import logging import sys + import numpy as np -import loopy as lp +import pytest # noqa + import pyopencl as cl import pyopencl.clmath # noqa import pyopencl.clrandom # noqa -import pytest # noqa -import logging +import loopy as lp + + logger = logging.getLogger(__name__) try: @@ -38,8 +42,8 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", diff --git a/test/test_einsum.py b/test/test_einsum.py index c3ed4ec98..f2964a121 100644 --- a/test/test_einsum.py +++ b/test/test_einsum.py @@ -22,14 +22,17 @@ import sys -import pytest -import loopy as lp + import numpy as np +import pytest + import pyopencl as cl import pyopencl.array +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) -from pyopencl.tools import \ - pytest_generate_tests_for_pyopencl as pytest_generate_tests # noqa +import loopy as lp def test_make_einsum_error_handling(): diff --git a/test/test_expression.py b/test/test_expression.py index 801d37bd4..6c1d97b35 100644 --- a/test/test_expression.py +++ b/test/test_expression.py @@ -20,18 +20,20 @@ THE SOFTWARE. """ +import logging import sys + import numpy as np -import loopy as lp +import pytest + import pyopencl as cl import pyopencl.clmath # noqa import pyopencl.clrandom # noqa -import pytest - from pymbolic.mapper.evaluator import EvaluationMapper +import loopy as lp + -import logging logger = logging.getLogger(__name__) try: @@ -41,8 +43,8 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", @@ -97,6 +99,7 @@ def make_random_fp_value(use_complex): def make_random_fp_expression(prefix, var_values, size, use_complex): from random import randrange + import pymbolic.primitives as p v = randrange(1500) size[0] += 1 @@ -146,6 +149,7 @@ def make_random_int_value(nonneg): def make_random_int_expression(prefix, var_values, size, nonneg): from random import randrange + import pymbolic.primitives as p if size[0] < 10: v = randrange(800) @@ -438,9 +442,10 @@ def test_indexof_vec(ctx_factory): def test_is_expression_equal(): - from loopy.symbolic import is_expression_equal from pymbolic import var + from loopy.symbolic import is_expression_equal + x = var("x") y = var("y") @@ -606,6 +611,7 @@ def test_bool_type_context(ctx_factory): def test_np_bool_handling(ctx_factory): import pymbolic.primitives as p + from loopy.symbolic import parse ctx = ctx_factory() queue = cl.CommandQueue(ctx) diff --git a/test/test_fortran.py b/test/test_fortran.py index 55f1dab10..0a6102d52 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -21,18 +21,22 @@ """ +import logging import sys + import numpy as np -import loopy as lp +import pytest + import pyopencl as cl import pyopencl.clrandom # noqa -import pytest -import logging +import loopy as lp + + logger = logging.getLogger(__name__) -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", diff --git a/test/test_fusion.py b/test/test_fusion.py index a811b3b3b..66daf9725 100644 --- a/test/test_fusion.py +++ b/test/test_fusion.py @@ -21,12 +21,14 @@ """ -import loopy as lp import numpy as np + import pyopencl as cl +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) -from pyopencl.tools import \ - pytest_generate_tests_for_pyopencl as pytest_generate_tests # noqa +import loopy as lp def test_two_kernel_fusion(ctx_factory): diff --git a/test/test_isl.py b/test/test_isl.py index 150006c39..fc1312f7c 100644 --- a/test/test_isl.py +++ b/test/test_isl.py @@ -97,9 +97,10 @@ def test_simplify_via_aff_reproducibility(): def test_qpolynomrial_to_expr(): - from loopy.symbolic import qpolynomial_to_expr import pymbolic.primitives as p + from loopy.symbolic import qpolynomial_to_expr + (_, qpoly), = isl.PwQPolynomial( "[i,j,k] -> { ((1/3)*i + (1/2)*j + (1/4)*k) : (4i+6j+3k) mod 12 = 0}" ).get_pieces() diff --git a/test/test_linalg.py b/test/test_linalg.py index ce43f19da..99273ae9f 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -21,19 +21,20 @@ """ -import pytest +import logging import sys + import numpy as np +import pytest + import pyopencl as cl import pyopencl.array as cl_array # noqa: F401 import pyopencl.cltypes as cltypes -import loopy as lp - -import logging - from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl - as pytest_generate_tests) + pytest_generate_tests_for_pyopencl as pytest_generate_tests, +) + +import loopy as lp DEBUG_PREAMBLE = r""" diff --git a/test/test_loopy.py b/test/test_loopy.py index 4f8eda0da..043a4625b 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -20,16 +20,20 @@ THE SOFTWARE. """ +import logging import sys + import numpy as np -import loopy as lp +import pytest + import pyopencl as cl import pyopencl.array # noqa import pyopencl.clmath # noqa import pyopencl.clrandom # noqa -import pytest -import logging +import loopy as lp + + logger = logging.getLogger(__name__) try: @@ -39,8 +43,8 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", @@ -266,8 +270,9 @@ def test_ilp_write_race_detection_global(): knl = lp.preprocess_kernel(knl) with lp.CacheMode(False): - from loopy.diagnostic import WriteRaceConditionWarning from warnings import catch_warnings + + from loopy.diagnostic import WriteRaceConditionWarning from loopy.schedule import linearize with catch_warnings(record=True) as warn_list: linearize(knl) @@ -2190,8 +2195,14 @@ def test_nosync_option_parsing(): def barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): - from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop, - CallKernel, ReturnFromKernel) + from loopy.schedule import ( + Barrier, + CallKernel, + EnterLoop, + LeaveLoop, + ReturnFromKernel, + RunInstruction, + ) watch_for_barrier = False seen_barrier = False loop_level = 0 @@ -2302,8 +2313,8 @@ def test_barrier_in_overridden_get_grid_size_expanded_kernel(): def test_multi_argument_reduction_type_inference(): - from loopy.type_inference import TypeReader from loopy.library.reduction import SegmentedSumReductionOperation + from loopy.type_inference import TypeReader from loopy.types import to_loopy_type op = SegmentedSumReductionOperation() @@ -2330,7 +2341,7 @@ def test_multi_argument_reduction_type_inference(): def test_multi_argument_reduction_parsing(): - from loopy.symbolic import parse, Reduction + from loopy.symbolic import Reduction, parse assert isinstance( parse("reduce(argmax, i, reduce(argmax, j, i, j))").expr, @@ -3299,9 +3310,10 @@ def test_redn_in_predicate(ctx_factory): def test_obj_tagged_is_persistent_hashable(): - from loopy.tools import LoopyKeyBuilder - from pytools.tag import tag_dataclass, Tag + from pytools.tag import Tag, tag_dataclass + from loopy.match import ObjTagged + from loopy.tools import LoopyKeyBuilder lkb = LoopyKeyBuilder() diff --git a/test/test_misc.py b/test/test_misc.py index c8fc7e5f6..cee8431c3 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -20,14 +20,15 @@ THE SOFTWARE. """ -from pickle import loads, dumps +import logging +import sys +from pickle import dumps, loads import pytest + import loopy as lp -import sys -import logging logger = logging.getLogger(__name__) diff --git a/test/test_nbody.py b/test/test_nbody.py index b9e2e6ff8..e258d801e 100644 --- a/test/test_nbody.py +++ b/test/test_nbody.py @@ -21,14 +21,18 @@ """ +import logging + import numpy as np -import loopy as lp -import pyopencl as cl # noqa +import pyopencl as cl # noqa from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl as pytest_generate_tests) + pytest_generate_tests_for_pyopencl as pytest_generate_tests, +) + +import loopy as lp + -import logging logger = logging.getLogger(__name__) diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index b1f20b0a4..99b0cd9d5 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -23,17 +23,21 @@ THE SOFTWARE. """ +import logging +import os +import sys + import pytest -import loopy as lp + import pyopencl as cl -import sys -import os -import logging +import loopy as lp + + logger = logging.getLogger(__name__) -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", @@ -73,8 +77,10 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa hsv = lp.add_nosync(hsv, "any", "writes:rhsQ", "writes:rhsQ", force=True) from gnuma_loopy_transforms import ( - fix_euler_parameters, - set_q_storage_format, set_D_storage_format) + fix_euler_parameters, + set_D_storage_format, + set_q_storage_format, + ) hsv = lp.fix_parameters(hsv, Nq=Nq) hsv = lp.prioritize_loops(hsv, "e,k,j,i") diff --git a/test/test_reduction.py b/test/test_reduction.py index b56940a72..465c5ec47 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -20,16 +20,20 @@ THE SOFTWARE. """ +import logging import sys + import numpy as np -import loopy as lp +import pytest + import pyopencl as cl import pyopencl.clmath # noqa import pyopencl.clrandom # noqa import pyopencl.version -import pytest -import logging +import loopy as lp + + logger = logging.getLogger(__name__) try: @@ -39,8 +43,8 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", diff --git a/test/test_scan.py b/test/test_scan.py index f5aa8a7c2..349d23411 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -23,15 +23,19 @@ THE SOFTWARE. """ +import logging import sys + import numpy as np -import loopy as lp +import pytest + import pyopencl as cl import pyopencl.clmath # noqa import pyopencl.clrandom # noqa -import pytest -import logging +import loopy as lp + + logger = logging.getLogger(__name__) try: @@ -41,8 +45,8 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index 4e89b0a92..5ca4a08f3 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -22,13 +22,13 @@ import numpy as np -import pyopencl as cl # noqa -import loopy as lp +import pyopencl as cl # noqa from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl as pytest_generate_tests) - + pytest_generate_tests_for_pyopencl as pytest_generate_tests, +) +import loopy as lp from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa diff --git a/test/test_split_iname_slabs.py b/test/test_split_iname_slabs.py index 47f311ab5..a171d526d 100644 --- a/test/test_split_iname_slabs.py +++ b/test/test_split_iname_slabs.py @@ -19,14 +19,17 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import loopy as lp import numpy as np +import pytest + import pyopencl as cl # noqa import pyopencl.array as clarray -import pytest +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) + +import loopy as lp from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa -from pyopencl.tools import \ - pytest_generate_tests_for_pyopencl as pytest_generate_tests # noqa @pytest.fixture diff --git a/test/test_statistics.py b/test/test_statistics.py index 4b9cb8f63..6665f6c76 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -21,18 +21,18 @@ """ import sys -from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl - as pytest_generate_tests) -import loopy as lp -from loopy.types import to_loopy_type + import numpy as np -from pytools import div_ceil -from loopy.statistics import CountGranularity as CG from pymbolic.primitives import Variable +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests, +) +from pytools import div_ceil - +import loopy as lp +from loopy.statistics import CountGranularity as CG +from loopy.types import to_loopy_type from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa @@ -1282,7 +1282,7 @@ def test_gather_access_footprint(): name="matmul", assumptions="n >= 1") knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32)) - from loopy.statistics import gather_access_footprints, count + from loopy.statistics import count, gather_access_footprints fp = gather_access_footprints(knl) for key, footprint in fp.items(): @@ -1296,7 +1296,7 @@ def test_gather_access_footprint_2(): name="matmul", assumptions="n >= 1") knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32)) - from loopy.statistics import gather_access_footprints, count + from loopy.statistics import count, gather_access_footprints fp = gather_access_footprints(knl) params = {"n": 200} @@ -1532,10 +1532,11 @@ def test_no_loop_ops(): def test_within_stats(): - import loopy as lp import numpy as np import pytest + import loopy as lp + knl = lp.make_kernel( "{[i]: 0<=i<10}", """ diff --git a/test/test_target.py b/test/test_target.py index 17fca5333..a15f9ece5 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -20,22 +20,25 @@ THE SOFTWARE. """ -from loopy.diagnostic import LoopyError +import logging import sys + import numpy as np -import loopy as lp +import pytest + +import pymbolic.primitives as prim import pyopencl as cl import pyopencl.clmath import pyopencl.clrandom import pyopencl.tools import pyopencl.version -import pytest -import pymbolic.primitives as prim +import loopy as lp +from loopy.diagnostic import LoopyError from loopy.target.c import CTarget from loopy.target.opencl import OpenCLTarget -import logging + logger = logging.getLogger(__name__) try: @@ -45,8 +48,8 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", @@ -388,8 +391,9 @@ def test_opencl_support_for_bool(ctx_factory): @pytest.mark.parametrize("target", [lp.PyOpenCLTarget, lp.ExecutableCTarget]) def test_nan_support(ctx_factory, target): - from loopy.symbolic import parse from pymbolic.primitives import NaN, Variable + + from loopy.symbolic import parse ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -485,8 +489,9 @@ def test_scalar_array_take_offset(ctx_factory): @pytest.mark.parametrize("target", [lp.PyOpenCLTarget, lp.ExecutableCTarget]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_inf_support(ctx_factory, target, dtype): - from loopy.symbolic import parse import math + + from loopy.symbolic import parse # See: https://github.com/inducer/loopy/issues/443 for some laughs ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -609,9 +614,10 @@ def __call__(self, size): @pytest.mark.parametrize("dtype", ["float32", "float64"]) def test_glibc_bessel_functions(dtype): pytest.importorskip("scipy.special") + from numpy.random import default_rng from scipy.special import jn, yn # pylint: disable=no-name-in-module + from loopy.target.c.c_execution import CCompiler - from numpy.random import default_rng rng = default_rng(0) compiler = CCompiler(cflags=["-O3"]) diff --git a/test/test_transform.py b/test/test_transform.py index 5ca01dea0..f218c1266 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -20,17 +20,20 @@ THE SOFTWARE. """ +import logging import sys + import numpy as np -import loopy as lp -from pytools.tag import Tag +import pytest import pyopencl as cl import pyopencl.clmath # noqa import pyopencl.clrandom # noqa -import pytest +from pytools.tag import Tag + +import loopy as lp + -import logging logger = logging.getLogger(__name__) try: @@ -40,8 +43,8 @@ else: faulthandler.enable() -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests +from pyopencl.tools import pytest_generate_tests_for_pyopencl as pytest_generate_tests + __all__ = [ "pytest_generate_tests", @@ -622,8 +625,7 @@ def _ensure_dim_names_match_and_align(obj_map, tgt_map): # sense to move this function to a location for more general-purpose # machinery. In the other branches, this function's name excludes the # leading underscore.) - from islpy import align_spaces - from islpy import dim_type as dt + from islpy import align_spaces, dim_type as dt # first make sure names match if not all( diff --git a/test/testlib.py b/test/testlib.py index 847c7423a..f8a491ac1 100644 --- a/test/testlib.py +++ b/test/testlib.py @@ -1,6 +1,7 @@ -import loopy as lp import numpy as np +import loopy as lp + # {{{ test_barrier_in_overridden_get_grid_size_expanded_kernel