From 815c4abfafdb54d8e5307f432a10d9562e321d85 Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Fri, 26 Jul 2024 08:49:40 +0100 Subject: [PATCH 1/3] [NFC] Reflow files in buildbot and sycl-fusion dirs Run `black` on python files in buildbot and fusion directories. Those files skipped the original formatting effort, so any change to them now would cause a formatting CI job to fail. --- buildbot/check.py | 58 +++++-- buildbot/clang_tidy.py | 65 +++++-- buildbot/compile.py | 64 +++++-- buildbot/configure.py | 328 ++++++++++++++++++++++++------------ buildbot/dependency.py | 109 +++++++++--- sycl-fusion/test/lit.cfg.py | 4 +- 6 files changed, 440 insertions(+), 188 deletions(-) diff --git a/buildbot/check.py b/buildbot/check.py index 70e98c1a701ea..6d84031a6dfb5 100644 --- a/buildbot/check.py +++ b/buildbot/check.py @@ -6,6 +6,7 @@ DEFAULT_CPU_COUNT = 4 + def do_check(args): try: cpu_count = multiprocessing.cpu_count() @@ -14,45 +15,67 @@ def do_check(args): # Get absolute path to source directory if args.src_dir: - abs_src_dir = os.path.abspath(args.src_dir) + abs_src_dir = os.path.abspath(args.src_dir) else: - abs_src_dir = os.path.abspath(os.path.join(__file__, "../..")) + abs_src_dir = os.path.abspath(os.path.join(__file__, "../..")) # Get absolute path to build directory if args.obj_dir: - abs_obj_dir = os.path.abspath(args.obj_dir) + abs_obj_dir = os.path.abspath(args.obj_dir) else: - abs_obj_dir = os.path.join(abs_src_dir, "build") + abs_obj_dir = os.path.join(abs_src_dir, "build") cmake_cmd = [ "cmake", - "--build", abs_obj_dir, + "--build", + abs_obj_dir, "--", args.test_suite, - "-j", str(cpu_count)] + "-j", + str(cpu_count), + ] print("[Cmake Command]: {}".format(" ".join(cmake_cmd))) - env_tmp=os.environ - env_tmp["LIT_ARGS"]="\"{}\"".format("-v") + env_tmp = os.environ + env_tmp["LIT_ARGS"] = '"{}"'.format("-v") subprocess.check_call(cmake_cmd, cwd=abs_obj_dir, env=env_tmp) ret = True return ret + def main(): - parser = argparse.ArgumentParser(prog="check.py", - description="script to do LIT testing", - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") + parser = argparse.ArgumentParser( + prog="check.py", + description="script to do LIT testing", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) parser.add_argument("-b", "--branch", metavar="BRANCH", help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", - help="builder directory, which is the directory containing source and build directories") + parser.add_argument( + "-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch" + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + help="builder directory, which is the directory containing source and build directories", + ) parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", help="source directory") parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", help="build directory") - parser.add_argument("-t", "--test-suite", metavar="TEST_SUITE", default="check-all", help="check-xxx target") + parser.add_argument( + "-t", + "--test-suite", + metavar="TEST_SUITE", + default="check-all", + help="check-xxx target", + ) args = parser.parse_args() @@ -60,6 +83,7 @@ def main(): return do_check(args) + if __name__ == "__main__": ret = main() exit_code = 0 if ret else 1 diff --git a/buildbot/clang_tidy.py b/buildbot/clang_tidy.py index c9c9eb37d7b68..3d5e46ac0df9c 100644 --- a/buildbot/clang_tidy.py +++ b/buildbot/clang_tidy.py @@ -5,10 +5,16 @@ FILE_EXTENSIONS = [".h", ".hpp", ".c", ".cc", ".cpp"] + def do_clang_tidy(args): ret = False - merge_base_cmd = ["git", "merge-base", "origin/{}".format(args.base_branch), args.branch] + merge_base_cmd = [ + "git", + "merge-base", + "origin/{}".format(args.base_branch), + args.branch, + ] print(merge_base_cmd) base_commit = subprocess.check_output(merge_base_cmd, cwd=args.src_dir) base_commit = base_commit.rstrip() @@ -19,13 +25,15 @@ def do_clang_tidy(args): diff_cmd = ["git", "--no-pager", "diff", base_commit, args.branch, "--name-only"] print(diff_cmd) - with open(changed_files, 'w') as f: - subprocess.check_call(merge_base_cmd, cwd=args.src_dir, stdout=f, stderr=subprocess.STDOUT) + with open(changed_files, "w") as f: + subprocess.check_call( + merge_base_cmd, cwd=args.src_dir, stdout=f, stderr=subprocess.STDOUT + ) if os.path.isfile(changed_files): clang_tidy_binary = os.path.join(args.obj_dir, "bin", "clang-tidy") if os.path.isfile(clang_tidy_binary): - with open(changed_files, 'r') as f: + with open(changed_files, "r") as f: for line in f: filename, file_extension = os.path.splitext(line) if file_extension.lower() in FILE_EXTENSIONS: @@ -41,19 +49,42 @@ def do_clang_tidy(args): return ret + def main(): - parser = argparse.ArgumentParser(prog="clang_tidy.py", - description="script to do clang_tidy", - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") - parser.add_argument("-b", "--branch", metavar="BRANCH", required=True, help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", required=True, - help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", required=True, - help="builder directory, which is the directory containing source and build directories") - parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", required=True, help="source directory") - parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory") + parser = argparse.ArgumentParser( + prog="clang_tidy.py", + description="script to do clang_tidy", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) + parser.add_argument( + "-b", "--branch", metavar="BRANCH", required=True, help="pull request branch" + ) + parser.add_argument( + "-d", + "--base-branch", + metavar="BASE_BRANCH", + required=True, + help="pull request base branch", + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + required=True, + help="builder directory, which is the directory containing source and build directories", + ) + parser.add_argument( + "-s", "--src-dir", metavar="SRC_DIR", required=True, help="source directory" + ) + parser.add_argument( + "-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory" + ) args = parser.parse_args() @@ -61,8 +92,8 @@ def main(): return do_clang_tidy(args) + if __name__ == "__main__": ret = main() exit_code = 0 if ret else 1 sys.exit(exit_code) - diff --git a/buildbot/compile.py b/buildbot/compile.py index b1c8e22ed1537..055abd6dab739 100644 --- a/buildbot/compile.py +++ b/buildbot/compile.py @@ -18,24 +18,27 @@ def do_compile(args): # Get absolute path to source directory if args.src_dir: - abs_src_dir = os.path.abspath(args.src_dir) + abs_src_dir = os.path.abspath(args.src_dir) else: - abs_src_dir = os.path.abspath(os.path.join(__file__, "../..")) + abs_src_dir = os.path.abspath(os.path.join(__file__, "../..")) # Get absolute path to build directory if args.obj_dir: - abs_obj_dir = os.path.abspath(args.obj_dir) + abs_obj_dir = os.path.abspath(args.obj_dir) else: - abs_obj_dir = os.path.join(abs_src_dir, "build") + abs_obj_dir = os.path.join(abs_src_dir, "build") cmake_cmd = [ "cmake", - "--build", abs_obj_dir, + "--build", + abs_obj_dir, "--", args.build_target, - "-j", str(cpu_count)] + "-j", + str(cpu_count), + ] if args.verbose: - cmake_cmd.append("--verbose") + cmake_cmd.append("--verbose") print("[Cmake Command]: {}".format(" ".join(cmake_cmd))) @@ -45,20 +48,45 @@ def do_compile(args): def main(): - parser = argparse.ArgumentParser(prog="compile.py", - description="script to do compile", - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") + parser = argparse.ArgumentParser( + prog="compile.py", + description="script to do compile", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) parser.add_argument("-b", "--branch", metavar="BRANCH", help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", - help="builder directory, which is the directory containing source and build directories") + parser.add_argument( + "-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch" + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + help="builder directory, which is the directory containing source and build directories", + ) parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", help="source directory") parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", help="build directory") - parser.add_argument("-j", "--build-parallelism", metavar="BUILD_PARALLELISM", help="build parallelism") - parser.add_argument("-v", "--verbose", action='store_true', help="verbose build output") - parser.add_argument("-t", "--build-target", metavar="BUILD_TARGET", default="deploy-sycl-toolchain", help="set build target") + parser.add_argument( + "-j", + "--build-parallelism", + metavar="BUILD_PARALLELISM", + help="build parallelism", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="verbose build output" + ) + parser.add_argument( + "-t", + "--build-target", + metavar="BUILD_TARGET", + default="deploy-sycl-toolchain", + help="set build target", + ) args = parser.parse_args() diff --git a/buildbot/configure.py b/buildbot/configure.py index f172be352ba7d..fc89f8b7b00bf 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -5,24 +5,31 @@ import subprocess import sys + def do_configure(args): # Get absolute path to source directory - abs_src_dir = os.path.abspath(args.src_dir if args.src_dir else os.path.join(__file__, "../..")) + abs_src_dir = os.path.abspath( + args.src_dir if args.src_dir else os.path.join(__file__, "../..") + ) # Get absolute path to build directory - abs_obj_dir = os.path.abspath(args.obj_dir) if args.obj_dir else os.path.join(abs_src_dir, "build") + abs_obj_dir = ( + os.path.abspath(args.obj_dir) + if args.obj_dir + else os.path.join(abs_src_dir, "build") + ) # Create build directory if it doesn't exist if not os.path.isdir(abs_obj_dir): - os.makedirs(abs_obj_dir) + os.makedirs(abs_obj_dir) - llvm_external_projects = 'sycl;llvm-spirv;opencl;xpti;xptifw' + llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw" # libdevice build requires a working SYCL toolchain, which is not the case # with macOS target right now. if sys.platform != "darwin": - llvm_external_projects += ';libdevice' + llvm_external_projects += ";libdevice" - libclc_amd_target_names = ';amdgcn--amdhsa' - libclc_nvidia_target_names = ';nvptx64--nvidiacl' + libclc_amd_target_names = ";amdgcn--amdhsa" + libclc_nvidia_target_names = ";nvptx64--nvidiacl" sycl_enable_fusion = "OFF" if not args.disable_fusion: @@ -40,50 +47,50 @@ def do_configure(args): libdevice_dir = os.path.join(abs_src_dir, "libdevice") fusion_dir = os.path.join(abs_src_dir, "sycl-fusion") llvm_targets_to_build = args.host_target - llvm_enable_projects = 'clang;' + llvm_external_projects - libclc_build_native = 'OFF' - libclc_targets_to_build = '' - libclc_gen_remangled_variants = 'OFF' - sycl_build_pi_hip_platform = 'AMD' - sycl_clang_extra_flags = '' - sycl_werror = 'OFF' - llvm_enable_assertions = 'ON' - llvm_enable_doxygen = 'OFF' - llvm_enable_sphinx = 'OFF' - llvm_build_shared_libs = 'OFF' - llvm_enable_lld = 'OFF' + llvm_enable_projects = "clang;" + llvm_external_projects + libclc_build_native = "OFF" + libclc_targets_to_build = "" + libclc_gen_remangled_variants = "OFF" + sycl_build_pi_hip_platform = "AMD" + sycl_clang_extra_flags = "" + sycl_werror = "OFF" + llvm_enable_assertions = "ON" + llvm_enable_doxygen = "OFF" + llvm_enable_sphinx = "OFF" + llvm_build_shared_libs = "OFF" + llvm_enable_lld = "OFF" sycl_enabled_plugins = ["opencl"] - sycl_preview_lib = 'ON' + sycl_preview_lib = "ON" - sycl_enable_xpti_tracing = 'ON' - xpti_enable_werror = 'OFF' + sycl_enable_xpti_tracing = "ON" + xpti_enable_werror = "OFF" if sys.platform != "darwin": sycl_enabled_plugins.append("level_zero") # lld is needed on Windows or for the HIP plugin on AMD - if platform.system() == 'Windows' or (args.hip and args.hip_platform == 'AMD'): - llvm_enable_projects += ';lld' + if platform.system() == "Windows" or (args.hip and args.hip_platform == "AMD"): + llvm_enable_projects += ";lld" libclc_enabled = args.cuda or args.hip or args.native_cpu if libclc_enabled: - llvm_enable_projects += ';libclc' + llvm_enable_projects += ";libclc" if args.cuda: - llvm_targets_to_build += ';NVPTX' + llvm_targets_to_build += ";NVPTX" libclc_targets_to_build = libclc_nvidia_target_names - libclc_gen_remangled_variants = 'ON' + libclc_gen_remangled_variants = "ON" sycl_enabled_plugins.append("cuda") if args.hip: - if args.hip_platform == 'AMD': - llvm_targets_to_build += ';AMDGPU' + if args.hip_platform == "AMD": + llvm_targets_to_build += ";AMDGPU" libclc_targets_to_build += libclc_amd_target_names - elif args.hip_platform == 'NVIDIA' and not args.cuda: - llvm_targets_to_build += ';NVPTX' + elif args.hip_platform == "NVIDIA" and not args.cuda: + llvm_targets_to_build += ";NVPTX" libclc_targets_to_build += libclc_nvidia_target_names - libclc_gen_remangled_variants = 'ON' + libclc_gen_remangled_variants = "ON" sycl_build_pi_hip_platform = args.hip_platform sycl_enabled_plugins.append("hip") @@ -96,28 +103,27 @@ def do_configure(args): libclc_gen_remangled_variants = "ON" sycl_enabled_plugins.append("native_cpu") - # all llvm compiler targets don't require 3rd party dependencies, so can be # built/tested even if specific runtimes are not available if args.enable_all_llvm_targets: - llvm_targets_to_build += ';NVPTX;AMDGPU' + llvm_targets_to_build += ";NVPTX;AMDGPU" if args.werror or args.ci_defaults: - sycl_werror = 'ON' - xpti_enable_werror = 'ON' + sycl_werror = "ON" + xpti_enable_werror = "ON" if args.no_assertions: - llvm_enable_assertions = 'OFF' + llvm_enable_assertions = "OFF" if args.docs: - llvm_enable_doxygen = 'ON' - llvm_enable_sphinx = 'ON' + llvm_enable_doxygen = "ON" + llvm_enable_sphinx = "ON" if args.shared_libs: - llvm_build_shared_libs = 'ON' + llvm_build_shared_libs = "ON" if args.use_lld: - llvm_enable_lld = 'ON' + llvm_enable_lld = "ON" # CI Default conditionally appends to options, keep it at the bottom of # args handling @@ -131,32 +137,33 @@ def do_configure(args): if sys.platform != "darwin": # libclc is required for CI validation libclc_enabled = True - if 'libclc' not in llvm_enable_projects: - llvm_enable_projects += ';libclc' + if "libclc" not in llvm_enable_projects: + llvm_enable_projects += ";libclc" # libclc passes `--nvvm-reflect-enable=false`, build NVPTX to enable it - if 'NVPTX' not in llvm_targets_to_build: - llvm_targets_to_build += ';NVPTX' + if "NVPTX" not in llvm_targets_to_build: + llvm_targets_to_build += ";NVPTX" # since we are building AMD libclc target we must have AMDGPU target - if 'AMDGPU' not in llvm_targets_to_build: - llvm_targets_to_build += ';AMDGPU' + if "AMDGPU" not in llvm_targets_to_build: + llvm_targets_to_build += ";AMDGPU" # Add both NVIDIA and AMD libclc targets if libclc_amd_target_names not in libclc_targets_to_build: libclc_targets_to_build += libclc_amd_target_names if libclc_nvidia_target_names not in libclc_targets_to_build: libclc_targets_to_build += libclc_nvidia_target_names - libclc_gen_remangled_variants = 'ON' + libclc_gen_remangled_variants = "ON" if args.enable_plugin: sycl_enabled_plugins += args.enable_plugin if args.disable_preview_lib: - sycl_preview_lib = 'OFF' + sycl_preview_lib = "OFF" install_dir = os.path.join(abs_obj_dir, "install") cmake_cmd = [ "cmake", - "-G", args.cmake_gen, + "-G", + args.cmake_gen, "-DCMAKE_BUILD_TYPE={}".format(args.build_type), "-DLLVM_ENABLE_ASSERTIONS={}".format(llvm_enable_assertions), "-DLLVM_TARGETS_TO_BUILD={}".format(llvm_targets_to_build), @@ -173,7 +180,7 @@ def do_configure(args): "-DLLVM_BUILD_TOOLS=ON", "-DSYCL_ENABLE_WERROR={}".format(sycl_werror), "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), - "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. + "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. "-DLLVM_ENABLE_DOXYGEN={}".format(llvm_enable_doxygen), "-DLLVM_ENABLE_SPHINX={}".format(llvm_enable_sphinx), "-DBUILD_SHARED_LIBS={}".format(llvm_build_shared_libs), @@ -181,7 +188,7 @@ def do_configure(args): "-DLLVM_ENABLE_LLD={}".format(llvm_enable_lld), "-DXPTI_ENABLE_WERROR={}".format(xpti_enable_werror), "-DSYCL_CLANG_EXTRA_FLAGS={}".format(sycl_clang_extra_flags), - "-DSYCL_ENABLE_PLUGINS={}".format(';'.join(set(sycl_enabled_plugins))), + "-DSYCL_ENABLE_PLUGINS={}".format(";".join(set(sycl_enabled_plugins))), "-DSYCL_ENABLE_KERNEL_FUSION={}".format(sycl_enable_fusion), "-DSYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB={}".format(sycl_preview_lib), "-DBUG_REPORT_URL=https://github.com/intel/llvm/issues", @@ -199,31 +206,41 @@ def do_configure(args): ) if args.l0_headers and args.l0_loader: - cmake_cmd.extend([ - "-DLEVEL_ZERO_INCLUDE_DIR={}".format(args.l0_headers), - "-DLEVEL_ZERO_LIBRARY={}".format(args.l0_loader)]) + cmake_cmd.extend( + [ + "-DLEVEL_ZERO_INCLUDE_DIR={}".format(args.l0_headers), + "-DLEVEL_ZERO_LIBRARY={}".format(args.l0_loader), + ] + ) elif args.l0_headers or args.l0_loader: - sys.exit("Please specify both Level Zero headers and loader or don't specify " - "none of them to let download from github.com") + sys.exit( + "Please specify both Level Zero headers and loader or don't specify " + "none of them to let download from github.com" + ) # Add additional CMake options if provided if args.cmake_opt: - cmake_cmd += args.cmake_opt - + cmake_cmd += args.cmake_opt + if args.add_security_flags: - cmake_cmd.extend(["-DEXTRA_SECURITY_FLAGS={}".format(args.add_security_flags)]) + cmake_cmd.extend(["-DEXTRA_SECURITY_FLAGS={}".format(args.add_security_flags)]) # Add path to root CMakeLists.txt cmake_cmd.append(llvm_dir) if args.use_libcxx: - if not (args.libcxx_include and args.libcxx_library): - sys.exit("Please specify include and library path of libc++ when building sycl " - "runtime with it") - cmake_cmd.extend([ - "-DSYCL_USE_LIBCXX=ON", - "-DSYCL_LIBCXX_INCLUDE_PATH={}".format(args.libcxx_include), - "-DSYCL_LIBCXX_LIBRARY_PATH={}".format(args.libcxx_library)]) + if not (args.libcxx_include and args.libcxx_library): + sys.exit( + "Please specify include and library path of libc++ when building sycl " + "runtime with it" + ) + cmake_cmd.extend( + [ + "-DSYCL_USE_LIBCXX=ON", + "-DSYCL_LIBCXX_INCLUDE_PATH={}".format(args.libcxx_include), + "-DSYCL_LIBCXX_LIBRARY_PATH={}".format(args.libcxx_library), + ] + ) print("[Cmake Command]: {}".format(" ".join(map(shlex.quote, cmake_cmd)))) @@ -232,61 +249,158 @@ def do_configure(args): except subprocess.CalledProcessError: cmake_cache = os.path.join(abs_obj_dir, "CMakeCache.txt") if os.path.isfile(cmake_cache): - print("There is CMakeCache.txt at " + cmake_cache + - " ... you can try to remove it and rerun.") - print("Configure failed!") + print( + "There is CMakeCache.txt at " + + cmake_cache + + " ... you can try to remove it and rerun." + ) + print("Configure failed!") return False return True + def main(): - parser = argparse.ArgumentParser(prog="configure.py", - description="Generate build files from CMake configuration files", - formatter_class=argparse.RawTextHelpFormatter) + parser = argparse.ArgumentParser( + prog="configure.py", + description="Generate build files from CMake configuration files", + formatter_class=argparse.RawTextHelpFormatter, + ) # CI system options - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) parser.add_argument("-b", "--branch", metavar="BRANCH", help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", - help="builder directory, which is the directory containing source and build directories") + parser.add_argument( + "-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch" + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + help="builder directory, which is the directory containing source and build directories", + ) # User options - parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", help="source directory (autodetected by default)") - parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", help="build directory. (/build by default)") - parser.add_argument("--l0-headers", metavar="L0_HEADER_DIR", help="directory with Level Zero headers") - parser.add_argument("--l0-loader", metavar="L0_LOADER", help="path to the Level Zero loader") - parser.add_argument("-t", "--build-type", - metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release") - parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA") - parser.add_argument("--native_cpu", action='store_true', help="Enable SYCL Native CPU") - parser.add_argument("--hip", action='store_true', help="switch from OpenCL to HIP") - parser.add_argument("--hip-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose hardware platform for HIP backend") - parser.add_argument("--host-target", default='host', - help="host LLVM target architecture, defaults to \'host\', multiple targets may be provided as a semi-colon separated string") - parser.add_argument("--enable-all-llvm-targets", action='store_true', help="build compiler with all supported targets, it doesn't change runtime build") - parser.add_argument("--no-assertions", action='store_true', help="build without assertions") - parser.add_argument("--docs", action='store_true', help="build Doxygen documentation") - parser.add_argument("--werror", action='store_true', help="Treat warnings as errors") - parser.add_argument("--shared-libs", action='store_true', help="Build shared libraries") - parser.add_argument("--cmake-opt", action='append', help="Additional CMake option not configured via script parameters") + parser.add_argument( + "-s", + "--src-dir", + metavar="SRC_DIR", + help="source directory (autodetected by default)", + ) + parser.add_argument( + "-o", + "--obj-dir", + metavar="OBJ_DIR", + help="build directory. (/build by default)", + ) + parser.add_argument( + "--l0-headers", + metavar="L0_HEADER_DIR", + help="directory with Level Zero headers", + ) + parser.add_argument( + "--l0-loader", metavar="L0_LOADER", help="path to the Level Zero loader" + ) + parser.add_argument( + "-t", + "--build-type", + metavar="BUILD_TYPE", + default="Release", + help="build type: Debug, Release", + ) + parser.add_argument( + "--cuda", action="store_true", help="switch from OpenCL to CUDA" + ) + parser.add_argument( + "--native_cpu", action="store_true", help="Enable SYCL Native CPU" + ) + parser.add_argument("--hip", action="store_true", help="switch from OpenCL to HIP") + parser.add_argument( + "--hip-platform", + type=str, + choices=["AMD", "NVIDIA"], + default="AMD", + help="choose hardware platform for HIP backend", + ) + parser.add_argument( + "--host-target", + default="host", + help="host LLVM target architecture, defaults to 'host', multiple targets may be provided as a semi-colon separated string", + ) + parser.add_argument( + "--enable-all-llvm-targets", + action="store_true", + help="build compiler with all supported targets, it doesn't change runtime build", + ) + parser.add_argument( + "--no-assertions", action="store_true", help="build without assertions" + ) + parser.add_argument( + "--docs", action="store_true", help="build Doxygen documentation" + ) + parser.add_argument( + "--werror", action="store_true", help="Treat warnings as errors" + ) + parser.add_argument( + "--shared-libs", action="store_true", help="Build shared libraries" + ) + parser.add_argument( + "--cmake-opt", + action="append", + help="Additional CMake option not configured via script parameters", + ) parser.add_argument("--cmake-gen", default="Ninja", help="CMake generator") - parser.add_argument("--use-libcxx", action="store_true", help="build sycl runtime with libcxx") - parser.add_argument("--libcxx-include", metavar="LIBCXX_INCLUDE_PATH", help="libcxx include path") - parser.add_argument("--libcxx-library", metavar="LIBCXX_LIBRARY_PATH", help="libcxx library path") - parser.add_argument("--use-lld", action="store_true", help="Use LLD linker for build") - parser.add_argument("--llvm-external-projects", help="Add external projects to build. Add as comma seperated list.") - parser.add_argument("--ci-defaults", action="store_true", help="Enable default CI parameters") - parser.add_argument("--enable-plugin", action='append', help="Enable SYCL plugin") - parser.add_argument("--disable-preview-lib", action='store_true', help="Disable building of the SYCL runtime major release preview library") - parser.add_argument("--disable-fusion", action="store_true", help="Disable the kernel fusion JIT compiler") - parser.add_argument("--add_security_flags", type=str, choices=['none', 'default', 'sanitize'], default=None, help="Enables security flags for compile & link. Two values are supported: 'default' and 'sanitize'. 'Sanitize' option is an extension of 'default' set.") - parser.add_argument('--native-cpu-libclc-targets', help='Target triples for libclc, used by the Native CPU backend') + parser.add_argument( + "--use-libcxx", action="store_true", help="build sycl runtime with libcxx" + ) + parser.add_argument( + "--libcxx-include", metavar="LIBCXX_INCLUDE_PATH", help="libcxx include path" + ) + parser.add_argument( + "--libcxx-library", metavar="LIBCXX_LIBRARY_PATH", help="libcxx library path" + ) + parser.add_argument( + "--use-lld", action="store_true", help="Use LLD linker for build" + ) + parser.add_argument( + "--llvm-external-projects", + help="Add external projects to build. Add as comma seperated list.", + ) + parser.add_argument( + "--ci-defaults", action="store_true", help="Enable default CI parameters" + ) + parser.add_argument("--enable-plugin", action="append", help="Enable SYCL plugin") + parser.add_argument( + "--disable-preview-lib", + action="store_true", + help="Disable building of the SYCL runtime major release preview library", + ) + parser.add_argument( + "--disable-fusion", + action="store_true", + help="Disable the kernel fusion JIT compiler", + ) + parser.add_argument( + "--add_security_flags", + type=str, + choices=["none", "default", "sanitize"], + default=None, + help="Enables security flags for compile & link. Two values are supported: 'default' and 'sanitize'. 'Sanitize' option is an extension of 'default' set.", + ) + parser.add_argument( + "--native-cpu-libclc-targets", + help="Target triples for libclc, used by the Native CPU backend", + ) args = parser.parse_args() print("args:{}".format(args)) return do_configure(args) + if __name__ == "__main__": ret = main() exit_code = 0 if ret else 1 diff --git a/buildbot/dependency.py b/buildbot/dependency.py index e79eae2b62cc1..3eac3cc7a92a8 100644 --- a/buildbot/dependency.py +++ b/buildbot/dependency.py @@ -4,6 +4,7 @@ import subprocess import sys + def do_dependency(args): ret = False @@ -11,7 +12,8 @@ def do_dependency(args): if args.pr_number is not None and not args.clean_build: if args.branch is None or args.base_branch is None: "branch ({}) and base branch ({}) is required for pull request #{}".format( - args.branch, args.base_branch, args.pr_number) + args.branch, args.base_branch, args.pr_number + ) return ret # fetching the recent state of base branch fetch_cmd = ["git", "fetch", "origin", args.base_branch] @@ -25,14 +27,31 @@ def do_dependency(args): print(checkout_cmd) subprocess.check_call(checkout_cmd, cwd=args.src_dir) # get baseline commit - merge_base_cmd = ["git", "merge-base", "origin/{}".format(args.base_branch), args.branch] + merge_base_cmd = [ + "git", + "merge-base", + "origin/{}".format(args.base_branch), + args.branch, + ] print(merge_base_cmd) base_commit = subprocess.check_output(merge_base_cmd, cwd=args.src_dir) - base_commit = base_commit.rstrip() - diff_cmd = ["git", "--no-pager", "diff", base_commit, args.branch, "--name-only", "buildbot"] + base_commit = base_commit.rstrip() + diff_cmd = [ + "git", + "--no-pager", + "diff", + base_commit, + args.branch, + "--name-only", + "buildbot", + ] print(diff_cmd) changed_build_scripts = subprocess.check_output(diff_cmd, cwd=args.src_dir) - changed_build_scripts = changed_build_scripts.rstrip() if changed_build_scripts is not None else None + changed_build_scripts = ( + changed_build_scripts.rstrip() + if changed_build_scripts is not None + else None + ) # clean build directory if build scripts have changed if len(changed_build_scripts) > 0: if os.path.isdir(args.obj_dir): @@ -49,8 +68,14 @@ def do_dependency(args): # fetch OpenCL headers ocl_header_dir = os.path.join(args.obj_dir, "OpenCL-Headers") if not os.path.isdir(ocl_header_dir): - clone_cmd = ["git", "clone", "https://github.com/KhronosGroup/OpenCL-Headers", - "OpenCL-Headers", "-b", "main"] + clone_cmd = [ + "git", + "clone", + "https://github.com/KhronosGroup/OpenCL-Headers", + "OpenCL-Headers", + "-b", + "main", + ] subprocess.check_call(clone_cmd, cwd=args.obj_dir) else: fetch_cmd = ["git", "pull", "--ff", "--ff-only", "origin"] @@ -64,9 +89,14 @@ def do_dependency(args): # fetch and build OpenCL ICD loader icd_loader_dir = os.path.join(args.obj_dir, "OpenCL-ICD-Loader") if not os.path.isdir(icd_loader_dir): - clone_cmd = ["git", "clone", - "https://github.com/KhronosGroup/OpenCL-ICD-Loader", - "OpenCL-ICD-Loader", "-b", "main"] + clone_cmd = [ + "git", + "clone", + "https://github.com/KhronosGroup/OpenCL-ICD-Loader", + "OpenCL-ICD-Loader", + "-b", + "main", + ] subprocess.check_call(clone_cmd, cwd=args.obj_dir) else: @@ -83,36 +113,60 @@ def do_dependency(args): shutil.rmtree(icd_build_dir) os.makedirs(icd_build_dir) install_dir = os.path.join(args.obj_dir, "install") - cmake_cmd = ["cmake", "-G", "Ninja", - "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), - "-DOPENCL_ICD_LOADER_HEADERS_DIR={}".format(ocl_header_dir), - ".." ] + cmake_cmd = [ + "cmake", + "-G", + "Ninja", + "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), + "-DOPENCL_ICD_LOADER_HEADERS_DIR={}".format(ocl_header_dir), + "..", + ] print("[Cmake Command]: {}".format(" ".join(cmake_cmd))) - + subprocess.check_call(cmake_cmd, cwd=icd_build_dir) - env_tmp=os.environ + env_tmp = os.environ env_tmp["C_INCLUDE_PATH"] = "{}".format(ocl_header_dir) subprocess.check_call(["ninja", "install"], env=env_tmp, cwd=icd_build_dir) ret = True return ret + def main(): - parser = argparse.ArgumentParser(prog="dependency.py", - description="script to get and build dependency", - formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument("-n", "--build-number", metavar="BUILD_NUM", help="build number") + parser = argparse.ArgumentParser( + prog="dependency.py", + description="script to get and build dependency", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-n", "--build-number", metavar="BUILD_NUM", help="build number" + ) parser.add_argument("-b", "--branch", metavar="BRANCH", help="pull request branch") - parser.add_argument("-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch") - parser.add_argument("-r", "--pr-number", metavar="PR_NUM", help="pull request number") - parser.add_argument("-w", "--builder-dir", metavar="BUILDER_DIR", - help="builder directory, which is the directory containing source and build directories") + parser.add_argument( + "-d", "--base-branch", metavar="BASE_BRANCH", help="pull request base branch" + ) + parser.add_argument( + "-r", "--pr-number", metavar="PR_NUM", help="pull request number" + ) + parser.add_argument( + "-w", + "--builder-dir", + metavar="BUILDER_DIR", + help="builder directory, which is the directory containing source and build directories", + ) parser.add_argument("-s", "--src-dir", metavar="SRC_DIR", help="source directory") - parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory") - parser.add_argument("-c", "--clean-build", action="store_true", default=False, - help="true if the build is clean build which has clobber step") + parser.add_argument( + "-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory" + ) + parser.add_argument( + "-c", + "--clean-build", + action="store_true", + default=False, + help="true if the build is clean build which has clobber step", + ) args = parser.parse_args() @@ -120,6 +174,7 @@ def main(): return do_dependency(args) + if __name__ == "__main__": ret = main() exit_code = 0 if ret else 1 diff --git a/sycl-fusion/test/lit.cfg.py b/sycl-fusion/test/lit.cfg.py index d92326b020ce3..fffa59585ef0e 100644 --- a/sycl-fusion/test/lit.cfg.py +++ b/sycl-fusion/test/lit.cfg.py @@ -25,6 +25,6 @@ config.substitutions.append(("%shlibdir", config.llvm_shlib_dir)) if "NVPTX" in config.llvm_targets_to_build: - config.available_features.add('cuda') + config.available_features.add("cuda") if "AMDGPU" in config.llvm_targets_to_build: - config.available_features.add('hip_amd') + config.available_features.add("hip_amd") From f98815d85e0ffadb0849c061ad96037cae97e1b7 Mon Sep 17 00:00:00 2001 From: Joe Todd Date: Tue, 30 Jul 2024 09:03:29 +0100 Subject: [PATCH 2/3] [SYCL][COMPAT] New launch API to enable passing kernel & launch properties (#14441) This PR defines a new user-facing struct `launch_strategy`, and two new `launch` overloads (currently in `syclcompat::experimental`) which accept a `launch_strategy`. ## Extensions & Properties This work builds on top of the [kernel_properties](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc) and [enqueue_functions](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc) extensions. The latter defines APIs for passing `launch_properties` as part of a `launch_config` object. These are the `parallel_for` and `nd_launch` overloads used by the new `launch`. See the [note](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc#launch-configuration) in the Launch configuration section which describes how `kernel_properties` must be passed via a `get(properties_tag)` method of a kernel functor. ## Local Memory Note also that in order to properly handle local memory, we **must** construct the `KernelFunctor` object within the `cgh` lambda, passing in a `local_accessor` to the constructor. Then within `KernelFunctor::operator()` (the SYCL 'kernel') we can at last grab the local memory pointer with `local_acc.get_multi_ptr()`, since CUDA-style device functions expect to receive their dynamic local memory as a `char *`. --------- Signed-off-by: Joe Todd --- sycl/doc/syclcompat/README.md | 236 +++++----- sycl/include/syclcompat/launch.hpp | 133 ++---- .../syclcompat/launch_experimental.hpp | 105 ----- sycl/include/syclcompat/launch_policy.hpp | 254 +++++++++++ sycl/include/syclcompat/syclcompat.hpp | 1 - sycl/include/syclcompat/traits.hpp | 209 +++++++++ .../syclcompat/launch/kernel_properties.cpp | 64 +++ sycl/test-e2e/syclcompat/launch/launch.cpp | 410 +----------------- .../syclcompat/launch/launch_policy.cpp | 359 +++++++++++++++ .../syclcompat/launch/launch_policy_lmem.cpp | 275 ++++++++++++ .../launch/launch_policy_lmem_neg.cpp | 60 +++ .../syclcompat/launch/launch_policy_neg.cpp | 191 ++++++++ .../syclcompat/launch/launch_properties.cpp | 106 +++++ 13 files changed, 1695 insertions(+), 708 deletions(-) delete mode 100644 sycl/include/syclcompat/launch_experimental.hpp create mode 100644 sycl/include/syclcompat/launch_policy.hpp create mode 100644 sycl/test-e2e/syclcompat/launch/kernel_properties.cpp create mode 100644 sycl/test-e2e/syclcompat/launch/launch_policy.cpp create mode 100644 sycl/test-e2e/syclcompat/launch/launch_policy_lmem.cpp create mode 100644 sycl/test-e2e/syclcompat/launch/launch_policy_lmem_neg.cpp create mode 100644 sycl/test-e2e/syclcompat/launch/launch_policy_neg.cpp create mode 100644 sycl/test-e2e/syclcompat/launch/launch_properties.cpp diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md index 127df2d17cac9..6dd8708afeb62 100644 --- a/sycl/doc/syclcompat/README.md +++ b/sycl/doc/syclcompat/README.md @@ -42,7 +42,14 @@ Specifically, this library depends on the following SYCL extensions: ../extensions/supported/sycl_ext_oneapi_assert.asciidoc) * [sycl_ext_oneapi_enqueue_barrier]( ../extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc) -* [sycl_ext_oneapi_usm_device_read_only](../extensions/supported/sycl_ext_oneapi_usm_device_read_only.asciidoc) +* [sycl_ext_oneapi_usm_device_read_only]( + ../extensions/supported/sycl_ext_oneapi_usm_device_read_only.asciidoc) +* [sycl_ext_oneapi_properties]( + ../extensions/experimental/sycl_ext_oneapi_properties.asciidoc) +* [sycl_ext_oneapi_enqueue_functions]( + ../extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc) +* [sycl_ext_oneapi_kernel_properties]( + ../extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc) If available, the following extensions extend SYCLcompat functionality: @@ -206,44 +213,6 @@ These translate any kernel dimensions from one convention to the other. An example of an equivalent SYCL call for a 3D kernel using `compat` is `syclcompat::global_id::x() == get_global_id(2)`. -### Local Memory - -When using `compat` functions, there are two distinct interfaces to allocate -device local memory. The first interface uses the _sycl_ext_oneapi_local_memory_ -extension to leverage local memory defined at compile time. -_sycl_ext_oneapi_local_memory_ is accessed through the following wrapper: - -``` c++ -namespace syclcompat { - -template auto *local_mem(); - -} // syclcompat -``` - -`syclcompat::local_mem()` can be used as illustrated in the example -below. - -```c++ -// Sample kernel -using namespace syclcompat; -template -void local_mem_2d(int *d_A) { - // Local memory extension wrapper, size defined at compile-time - auto As = local_mem(); - int id_x = local_id::x(); - int id_y = local_id::y(); - As[id_y][id_x] = id_x * BLOCK_SIZE + id_y; - wg_barrier(); - int val = As[BLOCK_SIZE - id_y - 1][BLOCK_SIZE - id_x - 1]; - d_A[global_id::y() * BLOCK_SIZE + global_id::x()] = val; -} -``` - -The second interface allows users to allocate device local memory at runtime. -SYCLcompat provides this functionality through its kernel launch interface, -`launch`, defined in the following section. - ### launch SYCLcompat provides a kernel `launch` interface which accepts a function that @@ -254,7 +223,7 @@ device _function_ with the use of an `auto F` template parameter, and a variadic `Args` for the function's arguments. Various overloads for `launch` exist to permit the user to launch on a -specific `queue`, or to define dynamically sized device local memory. +specific `queue`, or to describe the range as either `nd_range` or `dim3, dim3`. ``` c++ namespace syclcompat { @@ -273,22 +242,6 @@ template sycl::event launch(const dim3 &grid, const dim3 &threads, sycl::queue q, Args... args); -template -sycl::event launch(const sycl::nd_range &range, size_t mem_size, - sycl::queue q, Args... args); - -template -sycl::event launch(const sycl::nd_range &range, size_t mem_size, - Args... args); - -template -sycl::event launch(const dim3 &grid, const dim3 &threads, - size_t mem_size, sycl::queue q, Args... args); - -template -sycl::event launch(const dim3 &grid, const dim3 &threads, - size_t mem_size, Args... args); - } // syclcompat ``` @@ -313,67 +266,156 @@ auto range = sycl::nd_range<3>{blocksPerGrid * threadsPerBlock, syclcompat::launch(range, d_A, d_B, d_C, n); ``` -For dynamic local memory allocation, `launch` injects a pointer to a -local `char *` accessor of `mem_size` as the last argument of the kernel -function. For example, the previous function named `vectorAdd` can be modified -with the following signature, which adds a `char *` pointer to access local -memory inside the kernel: +Note that since `syclcompat::launch` accepts a device function, the kernel +lambda is constructed by SYCLcompat internally. This means that, for +example, `sycl::local_accessor`s cannot be declared. Instead, users wishing to +use local memory should launch with a `launch_policy` object as described below. -``` c++ -void vectorAdd(const float *A, const float *B, float *C, int n, - char *local_mem); +#### launch_policy + +In addition to the simple `syclcompat::launch` interface described above, +SYCLcompat provides a more flexible (`experimental`) interface to `launch` a +kernel with a given `launch_policy`. By constructing and passing a +`launch_policy`, users can pass `sycl::ext::oneapi::experimental::properties` +associated with the kernel or launch, as well as request **local memory** for +the kernel. + +In order to disambiguate the variadic constructor of `launch_policy`, the +following wrapper structs are defined. The `kernel_properties` and +`launch_properties` wrappers can be constructed *either* with a variadc set of +properties, or with an existing `sycl_exp::properties` object. + +```cpp +namespace syclcompat::experimental { +namespace sycl_exp = sycl::ext::oneapi::experimental; + +// Wrapper for kernel sycl_exp::properties +template struct kernel_properties { + using Props = Properties; + template + kernel_properties(Props... properties); + template + kernel_properties(sycl_exp::properties properties) + Properties props; +}; + +// Wrapper for launch sycl_exp::properties +template struct launch_properties { + using Props = Properties; + template + launch_properties(Props... properties); + template + launch_properties(sycl_exp::properties properties) + Properties props; +}; + +// Wrapper for local memory size +struct local_mem_size { + local_mem_size(size_t size = 0); + size_t size; +}; + +} //namespace syclcompat::experimental ``` -Then, `vectorAdd` can be launched like this: +The constructors of `launch_policy` are variadic, accepting any form of range +(`nd_range`, `range`, `dim3`, `dim3, dim3`), followed by zero or more of +`local_memory_size`, `kernel_properties`, and `launch_properties`: ``` c++ -syclcompat::launch(blocksPerGrid, threadsPerBlock, mem_size, d_A, - d_B, d_C, n); +namespace syclcompat::experimental { +namespace sycl_exp = sycl::ext::oneapi::experimental; + +// launch_policy is constructed by the user & passed to `compat_exp::launch` +template +class launch_policy { +public: + using KPropsT = KProps; + using LPropsT = LProps; + using RangeT = Range; + static constexpr bool HasLocalMem = LocalMem; + + template + launch_policy(Range range, Ts... ts); + + template + launch_policy(dim3 global_range, Ts... ts); + + template + launch_policy(dim3 global_range, dim3 local_range, Ts... ts); + + KProps get_kernel_properties(); + LProps get_launch_properties(); + size_t get_local_mem_size(); + Range get_range(); +}; +} //namespace syclcompat::experimental ``` -or this: +The `launch` overloads accepting a `launch_policy` are: + +```cpp +namespace syclcompat::experimental { + +template +sycl::event launch(LaunchPolicy launch_policy, sycl::queue q, Args... args); + +template +sycl::event launch(LaunchPolicy launch_policy, Args... args); +} //namespace syclcompat::experimental -``` c++ -auto range = sycl::nd_range<3>{globalSize, localSize}; -syclcompat::launch(range, mem_size, d_A, d_B, d_C, n); ``` -This `launch` interface allows users to define an internal memory pool, or -scratchpad, that can then be reinterpreted as the datatype required by the user -within the kernel function. +For local memory, `launch` injects a `char *` pointer to the beginning +of a local accessor of the requested `local_mem_size` as the last argument of +the kernel function. This `char *` can then be reinterpreted as the datatype +required by the user within the kernel function. -To launch a kernel with a specified sub-group size, overloads similar to above -`launch` functions are present in the `syclcompat::experimental` namespace, -which accept SubgroupSize as a template parameter and can be called as -`launch` +For example, the previous function named `vectorAdd` can be modified +with the following signature, which adds a `char *` pointer to access local +memory inside the kernel: -```cpp +``` c++ +void vectorAdd(const float *A, const float *B, float *C, int n, + char *local_mem); +``` + +Then, the new `vectorAdd` can be launched like this: + +``` c++ +using syclcompat::experimental; +launch_policy policy{blocksPerGrid, threadsPerBlock, + local_mem_size(nbytes)}; +launch(policy, d_A, d_B, d_C, n); +``` -template -sycl::event launch(sycl::nd_range<3> launch_range, std::size_t local_memory_size, - sycl::queue queue, Args... args); +To request a different cache/local memory split on supported hardware: -template -sycl::event launch(sycl::nd_range launch_range, std::size_t local_memory_size, - Args... args); +```c++ +using syclcompat::experimental; +namespace sycl_intel_exp = sycl::ext::intel::experimental; -template -sycl::event launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim, - std::size_t local_memory_size, Args... args); +sycl_intel_exp::cache_config cache_config{ + sycl_intel_exp::large_slm}; +kernel_properties kernel_props{cache_config}; +launch_policy policy{blocksPerGrid, threadsPerBlock, + local_mem_size(nbytes), kernel_props}; +launch(policy, d_A, d_B, d_C, n); +``` -template -sycl::event launch(sycl::nd_range<3> launch_range, sycl::queue queue, - Args... args); +To request a certain cluster dimension on supported hardware: -template -sycl::event launch(sycl::nd_range launch_range, - Args... args); +```c++ +using syclcompat::experimental; +namespace sycl_exp = sycl::ext::oneapi::experimental; -template -sycl::event launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim, - Args... args); +sycl_exp::cuda::cluster_size cluster_dims(cluster_range); +launch_policy policy{blocksPerGrid, threadsPerBlock, + local_mem_size(nbytes), + launch_properties{cluster_dims}}; +launch(policy, d_A, d_B, d_C, n); ``` ### Utilities diff --git a/sycl/include/syclcompat/launch.hpp b/sycl/include/syclcompat/launch.hpp index 503f29ff8b91f..eb5d774bc12d3 100644 --- a/sycl/include/syclcompat/launch.hpp +++ b/sycl/include/syclcompat/launch.hpp @@ -31,6 +31,7 @@ #include #include +#include namespace syclcompat { @@ -67,26 +68,6 @@ launch(const sycl::nd_range<3> &range, sycl::queue q, Args... args) { range, [=](sycl::nd_item<3>) { [[clang::always_inline]] F(args...); }); } -template -sycl::event launch(const sycl::nd_range<3> &range, size_t mem_size, - sycl::queue q, Args... args) { - static_assert(detail::getArgumentCount(F) == sizeof...(args) + 1, - "Wrong number of arguments to SYCL kernel"); - - using F_t = decltype(F); - using f_return_t = typename std::invoke_result_t; - static_assert(std::is_same::value, - "SYCL kernels should return void"); - - return q.submit([&](sycl::handler &cgh) { - auto local_acc = sycl::local_accessor(mem_size, cgh); - cgh.parallel_for(range, [=](sycl::nd_item<3>) { - auto local_mem = local_acc.get_pointer(); - [[clang::always_inline]] F(args..., local_mem); - }); - }); -} - } // namespace detail template @@ -137,87 +118,47 @@ launch(const dim3 &grid, const dim3 &threads, Args... args) { return launch(grid, threads, get_default_queue(), args...); } -/// Launches a kernel with the templated F param and arguments on a -/// device specified by the given nd_range and SYCL queue. -/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, -/// Args... args). -/// @tparam Dim nd_range dimension number. -/// @tparam Args Types of the arguments to be passed to the kernel. -/// @param range Nd_range specifying the work group and global sizes for the -/// kernel. -/// @param q The SYCL queue on which to execute the kernel. -/// @param mem_size The size, in number of bytes, of the local -/// memory to be allocated for kernel. -/// @param args The arguments to be passed to the kernel. -/// @return A SYCL event object that can be used to synchronize with the -/// kernel's execution. -template -sycl::event launch(const sycl::nd_range &range, size_t mem_size, - sycl::queue q, Args... args) { - return detail::launch(detail::transform_nd_range(range), mem_size, q, - args...); +} // namespace syclcompat + +namespace syclcompat::experimental { + +namespace detail { + +template +sycl::event launch(LaunchPolicy launch_policy, sycl::queue q, Args... args) { + static_assert(syclcompat::args_compatible, + "Mismatch between device function signature and supplied " + "arguments. Have you correctly handled local memory/char*?"); + + sycl_exp::launch_config config(launch_policy.get_range(), + launch_policy.get_launch_properties()); + + return sycl_exp::submit_with_event(q, [&](sycl::handler &cgh) { + auto KernelFunctor = build_kernel_functor(cgh, launch_policy, args...); + if constexpr (syclcompat::detail::is_range_v< + typename LaunchPolicy::RangeT>) { + parallel_for(cgh, config, KernelFunctor); + } else { + static_assert( + syclcompat::detail::is_nd_range_v); + nd_launch(cgh, config, KernelFunctor); + } + }); } -/// Launches a kernel with the templated F param and arguments on a -/// device specified by the given nd_range using theSYCL default queue. -/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, -/// Args... args). -/// @tparam Dim nd_range dimension number. -/// @tparam Args Types of the arguments to be passed to the kernel. -/// @param range Nd_range specifying the work group and global sizes for the -/// kernel. -/// @param mem_size The size, in number of bytes, of the local -/// memory to be allocated for kernel. -/// @param args The arguments to be passed to the kernel. -/// @return A SYCL event object that can be used to synchronize with the -/// kernel's execution. -template -sycl::event launch(const sycl::nd_range &range, size_t mem_size, - Args... args) { - return launch(range, mem_size, get_default_queue(), args...); } -/// Launches a kernel with the templated F param and arguments on a -/// device with a user-specified grid and block dimensions following the -/// standard of other programming models using a user-defined SYCL queue. -/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, -/// Args... args). -/// @tparam Dim nd_range dimension number. -/// @tparam Args Types of the arguments to be passed to the kernel. -/// @param grid Grid dimensions represented with an (x, y, z) iteration space. -/// @param threads Block dimensions represented with an (x, y, z) iteration -/// space. -/// @param mem_size The size, in number of bytes, of the local -/// memory to be allocated for kernel. -/// @param args The arguments to be passed to the kernel. -/// @return A SYCL event object that can be used to synchronize with the -/// kernel's execution. -template -sycl::event launch(const dim3 &grid, const dim3 &threads, size_t mem_size, - sycl::queue q, Args... args) { - return launch(sycl::nd_range<3>{grid * threads, threads}, mem_size, q, - args...); + +template +sycl::event launch(LaunchPolicy launch_policy, sycl::queue q, Args... args) { + static_assert(detail::is_launch_policy_v); + return detail::launch(launch_policy, q, args...); } -/// Launches a kernel with the templated F param and arguments on a -/// device with a user-specified grid and block dimensions following the -/// standard of other programming models using the default SYCL queue. -/// @tparam F SYCL kernel to be executed, expects signature F(T* local_mem, -/// Args... args). -/// @tparam Dim nd_range dimension number. -/// @tparam Args Types of the arguments to be passed to the kernel. -/// @param grid Grid dimensions represented with an (x, y, z) iteration space. -/// @param threads Block dimensions represented with an (x, y, z) iteration -/// space. -/// @param mem_size The size, in number of bytes, of the -/// local memory to be allocated. -/// @param args The arguments to be passed to the kernel. -/// @return A SYCL event object that can be used to synchronize with the -/// kernel's execution. -template -sycl::event launch(const dim3 &grid, const dim3 &threads, size_t mem_size, - Args... args) { - return launch(grid, threads, mem_size, get_default_queue(), args...); +template +sycl::event launch(LaunchPolicy launch_policy, Args... args) { + static_assert(detail::is_launch_policy_v); + return launch(launch_policy, get_default_queue(), args...); } -} // namespace syclcompat +} // namespace syclcompat::experimental diff --git a/sycl/include/syclcompat/launch_experimental.hpp b/sycl/include/syclcompat/launch_experimental.hpp deleted file mode 100644 index 3074c8c20371e..0000000000000 --- a/sycl/include/syclcompat/launch_experimental.hpp +++ /dev/null @@ -1,105 +0,0 @@ -/*************************************************************************** - * - * Copyright (C) Codeplay Software Ltd. - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM - * Exceptions. See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * SYCLcompat - * - * launch_experimental.hpp - * - * Description: - * Launch Overloads with accepting required subgroup size - **************************************************************************/ - -#pragma once - -#include -#include -#include - -namespace syclcompat { -namespace experimental { - -//================================================================================================// -// Overloads using Local Memory // -//================================================================================================// - -template -std::enable_if_t, sycl::event> -launch(sycl::nd_range<3> launch_range, std::size_t local_memory_size, - sycl::queue queue, Args... args) { - return queue.submit([&](sycl::handler &cgh) { - sycl::local_accessor loc(local_memory_size, cgh); - cgh.parallel_for( - launch_range, - [=](sycl::nd_item<3> it) [[sycl::reqd_sub_group_size(SubgroupSize)]] { - [[clang::always_inline]] F( - args..., loc.get_multi_ptr()); - }); - }); -} - -template -std::enable_if_t, sycl::event> -launch(sycl::nd_range launch_range, std::size_t local_memory_size, - Args... args) { - return launch( - ::syclcompat::detail::transform_nd_range(launch_range), local_memory_size, - ::syclcompat::get_default_queue(), args...); -} - -template -std::enable_if_t, sycl::event> -launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim, - std::size_t local_memory_size, Args... args) { - return launch( - ::syclcompat::detail::transform_nd_range(sycl::nd_range( - sycl::range<3>(grid_dim * block_dim), sycl::range<3>(block_dim))), - local_memory_size, ::syclcompat::get_default_queue(), args...); -} - -//================================================================================================// -// Overloads not using Local Memory // -//================================================================================================// - -template -std::enable_if_t, sycl::event> -launch(sycl::nd_range<3> launch_range, sycl::queue queue, Args... args) { - return queue.submit([&](sycl::handler &cgh) { - cgh.parallel_for(launch_range, - [=](sycl::nd_item<3> it) - [[sycl::reqd_sub_group_size(SubgroupSize)]] { - [[clang::always_inline]] F(args...); - }); - }); -} - -template -std::enable_if_t, sycl::event> -launch(sycl::nd_range launch_range, Args... args) { - return launch( - ::syclcompat::detail::transform_nd_range(launch_range), - ::syclcompat::get_default_queue(), args...); -} - -template -std::enable_if_t, sycl::event> -launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim, - Args... args) { - return launch( - ::syclcompat::detail::transform_nd_range(sycl::nd_range( - sycl::range<3>(grid_dim * block_dim), sycl::range<3>(block_dim))), - ::syclcompat::get_default_queue(), args...); -} - -} // namespace experimental -} // namespace syclcompat diff --git a/sycl/include/syclcompat/launch_policy.hpp b/sycl/include/syclcompat/launch_policy.hpp new file mode 100644 index 0000000000000..1c5f6ed3e97d6 --- /dev/null +++ b/sycl/include/syclcompat/launch_policy.hpp @@ -0,0 +1,254 @@ +/*************************************************************************** + * + * Copyright (C) Codeplay Software Ltd. + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM + * Exceptions. See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SYCL compatibility extension + * + * launch.hpp + * + * Description: + * launch functionality for the SYCL compatibility extension + **************************************************************************/ + +#pragma once + +#include "sycl/ext/oneapi/experimental/enqueue_functions.hpp" +#include "sycl/ext/oneapi/properties/properties.hpp" +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace syclcompat { +namespace experimental { + +namespace sycl_exp = sycl::ext::oneapi::experimental; + +// Wrapper for kernel sycl_exp::properties +template struct kernel_properties { + static_assert(sycl_exp::is_property_list_v); + using Props = Properties; + + template + kernel_properties(Props... properties) : props{properties...} {} + + template + kernel_properties(sycl_exp::properties properties) + : props{properties} {} + + Properties props; +}; + +template ::value, void>> +kernel_properties(Props... props) + -> kernel_properties; + +template +kernel_properties(sycl_exp::properties props) + -> kernel_properties>; + +// Wrapper for launch sycl_exp::properties +template struct launch_properties { + static_assert(sycl_exp::is_property_list_v); + using Props = Properties; + + template + launch_properties(Props... properties) : props{properties...} {} + + template + launch_properties(sycl_exp::properties properties) + : props{properties} {} + + Properties props; +}; + +template ::value, void>> +launch_properties(Props... props) + -> launch_properties; + +template +launch_properties(sycl_exp::properties props) + -> launch_properties>; + +// Wrapper for local memory size +struct local_mem_size { + local_mem_size(size_t size = 0) : size{size} {}; + size_t size; +}; + +// launch_policy is constructed by the user & passed to `compat_exp::launch` +template +class launch_policy { + static_assert(sycl_exp::is_property_list_v); + static_assert(sycl_exp::is_property_list_v); + static_assert(syclcompat::detail::is_range_or_nd_range_v); + static_assert(syclcompat::detail::is_nd_range_v || !LocalMem, + "sycl::range kernel launches are incompatible with local " + "memory usage!"); + +public: + using KPropsT = KProps; + using LPropsT = LProps; + using RangeT = Range; + static constexpr bool HasLocalMem = LocalMem; + +private: + launch_policy() = default; + + template + launch_policy(Ts... ts) + : _kernel_properties{detail::property_getter< + kernel_properties, kernel_properties, std::tuple>()( + std::tuple(ts...))}, + _launch_properties{detail::property_getter< + launch_properties, launch_properties, std::tuple>()( + std::tuple(ts...))}, + _local_mem_size{ + detail::local_mem_getter>()( + std::tuple(ts...))} { + check_variadic_args(ts...); + } + + template void check_variadic_args(Ts...) { + static_assert( + std::conjunction_v, + detail::is_launch_properties, + detail::is_local_mem_size>...>, + "Received an unexpected argument to ctor. Did you forget to wrap " + "in " + "compat::kernel_properties, launch_properties, local_mem_size?"); + } + +public: + template + launch_policy(Range range, Ts... ts) : launch_policy(ts...) { + _range = range; + check_variadic_args(ts...); + } + + template + launch_policy(dim3 global_range, Ts... ts) : launch_policy(ts...) { + _range = Range{global_range}; + check_variadic_args(ts...); + } + + template + launch_policy(dim3 global_range, dim3 local_range, Ts... ts) + : launch_policy(ts...) { + _range = Range{global_range * local_range, local_range}; + check_variadic_args(ts...); + } + + KProps get_kernel_properties() { return _kernel_properties.props; } + LProps get_launch_properties() { return _launch_properties.props; } + size_t get_local_mem_size() { return _local_mem_size.size; } + Range get_range() { return _range; } + +private: + Range _range; + kernel_properties _kernel_properties; + launch_properties _launch_properties; + local_mem_size _local_mem_size; +}; + +// Deduction guides for launch_policy +template +launch_policy(Range, Ts...) -> launch_policy< + Range, detail::properties_or_empty, + detail::properties_or_empty, + detail::has_type>::value>; + +template +launch_policy(sycl::range, sycl::range, Ts...) -> launch_policy< + sycl::nd_range, detail::properties_or_empty, + detail::properties_or_empty, + detail::has_type>::value>; + +template +launch_policy(dim3, Ts...) -> launch_policy< + sycl::range<3>, detail::properties_or_empty, + detail::properties_or_empty, + detail::has_type>::value>; + +template +launch_policy(dim3, dim3, Ts...) -> launch_policy< + sycl::nd_range<3>, detail::properties_or_empty, + detail::properties_or_empty, + detail::has_type>::value>; + +namespace detail { + +template +struct KernelFunctor { + KernelFunctor(KProps kernel_props, Args... args) + : _kernel_properties{kernel_props}, + _argument_tuple(std::make_tuple(args...)) {} + + KernelFunctor(KProps kernel_props, sycl::local_accessor local_acc, + Args... args) + : _kernel_properties{kernel_props}, _local_acc{local_acc}, + _argument_tuple(std::make_tuple(args...)) {} + + auto get(sycl_exp::properties_tag) { return _kernel_properties; } + + __syclcompat_inline__ void + operator()(syclcompat::detail::range_to_item_t) const { + if constexpr (HasLocalMem) { + char *local_mem_ptr = static_cast( + _local_acc.template get_multi_ptr().get()); + std::apply( + [lmem_ptr = local_mem_ptr](auto &&...args) { F(args..., lmem_ptr); }, + _argument_tuple); + } else { + std::apply([](auto &&...args) { F(args...); }, _argument_tuple); + } + } + + KProps _kernel_properties; + std::tuple _argument_tuple; + std::conditional_t, std::monostate> + _local_acc; // monostate for empty type +}; + +//==================================================================== +// This helper function avoids 2 nested `if constexpr` in detail::launch +template +auto build_kernel_functor(sycl::handler &cgh, LaunchPolicy launch_policy, + Args... args) + -> KernelFunctor { + if constexpr (LaunchPolicy::HasLocalMem) { + sycl::local_accessor local_memory( + launch_policy.get_local_mem_size(), cgh); + return KernelFunctor( + launch_policy.get_kernel_properties(), local_memory, args...); + } else { + return KernelFunctor( + launch_policy.get_kernel_properties(), args...); + } +} + +} // namespace detail +} // namespace experimental +} // namespace syclcompat diff --git a/sycl/include/syclcompat/syclcompat.hpp b/sycl/include/syclcompat/syclcompat.hpp index 401b5681d40dd..8c5f693794948 100644 --- a/sycl/include/syclcompat/syclcompat.hpp +++ b/sycl/include/syclcompat/syclcompat.hpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/sycl/include/syclcompat/traits.hpp b/sycl/include/syclcompat/traits.hpp index f992c67bae8ca..2f389ccf79484 100644 --- a/sycl/include/syclcompat/traits.hpp +++ b/sycl/include/syclcompat/traits.hpp @@ -23,6 +23,10 @@ #pragma once #include +#include +#include +#include +#include #include namespace syclcompat { @@ -41,4 +45,209 @@ template struct arith { }; template using arith_t = typename arith::type; +// Traits to check device function signature matches args (with or without local +// mem) +template +struct device_fn_invocable : std::is_invocable {}; + +template +struct device_fn_lmem_invocable + : std::is_invocable {}; + +template +constexpr inline bool args_compatible = + std::conditional_t, + device_fn_invocable>::value; + +namespace detail { + +// Trait for identifying sycl::range and sycl::nd_range. +template struct is_range : std::false_type {}; +template struct is_range> : std::true_type {}; + +template constexpr bool is_range_v = is_range::value; + +template struct is_nd_range : std::false_type {}; +template struct is_nd_range> : std::true_type {}; + +template constexpr bool is_nd_range_v = is_nd_range::value; + +template +constexpr bool is_range_or_nd_range_v = + std::disjunction_v, is_nd_range>; + +// Trait range_to_item_t to convert nd_range -> nd_item, range -> item +template struct range_to_item_map; +template struct range_to_item_map> { + using ItemT = sycl::nd_item; +}; +template struct range_to_item_map> { + using ItemT = sycl::item; +}; + +template +using range_to_item_t = typename range_to_item_map::ItemT; + +} // namespace detail + +// Forward decls +namespace experimental { + +template struct kernel_properties; +template struct launch_properties; +struct local_mem_size; + +template +class launch_policy; +} // namespace experimental + +namespace experimental::detail { + +// Helper for tuple_template_index +template