# Patch overview: adds an optional ARM Compute Library (ACL) build configuration,
# selected with `--config=acl` / `--define using_acl=true`.
#
# NOTE(review): this copy of the patch has had its line breaks collapsed, and C++
# template argument lists appear to be missing (e.g. `reinterpret_cast(it.ptr())`,
# `tf_tensor.flat()`, `LaunchMatMul::launch`). Presumably an extraction artefact;
# verify against the original patch before applying.
#
# Per-file summary (in the order of the `diff --git` headers below):
#   configure.py
#       New set_acl(): writes four `build:acl` lines through write_to_bazelrc()
#       and prints a usage hint; main() calls it right after set_mkl().
#   tensorflow/BUILD, tensorflow/core/BUILD, tensorflow/core/kernels/BUILD,
#   tensorflow/core/platform/default/build_config.bzl, tensorflow/tensorflow.bzl
#       Load if_acl and append if_acl(["//third_party/acl:intel_binary_blob"])
#       next to the existing if_mkl(...) lists.
#   tensorflow/core/kernels/matmul_op.cc
#       Under `#ifdef ARM_COMPUTE_CL`: includes ACL headers, and MatMulOp copies
#       both inputs into arm_compute::CLTensor objects, runs arm_compute::CLGEMM,
#       and copies the result back, instead of calling LaunchMatMul. The
#       TF_CALL_half(REGISTER_CPU) registration is compiled out in that mode.
#   tensorflow/tools/lib_package/BUILD, tensorflow/tools/pip_package/BUILD
#       Add ACL license files (and, for pip_package's sh_binary, the ACL blob).
#   tensorflow/workspace.bzl
#       Instantiates acl_repository(name = "acl", ...).
#   third_party/acl/BUILD, LICENSE, acl.BUILD, build_defs.bzl (all new)
#       `using_acl` config_setting, the if_acl() select helper, and the
#       acl_repository rule, which symlinks directories found under $TF_ACL_ROOT.
#   third_party/eigen3/BUILD
#       Adds if_acl(["./acl_include"]) to the eigen3 `includes`.
#   tools/bazel.rc
#       Adds `build:acl --define=using_acl=true`.
#
# NOTE(review): points to confirm before merging
#   * matmul_op.cc: the ACL tensors are always initialised as DataType::F32, yet
#     only the half registration is disabled; float, double, int32 and complex64
#     registrations remain in the visible hunk. Confirm non-float behaviour.
#   * matmul_op.cc: the ACL branch never reads `dim_pair`, which the LaunchMatMul
#     call receives. Confirm transposed operands are handled.
#   * matmul_op.cc: element offsets are computed as `c.y() * dim_size(0) + c.x()`
#     and the a/b operands are swapped when filling arm_a/arm_b. Confirm the
#     layout is right for non-square matrices. There is also a stray `;;`.
#   * matmul_op.cc: configure() is passed `nullptr` as its third tensor together
#     with `1.0f, 1.0f` (presumably alpha and beta; check the ACL API).
#   * build_defs.bzl: _acl_autoconf_impl reads
#     repository_ctx.os.environ[_TF_ACL_ROOT] unconditionally, while
#     _enable_local_acl() is defined but not called anywhere in this patch.
#   * build_defs.bzl: the `urls`, `sha256`, `strip_prefix` and `repository` attrs
#     are declared but not read by the implementation shown here; the
#     commented-out license symlink refers to `mkl_license_path`.
#   * workspace.bzl: the URL targets a feature-branch archive and no sha256 is
#     passed.
#   * third_party/acl/BUILD: the ACL cc_library is named `intel_binary_blob`,
#     mirroring the MKL target name.
#   * third_party/acl/LICENSE is added with no content hunk (appears empty), yet
#     it is listed as an input to the license genrules in lib_package.
#   * pip_package adds only "//third_party/acl:LICENSE"; lib_package also adds
#     "@acl//:LICENSE".
#   * acl.BUILD lists the globbed headers under `srcs` rather than `hdrs`.
#   * third_party/eigen3/BUILD references "./acl_include", which this patch does
#     not create, and loads if_acl from //tensorflow:tensorflow.bzl.
#   * `build:acl --define using_acl=true` is emitted both by set_acl() and by
#     tools/bazel.rc.
#
diff --git a/configure.py b/configure.py index c0b41e1403c05c..a3014163b5204d 100644 --- a/configure.py +++ b/configure.py @@ -949,6 +949,15 @@ def set_mkl(): 'downloading, please set the environment variable \"TF_MKL_ROOT\" every ' 'time before build.') +def set_acl(): + # Set up for ARM Compute Library + write_to_bazelrc('build:acl --define using_acl=true') + write_to_bazelrc('build:acl -c opt') + write_to_bazelrc('build:acl --copt="-DARM_COMPUTE_CL"') + write_to_bazelrc('build:acl --copt="-DARM_NO_EXCEPTIONS"') + print('Add "--config=acl" to your bazel command to build with ARM ' + 'Compute Library support.\nPlease set the environment variable ' + '\"TF_ACL_ROOT\" every time before build.') def set_monolithic(): # Add --config=monolithic to your bazel command to use a mostly-static @@ -1030,6 +1039,7 @@ def main(): set_cc_opt_flags(environ_cp) set_mkl() + set_acl() set_monolithic() diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 0c629dabd83380..fc79cdfcfd53bc 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -526,12 +526,21 @@ load( "if_mkl", ) +load( + "//third_party/acl:build_defs.bzl", + "if_acl", +) + filegroup( name = "intel_binary_blob", data = if_mkl( [ "//third_party/mkl:intel_binary_blob", ], + ) + if_acl( + [ + "//third_party/acl:intel_binary_blob", + ], ), ) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 3953575e1b1ef7..27aab95c4e227e 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -145,6 +145,10 @@ load( "//third_party/mkl:build_defs.bzl", "if_mkl", ) +load( + "//third_party/acl:build_defs.bzl", + "if_acl", +) # ----------------------------------------------------------------------------- # Public targets @@ -1775,6 +1779,10 @@ tf_cuda_library( "//third_party/mkl:intel_binary_blob", "@mkl_dnn//:mkl_dnn", ], + ) + if_acl( + [ + "//third_party/acl:intel_binary_blob", + ], ), alwayslink = 1, ) @@ -1996,6 +2004,10 @@ tf_cuda_library( [ "//third_party/mkl:intel_binary_blob", ], + ) + if_acl( + [ + 
"//third_party/acl:intel_binary_blob", + ], ), alwayslink = 1, ) @@ -2040,6 +2052,10 @@ tf_cuda_library( "//third_party/mkl:intel_binary_blob", "@mkl_dnn//:mkl_dnn", ], + ) + if_acl( + [ + "//third_party/acl:intel_binary_blob" + ], ) + tf_additional_core_deps() + if_static([":core_cpu_impl"]), alwayslink = 1, ) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index f6c628908eeaef..f215cf08f2662f 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -50,6 +50,10 @@ load( "//third_party/mkl:build_defs.bzl", "if_mkl", ) +load( + "//third_party/acl:build_defs.bzl", + "if_acl", +) load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") config_setting( @@ -2600,6 +2604,8 @@ tf_kernel_library( }) + if_mkl([ "//third_party/mkl:intel_binary_blob", "@mkl_dnn//:mkl_dnn", + ]) + if_acl([ + "//third_party/acl:intel_binary_blob", ]) + if_cuda([ "//tensorflow/core/platform/default/build_config:cublas_plugin", ]), diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc index 12d02a10c7a2b4..4b07ce8c04e17e 100644 --- a/tensorflow/core/kernels/matmul_op.cc +++ b/tensorflow/core/kernels/matmul_op.cc @@ -30,6 +30,13 @@ limitations under the License. 
#include "tensorflow/core/platform/stream_executor.h" #endif // GOOGLE_CUDA +#ifdef ARM_COMPUTE_CL +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLFunctions.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "utils/Utils.h" +#endif // ARM_COMPUTE_CL + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -492,9 +499,54 @@ class MatMulOp : public OpKernel { f(ctx->eigen_device(), out->flat()); return; } +#ifdef ARM_COMPUTE_CL + arm_compute::CLScheduler::get().default_init(); + arm_compute::CLGEMM arm_gemm; + arm_compute::CLTensor arm_a, arm_b, arm_out; + + const arm_compute::TensorShape shape_a{b.shape().dim_size(0), b.shape().dim_size(1)}, + shape_b{a.shape().dim_size(0), a.shape().dim_size(1)}, + shape_out{out->shape().dim_size(0), out->shape().dim_size(1)}; + arm_a.allocator()->init(arm_compute::TensorInfo(shape_a, 1, arm_compute::DataType::F32)); + arm_b.allocator()->init(arm_compute::TensorInfo(shape_b, 1, arm_compute::DataType::F32)); + arm_out.allocator()->init(arm_compute::TensorInfo(shape_out, 1, arm_compute::DataType::F32)); + + arm_gemm.configure(&arm_a, &arm_b, nullptr, &arm_out, 1.0f, 1.0f); + + arm_a.allocator()->allocate(); + arm_b.allocator()->allocate(); + arm_out.allocator()->allocate(); + + auto fill_with_window = + [](const Tensor& tf_tensor, arm_compute::CLTensor& arm_tensor) { + arm_tensor.map(true); + auto tensor_flat = tf_tensor.flat(); + arm_compute::Window win; + win.use_tensor_dimensions(arm_tensor.info()->tensor_shape()); + arm_compute::Iterator it(&arm_tensor, win); + arm_compute::execute_window_loop(win, [&] (arm_compute::Coordinates& c) { + *reinterpret_cast(it.ptr()) = + tensor_flat.data()[c.y() * tf_tensor.shape().dim_size(0) + c.x()]; + }, it); + arm_tensor.unmap(); + }; + fill_with_window(b, arm_a); fill_with_window(a, arm_b);; + arm_gemm.run(); + + arm_compute::Window out_win; + out_win.use_tensor_dimensions(arm_out.info()->tensor_shape()); + arm_out.map(true); + 
arm_compute::Iterator out_it(&arm_out, out_win); + auto eigen_out = out->flat(); + arm_compute::execute_window_loop(out_win, [&] (arm_compute::Coordinates& c) { + eigen_out.data()[c.y() * out->shape().dim_size(0) + c.x()] = *reinterpret_cast(out_it.ptr()); + }, out_it); + arm_out.unmap(); +#else LaunchMatMul::launch( ctx, a, b, dim_pair, &algorithms_, use_autotune_, out); +#endif // ARM_COMPUTE_CL } private: @@ -562,7 +614,9 @@ TF_CALL_int32(REGISTER_CPU); #else TF_CALL_float(REGISTER_CPU); TF_CALL_double(REGISTER_CPU); +#ifndef ARM_COMPUTE_CL TF_CALL_half(REGISTER_CPU); +#endif // ARM_COMPUTE_CL TF_CALL_int32(REGISTER_CPU); TF_CALL_complex64(REGISTER_CPU); diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 51d37291eedd5a..d3f5a1288e63c3 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -10,6 +10,10 @@ load( "//third_party/mkl:build_defs.bzl", "if_mkl", ) +load( + "//third_party/acl:build_defs.bzl", + "if_acl", +) # Appends a suffix to a list of deps. 
def tf_deps(deps, suffix): @@ -538,5 +542,5 @@ def tf_additional_binary_deps(): ] + if_mkl( [ "//third_party/mkl:intel_binary_blob", - ], + ]) + if_acl(["//third_party/acl:intel_binary_blob",] ) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 0f074151db26a1..3921f8ce297fe8 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -22,6 +22,9 @@ load( "//third_party/mkl:build_defs.bzl", "if_mkl",) +load( + "//third_party/acl:build_defs.bzl", + "if_acl",) def full_path(relative_paths): return [PACKAGE_NAME + "/" + relative for relative in relative_paths] @@ -286,6 +289,10 @@ def tf_cc_binary(name, [ "//third_party/mkl:intel_binary_blob", ], + ) + if_acl( + [ + "//third_party/acl:intel_binary_blob", + ], ), linkopts=linkopts + _rpath_linkopts(name), **kwargs) @@ -537,6 +544,10 @@ def tf_cc_test(name, [ "//third_party/mkl:intel_binary_blob", ], + ) + if_acl( + [ + "//third_party/acl:intel_binary_blob", + ], ), # Nested select() statements seem not to be supported when passed to # linkstatic, and we already have a cuda select() passed in to this diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 845bad5e499025..f85c60f374549e 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -6,6 +6,7 @@ package(default_visibility = ["//visibility:private"]) load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_tar") load("//tensorflow:tensorflow.bzl", "tf_binary_additional_srcs") load("//third_party/mkl:build_defs.bzl", "if_mkl") +load("//third_party/acl:build_defs.bzl", "if_acl") genrule( name = "libtensorflow_proto", @@ -119,6 +120,9 @@ genrule( ] + if_mkl([ "//third_party/mkl:LICENSE", "@mkl//:LICENSE", + ]) + if_acl([ + "//third_party/acl:LICENSE", + "@acl//:LICENSE", ]), outs = ["include/tensorflow/c/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", @@ -154,6 +158,9 @@ genrule( ] + if_mkl([ "//third_party/mkl:LICENSE", "@mkl//:LICENSE", + ]) + 
if_acl([ + "//third_party/acl:LICENSE", + "@acl//:LICENSE", ]), outs = ["include/tensorflow/jni/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 3c4e1b66bc467c..5b37af0393c893 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -9,6 +9,7 @@ load( "transitive_hdrs", ) load("//third_party/mkl:build_defs.bzl", "if_mkl") +load("//third_party/acl:build_defs.bzl", "if_acl") load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_license_deps") # This returns a list of headers of all public header libraries (e.g., @@ -131,6 +132,8 @@ filegroup( ] + if_mkl([ "//third_party/mkl:LICENSE", "@mkl//:LICENSE", + ]) + if_acl([ + "//third_party/acl:LICENSE", ]) + if_not_windows([ "@nccl_archive//:LICENSE.txt", ]) + tf_additional_license_deps(), @@ -182,5 +185,6 @@ sh_binary( "//tensorflow/python:test_ops", "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", ], - }) + if_mkl(["//third_party/mkl:intel_binary_blob"]), + }) + if_mkl(["//third_party/mkl:intel_binary_blob"] + ) + if_acl(["//third_party/acl:intel_binary_blob"]), ) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b7aa1af56298a4..aa40a8f76cd055 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -3,6 +3,7 @@ load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") +load("//third_party/acl:build_defs.bzl", "acl_repository") load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") @@ -166,6 +167,16 @@ def tf_workspace(path_prefix="", tf_repo_name=""): repository = tf_repo_name, ) + acl_repository( + name = "acl", + urls = [ + 
"https://github.com/lukeiwanski/ComputeLibrary/archive/feature/no_exceptions.zip", + ], + strip_prefix = "ComputeLibrary-feature-no_exceptions", + build_file = str(Label("//third_party/acl:acl.BUILD")), + repository = tf_repo_name, + ) + if path_prefix: print("path_prefix was specified to tf_workspace but is no longer used " + "and will be removed in the future.") diff --git a/third_party/acl/BUILD b/third_party/acl/BUILD new file mode 100644 index 00000000000000..f18be93ffee6a0 --- /dev/null +++ b/third_party/acl/BUILD @@ -0,0 +1,26 @@ +licenses(["notice"]) # MIT License + +exports_files(["LICENSE"]) + +config_setting( + name = "using_acl", + values = { + "define": "using_acl=true", + }, + visibility = ["//visibility:public"], +) + +load( + "//third_party/acl:build_defs.bzl", + "if_acl", +) + +cc_library( + name = "intel_binary_blob", + srcs = if_acl([ + "@acl//:libarm_compute.so", + "@acl//:libOpenCL.so", + ]), + visibility = ["//visibility:public"], + deps = ["@acl//:acl_headers"], +) diff --git a/third_party/acl/LICENSE b/third_party/acl/LICENSE new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/third_party/acl/acl.BUILD b/third_party/acl/acl.BUILD new file mode 100644 index 00000000000000..5260b3b0ab689d --- /dev/null +++ b/third_party/acl/acl.BUILD @@ -0,0 +1,30 @@ +licenses(["notice"]) # MIT + +exports_files(["license.txt"]) + +filegroup( + name = "LICENSE", + srcs = [ + "license.txt", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "acl_headers", + srcs = glob(["**/*.h"]), + includes = [".", "include", "arm_compute", "support", "utils"], + visibility = ["//visibility:public"], +) + +filegroup( + name = "libarm_compute.so", + srcs = ["lib/libarm_compute.so"], + visibility = ["//visibility:public"], +) + +filegroup( + name = "libOpenCL.so", + srcs = ["lib/libOpenCL.so"], + visibility = ["//visibility:public"], +) diff --git a/third_party/acl/build_defs.bzl b/third_party/acl/build_defs.bzl new file mode 100644 index 
00000000000000..af45e5dce6883a --- /dev/null +++ b/third_party/acl/build_defs.bzl @@ -0,0 +1,58 @@ +# -*- Python -*- + +_TF_ACL_ROOT = "TF_ACL_ROOT" + + +def if_acl(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with ACL. + + Returns a select statement which evaluates to if_true if we're building + with ACL enabled. Otherwise, the select statement evaluates to if_false. + + """ + return select({ + "//third_party/acl:using_acl": if_true, + "//conditions:default": if_false + }) + + +def _enable_local_acl(repository_ctx): + return _TF_ACL_ROOT in repository_ctx.os.environ + + +def _acl_autoconf_impl(repository_ctx): + """Implementation of the local_acl_autoconf repository rule.""" + + # Symlink lib and include local folders. + acl_root = repository_ctx.os.environ[_TF_ACL_ROOT] + acl_lib_path = "%s/build" % acl_root + repository_ctx.symlink(acl_lib_path, "lib") + acl_include_path = "%s/include" % acl_root + repository_ctx.symlink(acl_include_path, "include") + acl_arm_compute_path = "%s/arm_compute" % acl_root + repository_ctx.symlink(acl_arm_compute_path, "arm_compute") + acl_support_path = "%s/support" % acl_root + repository_ctx.symlink(acl_support_path, "support") + acl_utils_path = "%s/utils" % acl_root + repository_ctx.symlink(acl_utils_path, "utils") + +# acl_license_path = "%s/LICENSE" % acl_root +# repository_ctx.symlink(mkl_license_path, "LICENSE") + + # Also setup BUILD file. 
+ repository_ctx.symlink(repository_ctx.attr.build_file, "BUILD") + + +acl_repository = repository_rule( + implementation = _acl_autoconf_impl, + environ = [ + _TF_ACL_ROOT, + ], + attrs = { + "build_file": attr.label(), + "repository": attr.string(), + "urls": attr.string_list(default = []), + "sha256": attr.string(default = ""), + "strip_prefix": attr.string(default = ""), + }, + ) diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index f38a26717e14ea..5314b152492f70 100644 --- a/third_party/eigen3/BUILD +++ b/third_party/eigen3/BUILD @@ -17,6 +17,8 @@ load("//tensorflow:tensorflow.bzl", "if_mkl") # INTEL_MKL end load("//tensorflow:tensorflow.bzl", "if_mkl") +load("//tensorflow:tensorflow.bzl", "if_acl") + cc_library( name = "eigen3", hdrs = glob(["unsupported/Eigen/CXX11/src/FixedPoint/*.h"]) + [ @@ -31,7 +33,7 @@ cc_library( "unsupported/Eigen/CXX11/Tensor", "unsupported/Eigen/CXX11/FixedPoint", ], - includes = if_mkl(["./mkl_include"]), + includes = if_mkl(["./mkl_include"]) + if_acl(["./acl_include"]), visibility = ["//visibility:public"], deps = [ "@eigen_archive//:eigen", diff --git a/tools/bazel.rc b/tools/bazel.rc index 414ddf2e475da0..b50920de5009e5 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -8,6 +8,8 @@ build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true build:mkl --define=using_mkl=true +build:acl --define=using_acl=true + build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain build:sycl --define=using_sycl=true