Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TARGET] add amd_gpu target #5645

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/benchmark/gpu_imagenet_bench.py
Expand Up @@ -57,12 +57,12 @@ def benchmark(network, target):
'mobilenet', 'squeezenet_v1.0', 'squeezenet_v1.1'],
help='The name of neural network')
parser.add_argument("--model", type=str,
choices=['1080ti', 'titanx', 'tx2', 'gfx900'], default='1080ti',
choices=['1080ti', 'titanx', 'tx2', 'gfx900', 'v1000'], default='1080ti',
help="The model of the test device. If your device is not listed in "
"the choices list, pick the most similar one as argument.")
parser.add_argument("--repeat", type=int, default=600)
parser.add_argument("--target", type=str,
choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal'], default='cuda',
choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal', 'vulkan', 'amd_gpu'], default='cuda',
help="The tvm compilation target")
parser.add_argument("--thread", type=int, default=1, help="The number of threads to be run.")
args = parser.parse_args()
Expand Down
1 change: 1 addition & 0 deletions python/tvm/_ffi/runtime_ctypes.py
Expand Up @@ -171,6 +171,7 @@ class TVMContext(ctypes.Structure):
'micro_dev': 13,
'hexagon': 14,
'webgpu': 15,
'amd_gpu': 7,
}
def __init__(self, device_type, device_id):
super(TVMContext, self).__init__()
Expand Down
3 changes: 2 additions & 1 deletion python/tvm/autotvm/measure/measure_methods.py
Expand Up @@ -231,7 +231,8 @@ def set_task(self, task):
def get_build_kwargs(self):
kwargs = {}
if 'cuda' in self.task.target.keys or 'opencl' in self.task.target.keys or \
'rocm' in self.task.target.keys:
'rocm' in self.task.target.keys or 'vulkan' in self.task.target.keys or \
'amd_gpu' in self.task.target.keys:
remote = request_remote(self.key, self.host, self.port)
ctx = remote.context(str(self.task.target), 0)
max_dims = ctx.max_thread_dimensions
Expand Down
1 change: 1 addition & 0 deletions python/tvm/autotvm/tophub.py
Expand Up @@ -56,6 +56,7 @@
'intel_graphics': "v0.02",

'vta': "v0.08",
'amd_gpu': "v0.01",
}

logger = logging.getLogger('autotvm')
Expand Down
4 changes: 4 additions & 0 deletions python/tvm/rpc/client.py
Expand Up @@ -194,6 +194,10 @@ def webgpu(self, dev_id=0):
"""Construct WebGPU device."""
return self.context(15, dev_id)

def amd_gpu(self, dev_id=0):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can directly use vulkan so it is not necessary

"""Construct WebGPU device."""
return self.context(7, dev_id)


class LocalSession(RPCSession):
"""RPCSession interface backed by local environment.
Expand Down
1 change: 1 addition & 0 deletions python/tvm/target/__init__.py
Expand Up @@ -56,6 +56,7 @@
"""
from .target import Target, create
from .target import cuda, rocm, mali, intel_graphics, opengl, arm_cpu, rasp, vta, bifrost, hexagon
from .target import amd_gpu
from .generic_func import GenericFunc
from .generic_func import generic_func, get_native_generic_func, override_native_generic_func
from . import datatype
Expand Down
11 changes: 11 additions & 0 deletions python/tvm/target/target.py
Expand Up @@ -343,6 +343,17 @@ def validate_hvx_length(codegen_hvx, sim_args):
args_list = target_str.split()
return _ffi_api.TargetCreate("hexagon", *args_list)

def amd_gpu(model='unknown', options=None):
    """Return an AMD integrated GPU target.

    Parameters
    ----------
    model: str
        The model of this device, e.g. "gfx900" or "v1000". Used to pick
        tuned schedules; defaults to 'unknown'.
    options : str or list of str
        Additional options
    """
    # amd_gpu compiles through the Vulkan/SPIR-V backend; the -device flag
    # lets amd_gpu-specific schedules and tophub entries key on this target.
    opts = ["-device=amd_gpu", '-model=%s' % model]
    opts = _merge_opts(opts, options)
    return _ffi_api.TargetCreate("vulkan", *opts)

def create(target_str):
    """Get a target given target string.
Expand Down
14 changes: 12 additions & 2 deletions src/runtime/vulkan/vulkan.cc
Expand Up @@ -366,7 +366,7 @@ void VulkanDeviceAPI::GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue*
case kMaxThreadsPerBlock: {
VkPhysicalDeviceProperties phy_prop;
vkGetPhysicalDeviceProperties(vctx.phy_device, &phy_prop);
int64_t value = phy_prop.limits.maxComputeWorkGroupSize[0];
int64_t value = phy_prop.limits.maxComputeWorkGroupInvocations;
*rv = value;
break;
}
Expand Down Expand Up @@ -399,8 +399,18 @@ void VulkanDeviceAPI::GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue*
return;
case kExist:
break;
case kMaxThreadDimensions:
case kMaxThreadDimensions: {
VkPhysicalDeviceProperties phy_prop;
vkGetPhysicalDeviceProperties(vctx.phy_device, &phy_prop);
int64_t dims[3];
dims[0] = phy_prop.limits.maxComputeWorkGroupSize[0];
dims[1] = phy_prop.limits.maxComputeWorkGroupSize[1];
dims[2] = phy_prop.limits.maxComputeWorkGroupSize[2];
std::stringstream ss; // use json string to return multiple int values;
ss << "[" << dims[0] << ", " << dims[1] << ", " << dims[2] << "]";
*rv = ss.str();
break;
}
case kGcnArch:
return;
}
Expand Down
4 changes: 4 additions & 0 deletions src/target/spirv/build_vulkan.cc
Expand Up @@ -124,5 +124,9 @@ TVM_REGISTER_GLOBAL("target.build.webgpu").set_body_typed([](IRModule mod, std::
return BuildSPIRV(mod, target, true);
});

// Build amd_gpu kernels through the shared SPIR-V codegen path.
// NOTE(review): the third argument is false here and true for the webgpu
// registration above — presumably it toggles WebGPU-specific restrictions;
// confirm against the BuildSPIRV signature.
TVM_REGISTER_GLOBAL("target.build.amd_gpu").set_body_typed([](IRModule mod, std::string target) {
return BuildSPIRV(mod, target, false);
});

} // namespace codegen
} // namespace tvm
28 changes: 28 additions & 0 deletions src/target/spirv/intrin_rule_spirv.cc
Expand Up @@ -93,6 +93,34 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.pow").set_body(DispatchGLSLPureIntri

TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.tanh").set_body(DispatchGLSLPureIntrin<GLSLstd450Tanh>);

// amd_gpu rules — mirror the vulkan/webgpu GLSL.std.450 dispatch entries above,
// so the same math intrinsics lower identically when an amd_gpu target is
// compiled to SPIR-V.
TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.floor")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Floor>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.ceil")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Ceil>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.round")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Round>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.trunc")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Trunc>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.fabs")
.set_body(DispatchGLSLPureIntrin<GLSLstd450FAbs>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.exp").set_body(DispatchGLSLPureIntrin<GLSLstd450Exp>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.log").set_body(DispatchGLSLPureIntrin<GLSLstd450Log>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.sqrt")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Sqrt>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.pow").set_body(DispatchGLSLPureIntrin<GLSLstd450Pow>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.tanh")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Tanh>);

} // namespace spirv
} // namespace codegen
} // namespace tvm
15 changes: 10 additions & 5 deletions src/target/target.cc
Expand Up @@ -47,7 +47,7 @@ TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
* \brief Construct a Target node from the given name and options.
* \param target_name The major target name. Should be one of
* {"aocl", "aocl_sw_emu", "c", "cuda", "ext_dev", "hexagon", "hybrid", "llvm",
* "metal", "nvptx", "opencl", "rocm", "sdaccel", "stackvm", "vulkan"}
* "metal", "nvptx", "opencl", "rocm", "sdaccel", "stackvm", "vulkan", "amd_gpu"}
* \param options Additional options appended to the target
* \return The constructed Target
*/
Expand Down Expand Up @@ -107,13 +107,14 @@ Target CreateTarget(const std::string& target_name, const std::vector<std::strin
if (t->device_name == "intel_graphics") {
t->thread_warp_size = 16;
}
} else if (target_name == "metal" || target_name == "vulkan" || target_name == "webgpu") {
} else if (target_name == "metal" || target_name == "vulkan" || target_name == "webgpu" ||
target_name == "amd_gpu") {
if (target_name == "metal") {
t->device_type = kDLMetal;
} else if (target_name == "vulkan") {
t->device_type = kDLVulkan;
} else {
} else if (target_name == "webgpu") {
t->device_type = kDLWebGPU;
} else {
t->device_type = kDLVulkan;
}
t->keys_array.push_back(target_name);
t->keys_array.push_back("gpu");
Expand Down Expand Up @@ -312,6 +313,10 @@ Target stackvm(const std::vector<std::string>& options) { return CreateTarget("s
Target ext_dev(const std::vector<std::string>& options) { return CreateTarget("ext_dev", options); }

Target hexagon(const std::vector<std::string>& options) { return CreateTarget("hexagon", options); }

/*!
 * \brief Construct an amd_gpu target: an alias for the vulkan target with
 *        -device=amd_gpu appended so amd_gpu-specific schedules can key on it.
 */
Target amd_gpu(const std::vector<std::string>& options) {
return CreateTarget("vulkan", MergeOptions(options, {"-device=amd_gpu"}));
}
} // namespace target

BuildConfig BuildConfig::Create() { return BuildConfig(make_object<BuildConfigNode>()); }
Expand Down
8 changes: 8 additions & 0 deletions tests/python/unittest/test_target_target.py
Expand Up @@ -34,6 +34,10 @@ def rocm_func(data):
def rocm_func(data):
return data + 10

@mygeneric.register("amd_gpu")
def amd_gpu_func(data):
    # amd_gpu override for mygeneric; test_target_dispatch expects
    # mygeneric(1) == 12 under tvm.target.amd_gpu().
    # Fixed: dropped the stray C-style trailing semicolon.
    return data + 11


def test_target_dispatch():
with tvm.target.cuda():
Expand All @@ -51,6 +55,9 @@ def test_target_dispatch():
with tvm.target.create("metal"):
assert mygeneric(1) == 3

with tvm.target.amd_gpu():
assert mygeneric(1) == 12

assert tvm.target.Target.current() is None


Expand All @@ -66,6 +73,7 @@ def test_target_string_parse():
assert tvm.target.intel_graphics().device_name == "intel_graphics"
assert tvm.target.mali().device_name == "mali"
assert tvm.target.arm_cpu().device_name == "arm_cpu"
assert tvm.target.amd_gpu().device_name == "amd_gpu"

if __name__ == "__main__":
test_target_dispatch()
Expand Down