Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TARGET] add amd_gpu target #5645

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/benchmark/gpu_imagenet_bench.py
Expand Up @@ -57,12 +57,12 @@ def benchmark(network, target):
'mobilenet', 'squeezenet_v1.0', 'squeezenet_v1.1'],
help='The name of neural network')
parser.add_argument("--model", type=str,
choices=['1080ti', 'titanx', 'tx2', 'gfx900'], default='1080ti',
choices=['1080ti', 'titanx', 'tx2', 'gfx900', 'v1000'], default='1080ti',
help="The model of the test device. If your device is not listed in "
"the choices list, pick the most similar one as argument.")
parser.add_argument("--repeat", type=int, default=600)
parser.add_argument("--target", type=str,
choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal'], default='cuda',
choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal', 'vulkan', 'amd_gpu'], default='cuda',
help="The tvm compilation target")
parser.add_argument("--thread", type=int, default=1, help="The number of threads to be run.")
args = parser.parse_args()
Expand Down
1 change: 1 addition & 0 deletions python/tvm/_ffi/runtime_ctypes.py
Expand Up @@ -171,6 +171,7 @@ class TVMContext(ctypes.Structure):
'micro_dev': 13,
'hexagon': 14,
'webgpu': 15,
'amd_gpu': 7,
}
def __init__(self, device_type, device_id):
super(TVMContext, self).__init__()
Expand Down
3 changes: 2 additions & 1 deletion python/tvm/autotvm/measure/measure_methods.py
Expand Up @@ -231,7 +231,8 @@ def set_task(self, task):
def get_build_kwargs(self):
kwargs = {}
if 'cuda' in self.task.target.keys or 'opencl' in self.task.target.keys or \
'rocm' in self.task.target.keys:
'rocm' in self.task.target.keys or 'vulkan' in self.task.target.keys or \
'amd_gpu' in self.task.target.keys:
remote = request_remote(self.key, self.host, self.port)
ctx = remote.context(str(self.task.target), 0)
max_dims = ctx.max_thread_dimensions
Expand Down
1 change: 1 addition & 0 deletions python/tvm/autotvm/tophub.py
Expand Up @@ -56,6 +56,7 @@
'intel_graphics': "v0.02",

'vta': "v0.08",
'amd_gpu': "v0.01",
}

logger = logging.getLogger('autotvm')
Expand Down
4 changes: 4 additions & 0 deletions python/tvm/rpc/client.py
Expand Up @@ -194,6 +194,10 @@ def webgpu(self, dev_id=0):
"""Construct WebGPU device."""
return self.context(15, dev_id)

def amd_gpu(self, dev_id=0):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can directly use vulkan so it is not necessary

"""Construct WebGPU device."""
return self.context(7, dev_id)


class LocalSession(RPCSession):
"""RPCSession interface backed by local environment.
Expand Down
1 change: 1 addition & 0 deletions python/tvm/target/__init__.py
Expand Up @@ -56,6 +56,7 @@
"""
from .target import Target, create
from .target import cuda, rocm, mali, intel_graphics, opengl, arm_cpu, rasp, vta, bifrost, hexagon
from .target import amd_gpu
from .generic_func import GenericFunc
from .generic_func import generic_func, get_native_generic_func, override_native_generic_func
from . import datatype
Expand Down
11 changes: 11 additions & 0 deletions python/tvm/target/target.py
Expand Up @@ -343,6 +343,17 @@ def validate_hvx_length(codegen_hvx, sim_args):
args_list = target_str.split()
return _ffi_api.TargetCreate("hexagon", *args_list)

def amd_gpu(model='unknown', options=None):
    """Return an AMD integrated GPU target.

    Parameters
    ----------
    model: str
        The model of this device, e.g. "gfx900" or "v1000". Used to pick
        tuned schedules; defaults to 'unknown'.
    options : str or list of str
        Additional options
    """
    # amd_gpu compiles through the Vulkan/SPIR-V backend; the -device flag
    # lets amd_gpu-specific schedules and tophub entries key on this target.
    opts = ["-device=amd_gpu", '-model=%s' % model]
    opts = _merge_opts(opts, options)
    return _ffi_api.TargetCreate("vulkan", *opts)

def create(target_str):
    """Get a target given target string.
Expand Down
14 changes: 12 additions & 2 deletions src/runtime/vulkan/vulkan.cc
Expand Up @@ -366,7 +366,7 @@ void VulkanDeviceAPI::GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue*
case kMaxThreadsPerBlock: {
VkPhysicalDeviceProperties phy_prop;
vkGetPhysicalDeviceProperties(vctx.phy_device, &phy_prop);
int64_t value = phy_prop.limits.maxComputeWorkGroupSize[0];
int64_t value = phy_prop.limits.maxComputeWorkGroupInvocations;
*rv = value;
break;
}
Expand Down Expand Up @@ -399,8 +399,18 @@ void VulkanDeviceAPI::GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue*
return;
case kExist:
break;
case kMaxThreadDimensions:
case kMaxThreadDimensions: {
VkPhysicalDeviceProperties phy_prop;
vkGetPhysicalDeviceProperties(vctx.phy_device, &phy_prop);
int64_t dims[3];
dims[0] = phy_prop.limits.maxComputeWorkGroupSize[0];
dims[1] = phy_prop.limits.maxComputeWorkGroupSize[1];
dims[2] = phy_prop.limits.maxComputeWorkGroupSize[2];
std::stringstream ss; // use json string to return multiple int values;
ss << "[" << dims[0] << ", " << dims[1] << ", " << dims[2] << "]";
*rv = ss.str();
break;
}
case kGcnArch:
return;
}
Expand Down
4 changes: 4 additions & 0 deletions src/target/spirv/build_vulkan.cc
Expand Up @@ -124,5 +124,9 @@ TVM_REGISTER_GLOBAL("target.build.webgpu").set_body_typed([](IRModule mod, std::
return BuildSPIRV(mod, target, true);
});

// Build amd_gpu kernels through the shared SPIR-V codegen path.
// NOTE(review): the third argument is false here and true for the webgpu
// registration above — presumably it toggles WebGPU-specific restrictions;
// confirm against the BuildSPIRV signature.
TVM_REGISTER_GLOBAL("target.build.amd_gpu").set_body_typed([](IRModule mod, std::string target) {
return BuildSPIRV(mod, target, false);
});

} // namespace codegen
} // namespace tvm
28 changes: 28 additions & 0 deletions src/target/spirv/intrin_rule_spirv.cc
Expand Up @@ -93,6 +93,34 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.pow").set_body(DispatchGLSLPureIntri

TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.tanh").set_body(DispatchGLSLPureIntrin<GLSLstd450Tanh>);

// amd_gpu rules — mirror the vulkan/webgpu GLSL.std.450 dispatch entries above,
// so the same math intrinsics lower identically when an amd_gpu target is
// compiled to SPIR-V.
TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.floor")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Floor>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.ceil")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Ceil>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.round")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Round>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.trunc")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Trunc>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.fabs")
.set_body(DispatchGLSLPureIntrin<GLSLstd450FAbs>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.exp").set_body(DispatchGLSLPureIntrin<GLSLstd450Exp>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.log").set_body(DispatchGLSLPureIntrin<GLSLstd450Log>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.sqrt")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Sqrt>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.pow").set_body(DispatchGLSLPureIntrin<GLSLstd450Pow>);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.amd_gpu.tanh")
.set_body(DispatchGLSLPureIntrin<GLSLstd450Tanh>);

} // namespace spirv
} // namespace codegen
} // namespace tvm
15 changes: 10 additions & 5 deletions src/target/target.cc
Expand Up @@ -47,7 +47,7 @@ TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
* \brief Construct a Target node from the given name and options.
* \param target_name The major target name. Should be one of
* {"aocl", "aocl_sw_emu", "c", "cuda", "ext_dev", "hexagon", "hybrid", "llvm",
* "metal", "nvptx", "opencl", "rocm", "sdaccel", "stackvm", "vulkan"}
* "metal", "nvptx", "opencl", "rocm", "sdaccel", "stackvm", "vulkan", "amd_gpu"}
* \param options Additional options appended to the target
* \return The constructed Target
*/
Expand Down Expand Up @@ -107,13 +107,14 @@ Target CreateTarget(const std::string& target_name, const std::vector<std::strin
if (t->device_name == "intel_graphics") {
t->thread_warp_size = 16;
}
} else if (target_name == "metal" || target_name == "vulkan" || target_name == "webgpu") {
} else if (target_name == "metal" || target_name == "vulkan" || target_name == "webgpu" ||
target_name == "amd_gpu") {
if (target_name == "metal") {
t->device_type = kDLMetal;
} else if (target_name == "vulkan") {
t->device_type = kDLVulkan;
} else {
} else if (target_name == "webgpu") {
t->device_type = kDLWebGPU;
} else {
t->device_type = kDLVulkan;
}
t->keys_array.push_back(target_name);
t->keys_array.push_back("gpu");
Expand Down Expand Up @@ -312,6 +313,10 @@ Target stackvm(const std::vector<std::string>& options) { return CreateTarget("s
Target ext_dev(const std::vector<std::string>& options) { return CreateTarget("ext_dev", options); }

Target hexagon(const std::vector<std::string>& options) { return CreateTarget("hexagon", options); }

/*!
 * \brief Construct an amd_gpu target: an alias for the vulkan target with
 *        -device=amd_gpu appended so amd_gpu-specific schedules can key on it.
 */
Target amd_gpu(const std::vector<std::string>& options) {
return CreateTarget("vulkan", MergeOptions(options, {"-device=amd_gpu"}));
}
} // namespace target

BuildConfig BuildConfig::Create() { return BuildConfig(make_object<BuildConfigNode>()); }
Expand Down
8 changes: 8 additions & 0 deletions tests/python/unittest/test_target_target.py
Expand Up @@ -34,6 +34,10 @@ def rocm_func(data):
def rocm_func(data):
return data + 10

@mygeneric.register("amd_gpu")
def amd_gpu_func(data):
    # amd_gpu override for mygeneric; test_target_dispatch expects
    # mygeneric(1) == 12 under tvm.target.amd_gpu().
    # Fixed: dropped the stray C-style trailing semicolon.
    return data + 11


def test_target_dispatch():
with tvm.target.cuda():
Expand All @@ -51,6 +55,9 @@ def test_target_dispatch():
with tvm.target.create("metal"):
assert mygeneric(1) == 3

with tvm.target.amd_gpu():
assert mygeneric(1) == 12

assert tvm.target.Target.current() is None


Expand All @@ -66,6 +73,7 @@ def test_target_string_parse():
assert tvm.target.intel_graphics().device_name == "intel_graphics"
assert tvm.target.mali().device_name == "mali"
assert tvm.target.arm_cpu().device_name == "arm_cpu"
assert tvm.target.amd_gpu().device_name == "amd_gpu"

if __name__ == "__main__":
test_target_dispatch()
Expand Down