Skip to content

Commit

Permalink
[nvcc] enable multiple arch in one fatbin (#4377)
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhliu authored and tqchen committed Nov 19, 2019
1 parent 500ff05 commit f8f4ceb
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 3 deletions.
12 changes: 10 additions & 2 deletions python/tvm/autotvm/measure/measure_methods.py
Expand Up @@ -582,7 +582,13 @@ def _check():
@register_func
def tvm_callback_cuda_compile(code):
"""use nvcc to generate ptx code for better optimization"""
ptx = nvcc.compile_cuda(code, target="ptx", arch=AutotvmGlobalScope.current.cuda_target_arch)
curr_cuda_target_arch = AutotvmGlobalScope.current.cuda_target_arch
# e.g., target arch could be [
# "-gencode", "arch=compute_52,code=sm_52",
# "-gencode", "arch=compute_70,code=sm_70"
# ]
target = "fatbin" if isinstance(curr_cuda_target_arch, list) else "ptx"
ptx = nvcc.compile_cuda(code, target=target, arch=AutotvmGlobalScope.current.cuda_target_arch)
return ptx


Expand All @@ -591,8 +597,10 @@ def set_cuda_target_arch(arch):
Parameters
----------
arch: str
arch: str or list
The argument of nvcc -arch. (e.g. "sm_51", "sm_62")
It can also be a list of gencode arguments passed to the nvcc command line,
e.g., ["-gencode", "arch=compute_52,code=sm_52", "-gencode", "arch=compute_70,code=sm_70"]
"""
AutotvmGlobalScope.current.cuda_target_arch = arch

Expand Down
5 changes: 4 additions & 1 deletion python/tvm/contrib/nvcc.py
Expand Up @@ -74,7 +74,10 @@ def compile_cuda(code,
file_target = path_target if path_target else temp_target
cmd = ["nvcc"]
cmd += ["--%s" % target, "-O3"]
cmd += ["-arch", arch]
if isinstance(arch, list):
cmd += arch
else:
cmd += ["-arch", arch]

if options:
if isinstance(options, str):
Expand Down

0 comments on commit f8f4ceb

Please sign in to comment.