Skip to content

Commit

Permalink
Avoid redundant PTX generation for maximum specified compute capability
Browse files Browse the repository at this point in the history
  • Loading branch information
trevor-m committed Apr 15, 2022
1 parent 98db066 commit d6ef7c6
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions cmake/build_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,10 @@ def get_nvcc_flags():

# Build native kernels for specified compute capabilities
cc_list = full_cc_list if cc_list_env is None else [int(x) for x in cc_list_env.split(',')]
for cc in cc_list:
default_flags += ['-gencode', 'arch=compute_{cc},code=sm_{cc}'.format(cc=cc)]
cc_list = sorted(cc_list)
for cc in cc_list[:-1]:
default_flags += ['-gencode', 'arch=compute_{cc},code=sm_{cc}'.format(cc=cc)]
# Build PTX for maximum specified compute capability
default_flags += ['-gencode', 'arch=compute_{cc},code=compute_{cc}'.format(cc=max(cc_list))]
default_flags += ['-gencode', 'arch=compute_{cc},code=\\"sm_{cc},compute_{cc}\\"'.format(cc=cc_list[-1])]

return default_flags

0 comments on commit d6ef7c6

Please sign in to comment.