Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,9 +422,6 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("--return-at-end");
} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
// Map the -O we received to -O{0,1,2,3}.
//
// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
// default, so it may correspond more closely to the spirit of clang -O2.

// -O3 seems like the least-bad option when -Osomething is specified to
// clang but it isn't handled below.
Expand All @@ -446,9 +443,9 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
}
CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
} else {
// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
// to no optimizations, but ptxas's default is -O3.
CmdArgs.push_back("-O0");
// If no -O was passed, pass -O3 to ptxas -- this makes ptxas's
// optimization level the same as the ptxjitcompiler's.
CmdArgs.push_back("-O3");
}
if (DIKind == DebugDirectivesOnly)
CmdArgs.push_back("-lineinfo");
Expand Down
6 changes: 3 additions & 3 deletions clang/test/Driver/cuda-external-tools.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
// RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s

// Regular compile without -O. This should result in us passing -O0 to ptxas.
// Regular compile without -O. This should result in us passing -O3 to ptxas.
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s

// Regular compiles with -Os and -Oz. For lack of a better option, we map
// these to ptxas -O3.
Expand Down Expand Up @@ -75,7 +75,7 @@
// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
// Check that we still pass -c when generating relocatable device code.
// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
Expand Down