diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp index f59bc17bb948e..c2a4f7124dbff 100644 --- a/libdevice/sanitizer_utils.cpp +++ b/libdevice/sanitizer_utils.cpp @@ -862,4 +862,35 @@ __asan_set_shadow_dynamic_local(uptr ptr, uint32_t num_args) { __spirv_ocl_printf(__mem_set_shadow_dynamic_local_end); } +/// +/// ASAN initialize shdadow memory of private memory +/// + +static __SYCL_CONSTANT__ const char __mem_set_shadow_private_begin[] = + "[kernel] BEGIN __asan_set_shadow_private\n"; +static __SYCL_CONSTANT__ const char __mem_set_shadow_private_end[] = + "[kernel] END __asan_set_shadow_private\n"; +static __SYCL_CONSTANT__ const char __mem_set_shadow_private[] = + "[kernel] set_shadow_private(beg=%p, end=%p, val:%02X)\n"; + +DEVICE_EXTERN_C_NOINLINE void __asan_set_shadow_private(uptr begin, uptr size, + char val) { + if (__AsanDebug) + __spirv_ocl_printf(__mem_set_shadow_private_begin); + + auto *launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo; + if (launch_info->PrivateShadowOffset == 0) + return; + + if (__AsanDebug) + __spirv_ocl_printf(__mem_set_shadow_private, (void *)begin, + (void *)(begin + size), val & 0xFF); + + for (size_t i = 0; i < size; i++) + ((__SYCL_GLOBAL__ u8 *)begin)[i] = val; + + if (__AsanDebug) + __spirv_ocl_printf(__mem_set_shadow_private_end); +} + #endif // __SPIR__ || __SPIRV__ diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f500440e02706..4ee247821fb11 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1042,6 +1042,7 @@ struct FunctionStackPoisoner : public InstVisitor { FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; FunctionCallee AsanSetShadowFunc[0x100] = {}; + FunctionCallee AsanSetShadowPrivateFunc; FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc; FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc; @@ -1257,10 +1258,11 @@ struct FunctionStackPoisoner : public InstVisitor { // ShadowMask is not zero. If ShadowMask[i] is zero, we assume that // ShadowBytes[i] is constantly zero and doesn't need to be overwritten. void copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, - IRBuilder<> &IRB, Value *ShadowBase); + IRBuilder<> &IRB, Value *ShadowBase, + bool ForceOutline = false); void copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, IRBuilder<> &IRB, - Value *ShadowBase); + Value *ShadowBase, bool ForceOutline = false); void copyToShadowInline(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, IRBuilder<> &IRB, Value *ShadowBase); @@ -3593,6 +3595,9 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { AsanSetShadowFunc[Val] = M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy); } + AsanSetShadowPrivateFunc = + M.getOrInsertFunction("__asan_set_shadow_private", IRB.getVoidTy(), + IntptrTy, IntptrTy, IRB.getInt8Ty()); AsanAllocaPoisonFunc = M.getOrInsertFunction( kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy); @@ -3655,14 +3660,17 @@ void FunctionStackPoisoner::copyToShadowInline(ArrayRef ShadowMask, void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, - IRBuilder<> &IRB, Value *ShadowBase) { - copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase); + IRBuilder<> &IRB, Value *ShadowBase, + bool ForceOutline) { + copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase, + ForceOutline); } void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, - IRBuilder<> &IRB, Value *ShadowBase) { + IRBuilder<> &IRB, Value *ShadowBase, + bool ForceOutline) { assert(ShadowMask.size() == ShadowBytes.size()); size_t Done = Begin; for (size_t i = Begin, j = Begin + 1; i < End; i = j++) { @@ -3671,14 +3679,20 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, continue; } uint8_t Val = ShadowBytes[i]; - if (!AsanSetShadowFunc[Val]) + if (!AsanSetShadowFunc[Val] && !ForceOutline) continue; // Skip same values. for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) { } - if (j - i >= ASan.MaxInlinePoisoningSize) { + if (ForceOutline) { + RTCI.createRuntimeCall( + IRB, AsanSetShadowPrivateFunc, + {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)), + ConstantInt::get(IntptrTy, j - i), + ConstantInt::get(IRB.getInt8Ty(), Val)}); + } else if (j - i >= ASan.MaxInlinePoisoningSize) { copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase); RTCI.createRuntimeCall( IRB, AsanSetShadowFunc[Val], @@ -3688,7 +3702,8 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, } } - copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase); + if (!ForceOutline) + copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase); } // Fake stack allocator (asan_fake_stack.h) has 11 size classes @@ -4062,7 +4077,8 @@ void FunctionStackPoisoner::processStaticAllocas() { ASan.memToShadow(LocalStackBase, IRB, kSpirOffloadPrivateAS); // As mask we must use most poisoned case: red zones and after scope. // As bytes we can use either the same or just red zones only. - copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); + copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase, + TargetTriple.isSPIROrSPIRV()); if (!StaticAllocaPoisonCallVec.empty()) { const auto &ShadowInScope = GetShadowBytes(SVD, L); @@ -4132,7 +4148,8 @@ void FunctionStackPoisoner::processStaticAllocas() { IRBuilder<> IRBElse(ElseTerm); copyToShadow(ShadowAfterScope, ShadowClean, IRBElse, ShadowBase); } else { - copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase); + copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase, + TargetTriple.isSPIROrSPIRV()); } } diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll index bf412b8225b79..88c0bf21ed54f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-mapping-scale=4 -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" @@ -21,11 +21,15 @@ define spir_kernel void @kernel() #0 { entry: %p.i = alloca [4 x i32], align 4 ; CHECK: %shadow_ptr = call i64 @__asan_mem_to_shadow(i64 %0, i32 0) + ; CHECK: call void @__asan_set_shadow_private(i64 %4, i64 2, i8 -15) + ; CHECK: call void @__asan_set_shadow_private(i64 %5, i64 1, i8 -13) call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %p.i) call void @llvm.memcpy.p0.p1.i64(ptr align 4 %p.i, ptr addrspace(1) align 4 @__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p, i64 16, i1 false) %arraydecay.i = getelementptr inbounds [4 x i32], ptr %p.i, i64 0, i64 0 %0 = addrspacecast ptr %arraydecay.i to ptr addrspace(4) %call.i = call spir_func i32 @_Z3fooPii(ptr addrspace(4) %0) + ; CHECK: call void @__asan_set_shadow_private(i64 %7, i64 2, i8 0) + ; CHECK: call void @__asan_set_shadow_private(i64 %8, i64 1, i8 0) ret void } diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/large_group_size.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/large_group_size.cpp new file mode 100644 index 0000000000000..86ea99bd14359 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/large_group_size.cpp @@ -0,0 +1,26 @@ +// REQUIRES: linux +// RUN: %{build} %device_asan_flags -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK %s + +#include + +#include + +int main() { + sycl::queue Q; + constexpr std::size_t N = 12345678; + auto *array = sycl::malloc_device(N, Q); + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(N + 1, 1), + [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; }); + }); + Q.wait(); + // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM + // CHECK: {{READ of size 1 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12345678, 0, 0\)}} + // CHECK: {{ #0 .* .*large_group_size.cpp:}}[[@LINE-5]] + + sycl::free(array, Q); + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp index 54acba761ae6e..6038752ca14c3 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp @@ -16,7 +16,7 @@ int main() { sycl::queue Q; - constexpr std::size_t N = 12345; + constexpr std::size_t N = 12; #if defined(MALLOC_HOST) auto *array = sycl::malloc_host(N, Q); #elif defined(MALLOC_SHARED) @@ -34,7 +34,7 @@ int main() { // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM - // CHECK: {{READ of size 1 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12345, 0, 0\)}} + // CHECK: {{READ of size 1 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12, 0, 0\)}} // CHECK: {{ #0 .* .*parallel_for_char.cpp:}}[[@LINE-7]] sycl::free(array, Q); diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp index f6c12fcc75818..eb53f29dfb53e 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp @@ -16,7 +16,7 @@ int main() { sycl::queue Q; - constexpr std::size_t N = 123456; + constexpr std::size_t N = 12; #if defined(MALLOC_HOST) auto *array = sycl::malloc_host(N, Q); #elif defined(MALLOC_SHARED) @@ -34,7 +34,7 @@ int main() { // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM - // CHECK: {{READ of size 8 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(123456, 0, 0\)}} + // CHECK: {{READ of size 8 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12, 0, 0\)}} // CHECK: {{ #0 .* .*parallel_for_double.cpp:}}[[@LINE-7]] sycl::free(array, Q); diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp index f582ec78226cb..6627b26295234 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp @@ -18,12 +18,12 @@ __attribute__((noinline)) void foo(int *array, size_t i) { array[i] = 1; } // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM -// CHECK: {{WRITE of size 4 at kernel <.*MyKernel> LID\(0, 0, 0\) GID\(123, 0, 0\)}} +// CHECK: {{WRITE of size 4 at kernel <.*MyKernel> LID\(0, 0, 0\) GID\(12, 0, 0\)}} // CHECK: {{ #0 foo\(int\*, unsigned long\) .*parallel_for_func.cpp:}}[[@LINE-5]] int main() { sycl::queue Q; - constexpr std::size_t N = 123; + constexpr std::size_t N = 12; #if defined(MALLOC_HOST) auto *array = sycl::malloc_host(N, Q); #elif defined(MALLOC_SHARED) diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp index d51e3263ea7c5..178f578b2547e 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp @@ -16,7 +16,7 @@ int main() { sycl::queue Q; - constexpr std::size_t N = 512; + constexpr std::size_t N = 12; #if defined(MALLOC_HOST) auto *array = sycl::malloc_host(N, Q); #elif defined(MALLOC_SHARED) @@ -34,7 +34,7 @@ int main() { // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM - // CHECK: {{READ of size 4 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(512, 0, 0\)}} + // CHECK: {{READ of size 4 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12, 0, 0\)}} // CHECK: {{ #0 .* .*parallel_for_int.cpp:}}[[@LINE-7]] sycl::free(array, Q); diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp index 8b16598531605..cd488743453cc 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp @@ -16,7 +16,7 @@ int main() { sycl::queue Q; - constexpr std::size_t N = 1024; + constexpr std::size_t N = 12; #if defined(MALLOC_HOST) auto *array = sycl::malloc_host(N, Q); #elif defined(MALLOC_SHARED) @@ -34,7 +34,7 @@ int main() { // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM - // CHECK: {{READ of size 2 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(1024, 0, 0\)}} + // CHECK: {{READ of size 2 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12, 0, 0\)}} // CHECK: {{ #0 .* .*parallel_for_short.cpp:}}[[@LINE-7]] sycl::free(array, Q); diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp index 041f2b2ee1a69..9fb77d98766c6 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp @@ -16,7 +16,7 @@ int main() { sycl::queue Q; - constexpr std::size_t N = 12345; + constexpr std::size_t N = 12; #if defined(MALLOC_HOST) auto *array = sycl::malloc_host(N, Q); #elif defined(MALLOC_SHARED) @@ -33,7 +33,7 @@ int main() { // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM - // CHECK: READ of size 1 at kernel {{<.*MyKernel.*>}} LID({{.*}}, 0, 0) GID(12345, 0, 0) + // CHECK: READ of size 1 at kernel {{<.*MyKernel.*>}} LID({{.*}}, 0, 0) GID(12, 0, 0) // CHECK: {{ #0 .* .*parallel_no_local_size.cpp:}}[[@LINE-7]] sycl::free(array, Q);