diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index a76b8e93902c7..f5373df0e6696 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -118,13 +118,19 @@ struct ThreadSanitizerOnSpirv { void initialize(); - void instrumentKernelsMetadata(); + void instrumentModule(); void appendDebugInfoToArgs(Instruction *I, SmallVectorImpl &Args); private: + void instrumentGlobalVariables(); + + void instrumentKernelsMetadata(); + bool isSupportedSPIRKernel(Function &F); + bool isUnsupportedDeviceGlobal(const GlobalVariable &G); + GlobalVariable *GetOrCreateGlobalString(StringRef Name, StringRef Value, unsigned AddressSpace); @@ -243,7 +249,7 @@ PreservedAnalyses ModuleThreadSanitizerPass::run(Module &M, return PreservedAnalyses::all(); if (Triple(M.getTargetTriple()).isSPIROrSPIRV()) { ThreadSanitizerOnSpirv Spirv(M); - Spirv.instrumentKernelsMetadata(); + Spirv.instrumentModule(); } else insertModuleCtor(M); return PreservedAnalyses::none(); @@ -327,6 +333,70 @@ bool ThreadSanitizerOnSpirv::isSupportedSPIRKernel(Function &F) { return true; } +bool ThreadSanitizerOnSpirv::isUnsupportedDeviceGlobal( + const GlobalVariable &G) { + if (G.user_empty()) + return true; + // Skip instrumenting on "__TsanKernelMetadata" etc. + if (G.getName().starts_with("__Tsan")) + return true; + if (G.getName().starts_with("__tsan_")) + return true; + if (G.getName().starts_with("__spirv_BuiltIn")) + return true; + if (G.getName().starts_with("__usid_str")) + return true; + // TODO: Will support global variable with local address space later. + if (G.getAddressSpace() == kSpirOffloadLocalAS) + return true; + // Global variables have constant value or constant address space will not + // trigger race condition. + if (G.isConstant() || G.getAddressSpace() == kSpirOffloadConstantAS) + return true; + return false; +} + +void ThreadSanitizerOnSpirv::instrumentModule() { + instrumentGlobalVariables(); + instrumentKernelsMetadata(); +} + +void ThreadSanitizerOnSpirv::instrumentGlobalVariables() { + SmallVector DeviceGlobalMetadata; + + // Device global metadata is described by a structure + // size_t device_global_size + // size_t beginning address of the device global + StructType *StructTy = StructType::get(IntptrTy, IntptrTy); + + for (auto &G : M.globals()) { + if (isUnsupportedDeviceGlobal(G)) { + for (auto *User : G.users()) + if (auto *Inst = dyn_cast(User)) + Inst->setNoSanitizeMetadata(); + continue; + } + + DeviceGlobalMetadata.push_back(ConstantStruct::get( + StructTy, + ConstantInt::get(IntptrTy, DL.getTypeAllocSize(G.getValueType())), + ConstantExpr::getPointerCast(&G, IntptrTy))); + } + + if (DeviceGlobalMetadata.empty()) + return; + + // Create meta data global to record device globals' information + ArrayType *ArrayTy = ArrayType::get(StructTy, DeviceGlobalMetadata.size()); + Constant *MetadataInitializer = + ConstantArray::get(ArrayTy, DeviceGlobalMetadata); + GlobalVariable *MsanDeviceGlobalMetadata = new GlobalVariable( + M, MetadataInitializer->getType(), false, GlobalValue::AppendingLinkage, + MetadataInitializer, "__TsanDeviceGlobalMetadata", nullptr, + GlobalValue::NotThreadLocal, 1); + MsanDeviceGlobalMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); +} + void ThreadSanitizerOnSpirv::instrumentKernelsMetadata() { SmallVector SpirKernelsMetadata; diff --git a/llvm/test/Instrumentation/ThreadSanitizer/SPIRV/instrument_device_global.ll b/llvm/test/Instrumentation/ThreadSanitizer/SPIRV/instrument_device_global.ll new file mode 100644 index 0000000000000..0d5089a25d4ea --- /dev/null +++ b/llvm/test/Instrumentation/ThreadSanitizer/SPIRV/instrument_device_global.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -passes='function(tsan),module(tsan-module)' -tsan-instrument-func-entry-exit=0 -tsan-instrument-memintrinsics=0 -S | FileCheck %s +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@dev_global = external addrspace(1) global { [4 x i32] } +@dev_global_no_users = dso_local addrspace(1) global { [4 x i32] } zeroinitializer +@.str = external addrspace(1) constant [59 x i8] +@__spirv_BuiltInGlobalInvocationId = external addrspace(1) constant <3 x i64> + +; CHECK: @__TsanDeviceGlobalMetadata +; CHECK-NOT: @dev_global_no_users +; CHECK-NOT: @.str +; CHECK-NOT: @__spirv_BuiltInGlobalInvocationId +; CHECK-SAME: @dev_global + +define spir_func void @test() { +entry: + %call = call spir_func ptr addrspace(4) null(ptr addrspace(4) addrspacecast (ptr addrspace(1) @dev_global to ptr addrspace(4)), i64 0) + ret void +} diff --git a/sycl/test-e2e/ThreadSanitizer/check_device_global.cpp b/sycl/test-e2e/ThreadSanitizer/check_device_global.cpp new file mode 100644 index 0000000000000..d0cc151424ac6 --- /dev/null +++ b/sycl/test-e2e/ThreadSanitizer/check_device_global.cpp @@ -0,0 +1,30 @@ +// REQUIRES: linux, cpu || (gpu && level_zero) +// ALLOW_RETRIES: 10 +// RUN: %{build} %device_tsan_flags -O0 -g -o %t1.out +// RUN: %{run} %t1.out 2>&1 | FileCheck %s +// RUN: %{build} %device_tsan_flags -O2 -g -o %t2.out +// RUN: %{run} %t2.out 2>&1 | FileCheck %s +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi; +using namespace sycl::ext::oneapi::experimental; + +sycl::ext::oneapi::experimental::device_global< + int[4], decltype(properties(device_image_scope, host_access_read_write))> + dev_global; + +int main() { + sycl::queue Q; + + Q.submit([&](sycl::handler &h) { + h.parallel_for(sycl::nd_range<1>(32, 8), + [=](sycl::nd_item<1>) { dev_global[0]++; }); + }).wait(); + // CHECK: WARNING: DeviceSanitizer: data race + // CHECK-NEXT: When write of size 4 at 0x{{.*}} in kernel <{{.*}}Test> + // CHECK-NEXT: #0 {{.*}}check_device_global.cpp:[[@LINE-4]] + + return 0; +} diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp index b71b750acc29a..7960d987f2ba9 100644 --- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp @@ -130,6 +130,95 @@ ur_result_t urContextRelease( return UR_RESULT_SUCCESS; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuild +ur_result_t urProgramBuild( + /// [in] handle of the context object + ur_context_handle_t hContext, + /// [in] handle of the program object + ur_program_handle_t hProgram, + /// [in] string of build options + const char *pOptions) { + getContext()->logger.debug("==== urProgramBuild"); + + UR_CALL( + getContext()->urDdiTable.Program.pfnBuild(hContext, hProgram, pOptions)); + + UR_CALL(getTsanInterceptor()->registerProgram(hProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuildExp +ur_result_t urProgramBuildExp( + /// [in] Handle of the program to build. + ur_program_handle_t hProgram, + /// [in] number of devices + uint32_t numDevices, + /// [in][range(0, numDevices)] pointer to array of device handles + ur_device_handle_t *phDevices, + /// [in][optional] pointer to build options null-terminated string. + const char *pOptions) { + getContext()->logger.debug("==== urProgramBuildExp"); + + UR_CALL(getContext()->urDdiTable.ProgramExp.pfnBuildExp(hProgram, numDevices, + phDevices, pOptions)); + UR_CALL(getTsanInterceptor()->registerProgram(hProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramLink +ur_result_t urProgramLink( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. + const ur_program_handle_t *phPrograms, + /// [in][optional] pointer to linker options null-terminated string. + const char *pOptions, + /// [out] pointer to handle of program object created. + ur_program_handle_t *phProgram) { + getContext()->logger.debug("==== urProgramLink"); + + UR_CALL(getContext()->urDdiTable.Program.pfnLink(hContext, count, phPrograms, + pOptions, phProgram)); + + UR_CALL(getTsanInterceptor()->registerProgram(*phProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramLinkExp +ur_result_t urProgramLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of devices + uint32_t numDevices, + /// [in][range(0, numDevices)] pointer to array of device handles + ur_device_handle_t *phDevices, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. + const ur_program_handle_t *phPrograms, + /// [in][optional] pointer to linker options null-terminated string. + const char *pOptions, + /// [out] pointer to handle of program object created. + ur_program_handle_t *phProgram) { + getContext()->logger.debug("==== urProgramLinkExp"); + + UR_CALL(getContext()->urDdiTable.ProgramExp.pfnLinkExp( + hContext, numDevices, phDevices, count, phPrograms, pOptions, phProgram)); + + UR_CALL(getTsanInterceptor()->registerProgram(*phProgram)); + + return UR_RESULT_SUCCESS; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMDeviceAlloc __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc( @@ -283,6 +372,39 @@ __urdlllocal ur_result_t UR_APICALL urGetContextProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Program table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetProgramProcAddrTable( + /// [in,out] pointer to table of DDI function pointers + ur_program_dditable_t *pDdiTable) { + pDdiTable->pfnBuild = ur_sanitizer_layer::tsan::urProgramBuild; + pDdiTable->pfnLink = ur_sanitizer_layer::tsan::urProgramLink; + + return UR_RESULT_SUCCESS; +} + +/// @brief Exported function for filling application's ProgramExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetProgramExpProcAddrTable( + /// [in,out] pointer to table of DDI function pointers + ur_program_exp_dditable_t *pDdiTable) { + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnBuildExp = ur_sanitizer_layer::tsan::urProgramBuildExp; + pDdiTable->pfnLinkExp = ur_sanitizer_layer::tsan::urProgramLinkExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses @@ -363,6 +485,16 @@ ur_result_t initTsanDDITable(ur_dditable_t *dditable) { UR_API_VERSION_CURRENT, &dditable->Context); } + if (UR_RESULT_SUCCESS == result) { + result = + ur_sanitizer_layer::tsan::urGetProgramProcAddrTable(&dditable->Program); + } + + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::tsan::urGetProgramExpProcAddrTable( + &dditable->ProgramExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_sanitizer_layer::tsan::urGetUSMProcAddrTable( UR_API_VERSION_CURRENT, &dditable->USM); diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp index a37ebecb85fc8..9fd3d29b086d4 100644 --- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp @@ -69,15 +69,14 @@ ur_result_t DeviceInfo::allocShadowMemory() { return UR_RESULT_SUCCESS; } -void ContextInfo::insertAllocInfo(ur_device_handle_t Device, - std::shared_ptr &AI) { +void ContextInfo::insertAllocInfo(ur_device_handle_t Device, TsanAllocInfo AI) { if (Device) { std::scoped_lock Guard(AllocInfosMapMutex); - AllocInfosMap[Device].emplace_back(AI); + AllocInfosMap[Device].emplace_back(std::move(AI)); } else { for (auto Device : DeviceList) { std::scoped_lock Guard(AllocInfosMapMutex); - AllocInfosMap[Device].emplace_back(AI); + AllocInfosMap[Device].emplace_back(std::move(AI)); } } } @@ -103,16 +102,62 @@ ur_result_t TsanInterceptor::allocateMemory(ur_context_handle_t Context, Context, Device, Properties, Pool, Size, &Allocated)); } - auto AI = std::make_shared( - TsanAllocInfo{reinterpret_cast(Allocated), Size}); - + auto AI = TsanAllocInfo{reinterpret_cast(Allocated), Size}; // For updating shadow memory - CI->insertAllocInfo(Device, AI); + CI->insertAllocInfo(Device, std::move(AI)); *ResultPtr = Allocated; return UR_RESULT_SUCCESS; } +ur_result_t TsanInterceptor::registerProgram(ur_program_handle_t Program) { + getContext()->logger.info("registerDeviceGlobals"); + UR_CALL(registerDeviceGlobals(Program)); + return UR_RESULT_SUCCESS; +} + +ur_result_t TsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { + std::vector Devices = GetDevices(Program); + assert(Devices.size() != 0 && "No devices in registerDeviceGlobals"); + auto Context = GetContext(Program); + auto ContextInfo = getContextInfo(Context); + + for (auto Device : Devices) { + ManagedQueue Queue(Context, Device); + + size_t MetadataSize; + void *MetadataPtr; + auto Result = getContext()->urDdiTable.Program.pfnGetGlobalVariablePointer( + Device, Program, kSPIR_TsanDeviceGlobalMetadata, &MetadataSize, + &MetadataPtr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.info("No device globals"); + continue; + } + + const uint64_t NumOfDeviceGlobal = MetadataSize / sizeof(DeviceGlobalInfo); + assert((MetadataSize % sizeof(DeviceGlobalInfo) == 0) && + "DeviceGlobal metadata size is not correct"); + std::vector GVInfos(NumOfDeviceGlobal); + Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, &GVInfos[0], MetadataPtr, + sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, nullptr, nullptr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error("Device Global[{}] Read Failed: {}", + kSPIR_TsanDeviceGlobalMetadata, Result); + return Result; + } + + for (size_t i = 0; i < NumOfDeviceGlobal; i++) { + const auto &GVInfo = GVInfos[i]; + auto AI = TsanAllocInfo{GVInfo.Addr, GVInfo.Size}; + ContextInfo->insertAllocInfo(Device, std::move(AI)); + } + } + + return UR_RESULT_SUCCESS; +} + ur_result_t TsanInterceptor::insertContext(ur_context_handle_t Context, std::shared_ptr &CI) { std::scoped_lock Guard(m_ContextMapMutex); @@ -225,9 +270,10 @@ TsanInterceptor::updateShadowMemory(std::shared_ptr &CI, ur_queue_handle_t Queue) { std::scoped_lock Guard(CI->AllocInfosMapMutex); for (auto &AllocInfo : CI->AllocInfosMap[DI->Handle]) { - UR_CALL(DI->Shadow->CleanShadow(Queue, AllocInfo->AllocBegin, - AllocInfo->AllocSize)); + UR_CALL(DI->Shadow->CleanShadow(Queue, AllocInfo.AllocBegin, + AllocInfo.AllocSize)); } + CI->AllocInfosMap[DI->Handle].clear(); return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp index 661b0d1711ee1..f7d79c60a2096 100644 --- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp @@ -48,8 +48,7 @@ struct ContextInfo { std::vector DeviceList; ur_shared_mutex AllocInfosMapMutex; - std::unordered_map>> + std::unordered_map> AllocInfosMap; explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) { @@ -68,8 +67,12 @@ struct ContextInfo { ContextInfo &operator=(const ContextInfo &) = delete; - void insertAllocInfo(ur_device_handle_t Device, - std::shared_ptr &AI); + void insertAllocInfo(ur_device_handle_t Device, TsanAllocInfo AI); +}; + +struct DeviceGlobalInfo { + uptr Size; + uptr Addr; }; struct TsanRuntimeDataWrapper { @@ -132,6 +135,8 @@ class TsanInterceptor { ur_usm_pool_handle_t Pool, size_t Size, AllocType Type, void **ResultPtr); + ur_result_t registerProgram(ur_program_handle_t Program); + ur_result_t insertContext(ur_context_handle_t Context, std::shared_ptr &CI); @@ -168,6 +173,8 @@ class TsanInterceptor { ur_queue_handle_t Queue, ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo); + ur_result_t registerDeviceGlobals(ur_program_handle_t Program); + private: std::unordered_map> m_ContextMap; diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_libdevice.hpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_libdevice.hpp index 25e876b6323e3..fc4251b3cc46b 100644 --- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_libdevice.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_libdevice.hpp @@ -94,6 +94,10 @@ struct TsanRuntimeData { TsanErrorReport Report[TSAN_MAX_NUM_REPORTS]; }; +constexpr auto kSPIR_TsanDeviceGlobalMetadata = "__TsanDeviceGlobalMetadata"; + +constexpr auto kSPIR_TsanSpirKernelMetadata = "__TsanKernelMetadata"; + #if !defined(__SPIR__) && !defined(__SPIRV__) } // namespace ur_sanitizer_layer #endif // !__SPIR__ && !__SPIRV__