diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index faf897690c48f..dad35683cb990 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -14,7 +14,7 @@ DeviceGlobal __MsanLaunchInfo; #define GetMsanLaunchInfo \ - ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get()) + ((__SYCL_GLOBAL__ MsanRuntimeData *)__MsanLaunchInfo.get()) namespace { @@ -160,14 +160,11 @@ inline uptr __msan_get_shadow_dg2(uptr addr, uint32_t as) { } if (as != ADDRESS_SPACE_GLOBAL || !(addr & DG2_DEVICE_USM_MASK)) - return (uptr)((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get()) - ->CleanShadow; + return (uptr)GetMsanLaunchInfo->CleanShadow; // Device USM only - auto shadow_begin = ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get()) - ->GlobalShadowOffset; - auto shadow_end = ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get()) - ->GlobalShadowOffsetEnd; + auto shadow_begin = GetMsanLaunchInfo->GlobalShadowOffset; + auto shadow_end = GetMsanLaunchInfo->GlobalShadowOffsetEnd; if (addr < shadow_begin) { return addr + (shadow_begin - DG2_DEVICE_USM_BEGIN); } else { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 8031758f04e2b..af14e2484b1be 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -68,7 +68,9 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, *ResultPtr = Allocated; - ContextInfo->MaxAllocatedSize = std::max(ContextInfo->MaxAllocatedSize, Size); + if (Type != AllocType::DEVICE_USM) { + ContextInfo->CleanShadowSize = std::max(ContextInfo->CleanShadowSize, Size); + } // For host/shared usm, we only record the alloc size. 
if (Type != AllocType::DEVICE_USM) { @@ -138,15 +140,16 @@ ur_result_t MsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, // FIXME: We must use block operation here, until we support // urEventSetCallback auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue); + UR_CALL(LaunchInfo.Data.syncFromDevice(Queue)); if (Result == UR_RESULT_SUCCESS) { - const auto &Report = LaunchInfo.Data->Report; + const auto &Report = LaunchInfo.Data.Host.Report; if (!Report.Flag) { return Result; } - ReportUsesUninitializedValue(LaunchInfo.Data->Report, Kernel); + ReportUsesUninitializedValue(LaunchInfo.Data.Host.Report, Kernel); exitWithErrors(); } @@ -286,8 +289,8 @@ MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { MsanShadowMemoryDG2::IsDeviceUSM(GVInfo.Addr))) { UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr, GVInfo.Size, 0)); - ContextInfo->MaxAllocatedSize = - std::max(ContextInfo->MaxAllocatedSize, GVInfo.Size); + ContextInfo->CleanShadowSize = + std::max(ContextInfo->CleanShadowSize, GVInfo.Size); } } } @@ -471,16 +474,20 @@ ur_result_t MsanInterceptor::prepareLaunch( // Set LaunchInfo auto ContextInfo = getContextInfo(LaunchInfo.Context); - LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; - LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; + LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; + LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; + + LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type; + LaunchInfo.Data.Host.Debug = getContext()->Options.Debug ? 1 : 0; - LaunchInfo.Data->DeviceTy = DeviceInfo->Type; - LaunchInfo.Data->Debug = getContext()->Options.Debug ? 
1 : 0; + // Clean shadow + // Its content is always zero, and is used for unsupported memory types UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr, - ContextInfo->MaxAllocatedSize, (void **)&LaunchInfo.Data->CleanShadow)); - UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)LaunchInfo.Data->CleanShadow, 0, - ContextInfo->MaxAllocatedSize, 0, nullptr, + ContextInfo->CleanShadowSize, + (void **)&LaunchInfo.Data.Host.CleanShadow)); + UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)LaunchInfo.Data.Host.CleanShadow, + 0, ContextInfo->CleanShadowSize, 0, nullptr, nullptr)); if (LaunchInfo.LocalWorkSize.empty()) { @@ -510,8 +517,8 @@ ur_result_t MsanInterceptor::prepareLaunch( // Write shadow memory offset for local memory if (KernelInfo.IsCheckLocals) { if (DeviceInfo->Shadow->AllocLocalShadow( - Queue, NumWG, LaunchInfo.Data->LocalShadowOffset, - LaunchInfo.Data->LocalShadowOffsetEnd) != UR_RESULT_SUCCESS) { + Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset, + LaunchInfo.Data.Host.LocalShadowOffsetEnd) != UR_RESULT_SUCCESS) { getContext()->logger.warning( "Failed to allocate shadow memory for local " "memory, maybe the number of workgroup ({}) is too " @@ -520,18 +527,18 @@ ur_result_t MsanInterceptor::prepareLaunch( getContext()->logger.warning("Skip checking local memory of kernel <{}> ", GetKernelName(Kernel)); } else { - getContext()->logger.debug("ShadowMemory(Local, WorkGroup={}, {} - {})", - NumWG, - (void *)LaunchInfo.Data->LocalShadowOffset, - (void *)LaunchInfo.Data->LocalShadowOffsetEnd); + getContext()->logger.debug( + "ShadowMemory(Local, WorkGroup={}, {} - {})", NumWG, + (void *)LaunchInfo.Data.Host.LocalShadowOffset, + (void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd); } } // Write shadow memory offset for private memory if (KernelInfo.IsCheckPrivates) { if (DeviceInfo->Shadow->AllocPrivateShadow( - Queue, NumWG, LaunchInfo.Data->PrivateShadowOffset, - LaunchInfo.Data->PrivateShadowOffsetEnd) != 
UR_RESULT_SUCCESS) { + Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset, + LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) { getContext()->logger.warning( "Failed to allocate shadow memory for private " "memory, maybe the number of workgroup ({}) is too " @@ -542,8 +549,8 @@ ur_result_t MsanInterceptor::prepareLaunch( } else { getContext()->logger.debug( "ShadowMemory(Private, WorkGroup={}, {} - {})", NumWG, - (void *)LaunchInfo.Data->PrivateShadowOffset, - (void *)LaunchInfo.Data->PrivateShadowOffsetEnd); + (void *)LaunchInfo.Data.Host.PrivateShadowOffset, + (void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd); } // Write local arguments info if (!KernelInfo.LocalArgs.empty()) { @@ -553,22 +560,26 @@ ur_result_t MsanInterceptor::prepareLaunch( getContext()->logger.debug("LocalArgs (argIndex={}, size={})", ArgIndex, ArgInfo.Size); } - UR_CALL(LaunchInfo.importLocalArgsInfo(Queue, LocalArgsInfo)); + UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo)); } } + // sync msan runtime data to device side + UR_CALL(LaunchInfo.Data.syncToDevice(Queue)); + getContext()->logger.info( "LaunchInfo {} (GlobalShadow={}, LocalShadow={}, PrivateShadow={}, " "CleanShadow={}, LocalArgs={}, NumLocalArgs={}, Device={}, Debug={})", - (void *)LaunchInfo.Data, (void *)LaunchInfo.Data->GlobalShadowOffset, - (void *)LaunchInfo.Data->LocalShadowOffset, - (void *)LaunchInfo.Data->PrivateShadowOffset, - (void *)LaunchInfo.Data->CleanShadow, (void *)LaunchInfo.Data->LocalArgs, - LaunchInfo.Data->NumLocalArgs, ToString(LaunchInfo.Data->DeviceTy), - LaunchInfo.Data->Debug); - - ur_result_t URes = - EnqueueWriteGlobal("__MsanLaunchInfo", &LaunchInfo.Data, sizeof(uptr)); + (void *)LaunchInfo.Data.getDevicePtr(), + (void *)LaunchInfo.Data.Host.GlobalShadowOffset, + (void *)LaunchInfo.Data.Host.LocalShadowOffset, + (void *)LaunchInfo.Data.Host.PrivateShadowOffset, + (void *)LaunchInfo.Data.Host.CleanShadow, + (void *)LaunchInfo.Data.Host.LocalArgs, 
LaunchInfo.Data.Host.NumLocalArgs, + ToString(LaunchInfo.Data.Host.DeviceTy), LaunchInfo.Data.Host.Debug); + + ur_result_t URes = EnqueueWriteGlobal( + "__MsanLaunchInfo", &LaunchInfo.Data.DevicePtr, sizeof(uptr)); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.info("EnqueueWriteGlobal(__MsanLaunchInfo) " "failed, maybe empty kernel: {}", @@ -641,47 +652,30 @@ ContextInfo::~ContextInfo() { ur_result_t USMLaunchInfo::initialize() { UR_CALL(getContext()->urDdiTable.Context.pfnRetain(Context)); UR_CALL(getContext()->urDdiTable.Device.pfnRetain(Device)); - UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( - Context, Device, nullptr, nullptr, sizeof(MsanLaunchInfo), - (void **)&Data)); - *Data = MsanLaunchInfo{}; return UR_RESULT_SUCCESS; } -USMLaunchInfo::~USMLaunchInfo() { - [[maybe_unused]] ur_result_t Result; - if (Data) { - if (Data->CleanShadow) { - Result = getContext()->urDdiTable.USM.pfnFree(Context, - (void *)Data->CleanShadow); - assert(Result == UR_RESULT_SUCCESS); - } - Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data); +MsanRuntimeDataWrapper::~MsanRuntimeDataWrapper() { + if (Host.CleanShadow) { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.USM.pfnFree(Context, (void *)Host.CleanShadow); + assert(Result == UR_RESULT_SUCCESS); + } + if (DevicePtr) { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.USM.pfnFree(Context, (void *)DevicePtr); assert(Result == UR_RESULT_SUCCESS); } +} + +USMLaunchInfo::~USMLaunchInfo() { + [[maybe_unused]] ur_result_t Result; Result = getContext()->urDdiTable.Context.pfnRelease(Context); assert(Result == UR_RESULT_SUCCESS); Result = getContext()->urDdiTable.Device.pfnRelease(Device); assert(Result == UR_RESULT_SUCCESS); } -ur_result_t USMLaunchInfo::importLocalArgsInfo( - ur_queue_handle_t Queue, const std::vector &LocalArgs) { - assert(!LocalArgs.empty()); - - Data->NumLocalArgs = LocalArgs.size(); - const size_t LocalArgsInfoSize = sizeof(MsanLocalArgsInfo) * 
LocalArgs.size(); - UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( - Context, Device, nullptr, nullptr, LocalArgsInfoSize, - ur_cast(&Data->LocalArgs))); - - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( - Queue, true, Data->LocalArgs, LocalArgs.data(), LocalArgsInfoSize, 0, - nullptr, nullptr)); - - return UR_RESULT_SUCCESS; -} - } // namespace msan using namespace msan; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp index 5147172edbd71..fb50e36f5a513 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -134,7 +134,7 @@ struct ProgramInfo { struct ContextInfo { ur_context_handle_t Handle; - size_t MaxAllocatedSize = 1024; + size_t CleanShadowSize = 1024; std::atomic RefCount = 1; std::vector DeviceList; @@ -148,8 +148,71 @@ struct ContextInfo { ~ContextInfo(); }; +struct MsanRuntimeDataWrapper { + MsanRuntimeData Host{}; + + MsanRuntimeData *DevicePtr = nullptr; + + ur_context_handle_t Context{}; + + ur_device_handle_t Device{}; + + MsanRuntimeDataWrapper(ur_context_handle_t Context, ur_device_handle_t Device) + : Context(Context), Device(Device) {} + + ~MsanRuntimeDataWrapper(); + + MsanRuntimeData *getDevicePtr() { + if (DevicePtr == nullptr) { + ur_result_t Result = getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, nullptr, nullptr, sizeof(MsanRuntimeData), + (void **)&DevicePtr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to alloc device usm for msan runtime data: {}", Result); + } + } + return DevicePtr; + } + + ur_result_t syncFromDevice(ur_queue_handle_t Queue) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, ur_cast(&Host), getDevicePtr(), + sizeof(MsanRuntimeData), 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; + } + + ur_result_t 
syncToDevice(ur_queue_handle_t Queue) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, getDevicePtr(), ur_cast(&Host), + sizeof(MsanRuntimeData), 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; + } + + ur_result_t + importLocalArgsInfo(ur_queue_handle_t Queue, + const std::vector &LocalArgs) { + assert(!LocalArgs.empty()); + + Host.NumLocalArgs = LocalArgs.size(); + const size_t LocalArgsInfoSize = + sizeof(MsanLocalArgsInfo) * Host.NumLocalArgs; + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, nullptr, nullptr, LocalArgsInfoSize, + ur_cast(&Host.LocalArgs))); + + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, Host.LocalArgs, &LocalArgs[0], LocalArgsInfoSize, 0, + nullptr, nullptr)); + + return UR_RESULT_SUCCESS; + } +}; + struct USMLaunchInfo { - MsanLaunchInfo *Data = nullptr; + MsanRuntimeDataWrapper Data; ur_context_handle_t Context = nullptr; ur_device_handle_t Device = nullptr; @@ -161,8 +224,9 @@ struct USMLaunchInfo { USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, const size_t *GlobalWorkOffset, uint32_t WorkDim) - : Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize), - GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) { + : Data(Context, Device), Context(Context), Device(Device), + GlobalWorkSize(GlobalWorkSize), GlobalWorkOffset(GlobalWorkOffset), + WorkDim(WorkDim) { if (LocalWorkSize) { this->LocalWorkSize = std::vector(LocalWorkSize, LocalWorkSize + WorkDim); @@ -171,9 +235,6 @@ struct USMLaunchInfo { ~USMLaunchInfo(); ur_result_t initialize(); - ur_result_t - importLocalArgsInfo(ur_queue_handle_t Queue, - const std::vector &LocalArgs); }; struct DeviceGlobalInfo { diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp index 8d65f54daa242..8d6fdf9a335f8 100644 --- 
a/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp @@ -44,7 +44,7 @@ struct MsanLocalArgsInfo { uint64_t Size = 0; }; -struct MsanLaunchInfo { +struct MsanRuntimeData { uintptr_t GlobalShadowOffset = 0; uintptr_t GlobalShadowOffsetEnd = 0;