Skip to content

Commit

Permalink
[SYCL][L0] Skip uninitialized buffer migration copy (#7168)
Browse files Browse the repository at this point in the history
Signed-off-by: Sergey V Maslov <sergey.v.maslov@intel.com>
  • Loading branch information
smaslov-intel committed Oct 25, 2022
1 parent 0417651 commit b0c3404
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 16 deletions.
48 changes: 38 additions & 10 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3127,8 +3127,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,

// intel extensions for GPU information
case PI_DEVICE_INFO_DEVICE_ID:
return ReturnValue(
pi_uint32{Device->ZeDeviceProperties->deviceId});
return ReturnValue(pi_uint32{Device->ZeDeviceProperties->deviceId});
case PI_DEVICE_INFO_PCI_ADDRESS: {
if (getenv("ZES_ENABLE_SYSMAN") == nullptr) {
zePrint("Set SYCL_ENABLE_PCI=1 to obtain PCI data.\n");
Expand Down Expand Up @@ -3958,8 +3957,12 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
(Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0) {

// We don't yet know which device needs this buffer, so make the first
// device in the context be the master, and hold the initial valid
// allocation.
char *ZeHandleDst;
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only));
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only,
Context->Devices[0]));
if (Buffer->OnHost) {
// Do a host to host copy.
// For an imported HostPtr the copy is unneeded.
Expand Down Expand Up @@ -4331,7 +4334,7 @@ pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
// represent the buffer in this context) copy the data to a newly
// created device allocation.
char *ZeHandleDst;
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only));
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only, Device));

// zeCommandListAppendMemoryCopy must not be called from simultaneous
// threads with the same command list handle, so we need exclusive lock.
Expand Down Expand Up @@ -8757,9 +8760,16 @@ size_t _pi_buffer::getAlignment() const {
pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
pi_device Device) {

// NOTE: There might be no valid allocation at all yet and we get
// here from piEnqueueKernelLaunch that would be doing the buffer
// initialization. In this case the Device is not null as kernel
// launch is always on a specific device.
if (!Device)
Device = LastDeviceWithValidAllocation;
PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE);
// If the device is still not selected then use the first one in
// the context of the buffer.
if (!Device)
Device = Context->Devices[0];

auto &Allocation = Allocations[Device];

Expand Down Expand Up @@ -8814,6 +8824,9 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
// devices in the context have the same root.
PI_CALL(getZeHandle(ZeHandle, AccessMode, Context->SingleRootDevice));
Allocation.ReleaseAction = allocation_t::keep;
Allocation.ZeHandle = ZeHandle;
Allocation.Valid = true;
return PI_SUCCESS;
} else { // Create device allocation
if (enableBufferPooling()) {
Allocation.ReleaseAction = allocation_t::free;
Expand All @@ -8836,13 +8849,27 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
if (Device == LastDeviceWithValidAllocation)
die("getZeHandle: last used allocation is not valid");

Allocation.Valid = true;
// For write-only access the allocation contents is not going to be used.
// So don't do anything to make it "valid".
if (AccessMode != _pi_mem::write_only) {
char *ZeHandleSrc;
bool NeedCopy = AccessMode != _pi_mem::write_only;
// It's also possible that the buffer doesn't have a valid allocation
// yet presumably when it is passed to a kernel that will perform
// it's intialization.
if (NeedCopy && !LastDeviceWithValidAllocation) {
NeedCopy = false;
}
char *ZeHandleSrc = nullptr;
if (NeedCopy) {
PI_CALL(getZeHandle(ZeHandleSrc, _pi_mem::read_only,
LastDeviceWithValidAllocation));
// It's possible with the single root-device contexts that
// the buffer is represented by the single root-device
// allocation and then skip the copy to itself.
if (ZeHandleSrc == ZeHandle)
NeedCopy = false;
}

if (NeedCopy) {
// Copy valid buffer data to this allocation.
// TODO: see if we should better use peer's device allocation used
// directly, if that capability is reported with zeDeviceCanAccessPeer,
Expand All @@ -8851,7 +8878,7 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
//
// zeCommandListAppendMemoryCopy must not be called from simultaneous
// threads with the same command list handle, so we need exclusive lock.
ze_bool_t P2P;
ze_bool_t P2P = false;
ZE_CALL(
zeDeviceCanAccessPeer,
(Device->ZeDevice, LastDeviceWithValidAllocation->ZeDevice, &P2P));
Expand Down Expand Up @@ -8895,9 +8922,10 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
Size, nullptr, 0, nullptr));
}
}
Allocation.Valid = true;
LastDeviceWithValidAllocation = Device;
}

LastDeviceWithValidAllocation = Device;
// Invalidate other allocations that would become not valid if
// this access is not read-only.
if (AccessMode != _pi_mem::read_only) {
Expand Down
8 changes: 2 additions & 6 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1131,12 +1131,8 @@ struct _pi_buffer final : _pi_mem {
}
}

// Make first device in the context be the master. Mark that
// allocation (yet to be made) having "valid" data. And real
// allocation and initialization should follow the buffer
// construction with a "write_only" access copy.
LastDeviceWithValidAllocation = Context->Devices[0];
Allocations[LastDeviceWithValidAllocation].Valid = true;
// This initialization does not end up with any valid allocation yet.
LastDeviceWithValidAllocation = nullptr;
}

// Sub-buffer constructor
Expand Down

0 comments on commit b0c3404

Please sign in to comment.