diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index e24c00892b266..0133bcfa7a100 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -174,4 +174,28 @@ _PI_API(piextCommandBufferMemBufferRead) _PI_API(piextCommandBufferMemBufferReadRect) _PI_API(piextEnqueueCommandBuffer) +_PI_API(piextUSMPitchedAlloc) + +// Bindless Images +_PI_API(piextMemUnsampledImageHandleDestroy) +_PI_API(piextMemSampledImageHandleDestroy) +_PI_API(piextBindlessImageSamplerCreate) +_PI_API(piextMemImageAllocate) +_PI_API(piextMemImageFree) +_PI_API(piextMemUnsampledImageCreate) +_PI_API(piextMemSampledImageCreate) +_PI_API(piextMemImageCopy) +_PI_API(piextMemImageGetInfo) +_PI_API(piextMemMipmapGetLevel) +_PI_API(piextMemMipmapFree) + +// Interop +_PI_API(piextMemImportOpaqueFD) +_PI_API(piextMemReleaseInterop) +_PI_API(piextMemMapExternalArray) +_PI_API(piextImportExternalSemaphoreOpaqueFD) +_PI_API(piextDestroyExternalSemaphore) +_PI_API(piextWaitExternalSemaphore) +_PI_API(piextSignalExternalSemaphore) + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index ef37673ca117e..4445cfb124a31 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -99,9 +99,53 @@ // 14.34 Added command-buffer extension methods // 14.35 Added piextEnablePeerAccess, piextDisablePeerAccess, // piextPeerAccessGetInfo, and pi_peer_attr enum. +// 14.36 Adding support for experimental bindless images. This includes: +// - Added device info queries +// - Device queries for bindless image support +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT +// - Device queries for pitched USM allocations +// - PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_PITCH_ALIGN +// - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH +// - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT +// - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH +// - Device queries for mipmap image support +// - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY +// - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT +// - Device queries for interop memory support +// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT +// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT +// - Added PI_IMAGE_INFO_DEPTH to _pi_image_info +// - Added _pi_image_copy_flags enum to determine direction of copy +// - Added new extension functions +// - piextBindlessImageSamplerCreate +// - piextUSMPitchedAlloc +// - piextMemUnsampledImageHandleDestroy +// - piextMemSampledImageHandleDestroy +// - piextMemImageAllocate +// - piextMemImageFree +// - piextMemUnsampledImageCreate +// - piextMemSampledImageCreate +// - piextMemImageCopy +// - piextMemImageGetInfo +// - piextMemMipmapGetLevel +// - piextMemMipmapFree +// - piextMemImportOpaqueFD +// - piextMemMapExternalArray +// - piextMemReleaseInterop +// - piextImportExternalSemaphoreOpaqueFD +// - piextDestroyExternalSemaphore +// - piextWaitExternalSemaphore +// - piextSignalExternalSemaphore #define _PI_H_VERSION_MAJOR 14 -#define _PI_H_VERSION_MINOR 35 +#define _PI_H_VERSION_MINOR 36 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -363,6 +407,25 @@ typedef enum { PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT = 0x20008, // The number of max registers per block (device specific) PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP = 0x20009, + + // Bindless images, mipmaps, interop + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT = 0x20100, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT = 0x20101, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT = 0x20102, + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT = 0x20103, + PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_PITCH_ALIGN = 0x20104, + PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH = 0x20105, + PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT = 0x20106, + PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH = 0x20107, + PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_SUPPORT = 0x20108, + PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT = 0x20109, + PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY = 0x2010A, + PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT = 0x2010B, + PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT = 0x2010C, + PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT = 0x2010D, + PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT = 0x2010E, + PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT = 0x2010F, + } _pi_device_info; typedef enum { @@ -548,6 +611,12 @@ typedef enum { PI_IMAGE_CHANNEL_TYPE_FLOAT = 0x10DE } _pi_image_channel_type; +typedef enum { + PI_IMAGE_COPY_HOST_TO_DEVICE = 0, + PI_IMAGE_COPY_DEVICE_TO_HOST = 1, + PI_IMAGE_COPY_DEVICE_TO_DEVICE = 2 +} _pi_image_copy_flags; + typedef enum { PI_BUFFER_CREATE_TYPE_REGION = 0x1220 } _pi_buffer_create_type; const pi_bool PI_TRUE = 1; @@ -590,6 +659,7 @@ constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = 0x1152; constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = 0x1153; constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = 0x1154; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_MIP_FILTER_MODE = 0x1155; using pi_memory_order_capabilities = pi_bitfield; constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELAXED = 0x01; @@ -707,6 +777,8 @@ using pi_kernel_info = _pi_kernel_info; using pi_profiling_info = _pi_profiling_info; using pi_kernel_cache_config = _pi_kernel_cache_config; +using pi_image_copy_flags = _pi_image_copy_flags; + // For compatibility with OpenCL define this not as enum. using pi_device_partition_property = intptr_t; static constexpr pi_device_partition_property PI_DEVICE_PARTITION_EQUALLY = @@ -1010,6 +1082,10 @@ using pi_program = _pi_program *; using pi_kernel = _pi_kernel *; using pi_event = _pi_event *; using pi_sampler = _pi_sampler *; +using pi_image_handle = pi_uint64; +using pi_image_mem_handle = void *; +using pi_interop_mem_handle = pi_uint64; +using pi_interop_semaphore_handle = pi_uint64; typedef struct { pi_image_channel_order image_channel_order; @@ -1850,6 +1926,22 @@ __SYCL_EXPORT pi_result piextUSMSharedAlloc(void **result_ptr, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment); +/// Allocates memory accessible on device +/// +/// \param result_ptr contains the allocated memory +/// \param result_pitch contains the returned memory pitch +/// \param context is the pi_context +/// \param device is the device the memory will be allocated on +/// \param properties are optional allocation properties +/// \param width_in_bytes is the width of the allocation in bytes +/// \param height is the height of the allocation in rows +/// \param element_size_bytes is the size in bytes of an element in the +/// allocation +__SYCL_EXPORT pi_result piextUSMPitchedAlloc( + void **result_ptr, size_t *result_pitch, pi_context context, + pi_device device, pi_usm_mem_properties *properties, size_t width_in_bytes, + size_t height, unsigned int element_size_bytes); + /// Indicates that the allocated USM memory is no longer needed on the runtime /// side. The actual freeing of the memory may be done in a blocking or deferred /// manner, e.g. to avoid issues with indirect memory access from kernels. @@ -2363,6 +2455,222 @@ piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, pi_queue queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); +/// API to destroy bindless unsampled image handles. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param handle is the image handle +__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( + pi_context context, pi_device device, pi_image_handle handle); + +/// API to destroy bindless sampled image handles. +/// +/// \param context is the pi_context +/// \param handle is the image handle +__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( + pi_context context, pi_device device, pi_image_handle handle); + +/// API to allocate memory for bindless images. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param flags are extra flags to pass (currently unused) +/// \param image_format format of the image (channel order and data type) +/// \param image_desc image descriptor +/// \param ret_mem is the returning memory handle to newly allocated memory +__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context context, + pi_device device, + pi_image_format *image_format, + pi_image_desc *image_desc, + pi_image_mem_handle *ret_mem); + +/// API to retrieve individual image from mipmap. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param mip_mem is the memory handle to the mipmap +/// \param level is the requested level of the mipmap +/// \param ret_mem is the returning memory handle to the individual image +__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context context, + pi_device device, + pi_image_mem_handle mip_mem, + unsigned int level, + pi_image_mem_handle *ret_mem); + +/// API to free memory for bindless images. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param memory_handle is the handle to image memory to be freed +__SYCL_EXPORT pi_result piextMemImageFree(pi_context context, pi_device device, + pi_image_mem_handle memory_handle); + +/// API to free mipmap memory for bindless images. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param memory_handle is the handle to image memory to be freed +__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context context, pi_device device, + pi_image_mem_handle memory_handle); + +/// API to create bindless image handles. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param img_mem is the handle to memory from which to create the image +/// \param image_format format of the image (channel order and data type) +/// \param image_desc image descriptor +/// \param ret_mem is the returning pi_mem image object +/// \param ret_handle is the returning memory handle to newly allocated memory +__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( + pi_context context, pi_device device, pi_image_mem_handle img_mem, + pi_image_format *image_format, pi_image_desc *image_desc, pi_mem *ret_mem, + pi_image_handle *ret_handle); + +/// API to create sampled bindless image handles. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param img_mem is the handle to memory from which to create the image +/// \param image_format format of the image (channel order and data type) +/// \param image_desc image descriptor +/// \param sampler is the pi_sampler +/// \param ret_mem is the returning pi_mem image object +/// \param ret_handle is the returning memory handle to newly allocated memory +__SYCL_EXPORT pi_result piextMemSampledImageCreate( + pi_context context, pi_device device, pi_image_mem_handle img_mem, + pi_image_format *image_format, pi_image_desc *image_desc, + pi_sampler sampler, pi_mem *ret_mem, pi_image_handle *ret_handle); + +/// API to create samplers for bindless images. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param sampler_properties is the pointer to the sampler properties bitfield +/// \param min_mipmap_level_clamp is the minimum mipmap level to sample from +/// \param max_mipmap_level_clamp is the maximum mipmap level to sample from +/// \param max_anisotropy is the maximum anisotropic ratio +/// \param result_sampler is the returned sampler +__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( + pi_context context, const pi_sampler_properties *sampler_properties, + float min_mipmap_level_clamp, float max_mipmap_level_clamp, + float max_anisotropy, pi_sampler *result_sampler); + +/// API to copy image data Host to Device or Device to Host. +/// +/// \param queue is the queue to submit to +/// \param dst_ptr is the location the data will be copied to +/// \param src_ptr is the data to be copied +/// \param image_format format of the image (channel order and data type) +/// \param image_desc image descriptor +/// \param flags flags describing copy direction (H2D or D2H) +/// \param src_offset is the offset into the source image/memory +/// \param dst_offset is the offset into the destination image/memory +/// \param copy_extent is the extent (region) of the image/memory to copy +/// \param host_extent is the extent (region) of the memory on the host +/// \param num_events_in_wait_list is the number of events in the wait list +/// \param event_wait_list is the list of events to wait on before copying +/// \param event is the returned event representing this operation +__SYCL_EXPORT pi_result piextMemImageCopy( + pi_queue command_queue, void *dst_ptr, void *src_ptr, + const pi_image_format *image_format, const pi_image_desc *image_desc, + const pi_image_copy_flags flags, pi_image_offset src_offset, + pi_image_offset dst_offset, pi_image_region copy_extent, + pi_image_region host_extent, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +/// API to query an image memory handle for specific properties. +/// +/// \param mem_handle is the handle to the image memory +/// \param param_name is the queried info name +/// \param param_value is the returned query value +/// \param param_value_size_ret is the returned query value size +__SYCL_EXPORT pi_result piextMemImageGetInfo( + const pi_image_mem_handle mem_handle, pi_image_info param_name, + void *param_value, size_t *param_value_size_ret); + +/// API to import external memory in the form of a file descriptor. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param size is the size of the external memory +/// \param file_descriptor is the file descriptor +/// \param ret_handle is the returned interop memory handle to the external +/// memory +__SYCL_EXPORT pi_result +piextMemImportOpaqueFD(pi_context context, pi_device device, size_t size, + int file_descriptor, pi_interop_mem_handle *ret_handle); + +/// API to map an interop memory handle to an image memory handle. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param image_format format of the image (channel order and data type) +/// \param image_desc image descriptor +/// \param mem_handle is the interop memory handle to the external memory +/// \param ret_mem is the returned image memory handle to the externally +/// allocated memory +__SYCL_EXPORT pi_result piextMemMapExternalArray( + pi_context context, pi_device device, pi_image_format *image_format, + pi_image_desc *image_desc, pi_interop_mem_handle mem_handle, + pi_image_mem_handle *ret_mem); + +/// API to destroy interop memory. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param memory_handle is the handle to interop memory to be freed +__SYCL_EXPORT pi_result piextMemReleaseInterop( + pi_context context, pi_device device, pi_interop_mem_handle memory_handle); + +/// API to import an external semaphore in the form of a file descriptor. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param file_descriptor is the file descriptor +/// \param ret_handle is the returned interop semaphore handle to the external +/// semaphore +__SYCL_EXPORT pi_result piextImportExternalSemaphoreOpaqueFD( + pi_context context, pi_device device, int file_descriptor, + pi_interop_semaphore_handle *ret_handle); + +/// API to destroy the external semaphore handle. +/// +/// \param context is the pi_context +/// \param device is the pi_device +/// \param sem_handle is the interop semaphore handle to the external semaphore +/// to be destroyed +__SYCL_EXPORT pi_result +piextDestroyExternalSemaphore(pi_context context, pi_device device, + pi_interop_semaphore_handle sem_handle); + +/// API to instruct the queue with a non-blocking wait on an external semaphore. +/// +/// \param command_queue is the queue instructed to wait +/// \param sem_handle is the interop semaphore handle +/// \param num_events_in_wait_list is the number of events in the wait list +/// \param event_wait_list is the list of events to wait on before this +/// operation +/// \param event is the returned event representing this operation +__SYCL_EXPORT pi_result piextWaitExternalSemaphore( + pi_queue command_queue, pi_interop_semaphore_handle sem_handle, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event); + +/// API to instruct the queue to signal the external semaphore handle once all +/// previous commands have completed execution. +/// +/// \param command_queue is the queue instructed to signal +/// \param sem_handle is the interop semaphore handle to signal +/// \param num_events_in_wait_list is the number of events in the wait list +/// \param event_wait_list is the list of events to wait on before this +/// operation +/// \param event is the returned event representing this operation +__SYCL_EXPORT pi_result piextSignalExternalSemaphore( + pi_queue command_queue, pi_interop_semaphore_handle sem_handle, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event); + struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin // checks and writes the appropriate Function Pointers in diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index c954ce37510a3..65f447a144b02 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -156,6 +156,13 @@ using PiExtSyncPoint = ::pi_ext_sync_point; using PiExtCommandBuffer = ::pi_ext_command_buffer; using PiExtCommandBufferDesc = ::pi_ext_command_buffer_desc; using PiPeerAttr = ::pi_peer_attr; +using PiImageHandle = ::pi_image_handle; +using PiImageMemHandle = ::pi_image_mem_handle; +using PiImageCopyFlags = ::pi_image_copy_flags; +using PiInteropMemHandle = ::pi_interop_mem_handle; +using PiInteropSemaphoreHandle = ::pi_interop_semaphore_handle; +using PiImageOffset = ::pi_image_offset_struct; +using PiImageRegion = ::pi_image_region_struct; __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, pi_context_extended_deleter func, diff --git a/sycl/plugins/cuda/CMakeLists.txt b/sycl/plugins/cuda/CMakeLists.txt index 99f6f601ee46a..701b6bc4e6555 100644 --- a/sycl/plugins/cuda/CMakeLists.txt +++ b/sycl/plugins/cuda/CMakeLists.txt @@ -64,6 +64,8 @@ add_sycl_plugin(cuda "../unified_runtime/ur/adapters/cuda/enqueue.cpp" "../unified_runtime/ur/adapters/cuda/event.cpp" "../unified_runtime/ur/adapters/cuda/event.hpp" + "../unified_runtime/ur/adapters/cuda/image.cpp" + "../unified_runtime/ur/adapters/cuda/image.hpp" "../unified_runtime/ur/adapters/cuda/kernel.cpp" "../unified_runtime/ur/adapters/cuda/kernel.hpp" "../unified_runtime/ur/adapters/cuda/memory.cpp" diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 85b455309f64e..c7691d2a07231 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -158,6 +158,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextUSMHostAlloc, pi2ur::piextUSMHostAlloc) _PI_CL(piextUSMDeviceAlloc, pi2ur::piextUSMDeviceAlloc) _PI_CL(piextUSMSharedAlloc, pi2ur::piextUSMSharedAlloc) + _PI_CL(piextUSMPitchedAlloc, pi2ur::piextUSMPitchedAlloc) _PI_CL(piextUSMFree, pi2ur::piextUSMFree) _PI_CL(piextUSMEnqueueMemset, pi2ur::piextUSMEnqueueMemset) _PI_CL(piextUSMEnqueueMemcpy, pi2ur::piextUSMEnqueueMemcpy) @@ -196,10 +197,38 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextCommandBufferMemBufferCopyRect, pi2ur::piextCommandBufferMemBufferCopyRect) _PI_CL(piextEnqueueCommandBuffer, pi2ur::piextEnqueueCommandBuffer) + // Peer to Peer _PI_CL(piextEnablePeerAccess, pi2ur::piextEnablePeerAccess) _PI_CL(piextDisablePeerAccess, pi2ur::piextDisablePeerAccess) _PI_CL(piextPeerAccessGetInfo, pi2ur::piextPeerAccessGetInfo) + + // Bindless Images + _PI_CL(piextMemUnsampledImageHandleDestroy, + pi2ur::piextMemUnsampledImageHandleDestroy) + _PI_CL(piextMemSampledImageHandleDestroy, + pi2ur::piextMemSampledImageHandleDestroy) + _PI_CL(piextMemImageAllocate, pi2ur::piextMemImageAllocate) + _PI_CL(piextMemImageFree, pi2ur::piextMemImageFree) + _PI_CL(piextMemUnsampledImageCreate, pi2ur::piextMemUnsampledImageCreate) + _PI_CL(piextMemSampledImageCreate, pi2ur::piextMemSampledImageCreate) + _PI_CL(piextBindlessImageSamplerCreate, + pi2ur::piextBindlessImageSamplerCreate) + _PI_CL(piextMemImageCopy, pi2ur::piextMemImageCopy) + _PI_CL(piextMemImageGetInfo, pi2ur::piextMemImageGetInfo) + + _PI_CL(piextMemMipmapGetLevel, pi2ur::piextMemMipmapGetLevel) + _PI_CL(piextMemMipmapFree, pi2ur::piextMemMipmapFree) + + _PI_CL(piextMemImportOpaqueFD, pi2ur::piextMemImportOpaqueFD) + _PI_CL(piextMemReleaseInterop, pi2ur::piextMemReleaseInterop) + _PI_CL(piextMemMapExternalArray, pi2ur::piextMemMapExternalArray) + _PI_CL(piextImportExternalSemaphoreOpaqueFD, + pi2ur::piextImportExternalSemaphoreOpaqueFD) + _PI_CL(piextDestroyExternalSemaphore, pi2ur::piextDestroyExternalSemaphore) + _PI_CL(piextWaitExternalSemaphore, pi2ur::piextWaitExternalSemaphore) + _PI_CL(piextSignalExternalSemaphore, pi2ur::piextSignalExternalSemaphore) + #undef _PI_CL return PI_SUCCESS; diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 9272e8f557b99..f273220dd954e 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -1826,6 +1826,12 @@ pi_result piEnqueueMemImageWrite(pi_queue, pi_mem, pi_bool, pi_image_offset, DIE_NO_IMPLEMENTATION; } +pi_result piextBindlessImageSamplerCreate(pi_context, + const pi_sampler_properties *, float, + float, float, pi_sampler *) { + DIE_NO_IMPLEMENTATION; +} + pi_result piEnqueueMemImageCopy(pi_queue, pi_mem, pi_mem, pi_image_offset, pi_image_offset, pi_image_region, pi_uint32, const pi_event *, pi_event *) { @@ -1968,6 +1974,12 @@ pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, return PI_SUCCESS; } +pi_result piextUSMPitchedAlloc(void **, size_t *, pi_context, pi_device, + pi_usm_mem_properties *, size_t, size_t, + unsigned int) { + DIE_NO_IMPLEMENTATION; +} + pi_result piextUSMFree(pi_context Context, void *Ptr) { if (Context == nullptr) { return PI_ERROR_INVALID_CONTEXT; @@ -2267,6 +2279,96 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, return ReturnValue(pi_int32{0}); } +pi_result piextMemUnsampledImageHandleDestroy(pi_context, pi_device, + pi_image_handle) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemSampledImageHandleDestroy(pi_context, pi_device, + pi_image_handle) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemImageAllocate(pi_context, pi_device, pi_image_format *, + pi_image_desc *, pi_image_mem_handle *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemMipmapGetLevel(pi_context, pi_device, pi_image_mem_handle, + unsigned int, pi_image_mem_handle *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemImageFree(pi_context, pi_device, pi_image_mem_handle) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemMipmapFree(pi_context, pi_device, pi_image_mem_handle) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemUnsampledImageCreate(pi_context, pi_device, + pi_image_mem_handle, pi_image_format *, + pi_image_desc *, pi_mem *, + pi_image_handle *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemSampledImageCreate(pi_context, pi_device, pi_image_mem_handle, + pi_image_format *, pi_image_desc *, + pi_sampler, pi_mem *, pi_image_handle *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemImageCopy(pi_queue, void *, void *, const pi_image_format *, + const pi_image_desc *, const pi_image_copy_flags, + pi_image_offset, pi_image_offset, pi_image_region, + pi_image_region, pi_uint32, const pi_event *, + pi_event *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemImageGetInfo(const pi_image_mem_handle, pi_image_info, void *, + size_t *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemImportOpaqueFD(pi_context, pi_device, size_t, int, + pi_interop_mem_handle *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemMapExternalArray(pi_context, pi_device, pi_image_format *, + pi_image_desc *, pi_interop_mem_handle, + pi_image_mem_handle *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextMemReleaseInterop(pi_context, pi_device, pi_interop_mem_handle) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextImportExternalSemaphoreOpaqueFD(pi_context, pi_device, int, + pi_interop_semaphore_handle *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextDestroyExternalSemaphore(pi_context, pi_device, + pi_interop_semaphore_handle) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextWaitExternalSemaphore(pi_queue, pi_interop_semaphore_handle, + pi_uint32, const pi_event *, pi_event *) { + DIE_NO_IMPLEMENTATION; +} + +pi_result piextSignalExternalSemaphore(pi_queue, pi_interop_semaphore_handle, + pi_uint32, const pi_event *, + pi_event *) { + DIE_NO_IMPLEMENTATION; +} + #ifdef _WIN32 #define __SYCL_PLUGIN_DLL_NAME "pi_esimd_emulator.dll" #include "../common_win_pi_trace/common_win_pi_trace.hpp" diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt index 8c5a0d4f92c43..bd24ffea80220 100755 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ b/sycl/plugins/level_zero/CMakeLists.txt @@ -107,8 +107,9 @@ add_sycl_plugin(level_zero "../unified_runtime/ur/adapters/level_zero/context.hpp" "../unified_runtime/ur/adapters/level_zero/device.hpp" "../unified_runtime/ur/adapters/level_zero/event.hpp" - "../unified_runtime/ur/adapters/level_zero/memory.hpp" + "../unified_runtime/ur/adapters/level_zero/image.hpp" "../unified_runtime/ur/adapters/level_zero/kernel.hpp" + "../unified_runtime/ur/adapters/level_zero/memory.hpp" "../unified_runtime/ur/adapters/level_zero/platform.hpp" "../unified_runtime/ur/adapters/level_zero/program.hpp" "../unified_runtime/ur/adapters/level_zero/queue.hpp" @@ -120,8 +121,9 @@ add_sycl_plugin(level_zero "../unified_runtime/ur/adapters/level_zero/context.cpp" "../unified_runtime/ur/adapters/level_zero/device.cpp" "../unified_runtime/ur/adapters/level_zero/event.cpp" - "../unified_runtime/ur/adapters/level_zero/memory.cpp" + "../unified_runtime/ur/adapters/level_zero/image.cpp" "../unified_runtime/ur/adapters/level_zero/kernel.cpp" + "../unified_runtime/ur/adapters/level_zero/memory.cpp" "../unified_runtime/ur/adapters/level_zero/platform.cpp" "../unified_runtime/ur/adapters/level_zero/program.cpp" "../unified_runtime/ur/adapters/level_zero/queue.cpp" diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index c165d7b08c5c1..99fc21cece3b9 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -383,6 +383,140 @@ pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, ParamValueSizeRet); } +__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context Context, + pi_device Device, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, + RetMem); +} + +__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( + pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_mem *RetMem, + pi_image_handle *RetHandle) { + return pi2ur::piextMemUnsampledImageCreate( + Context, Device, ImgMem, ImageFormat, ImageDesc, RetMem, RetHandle); +} + +__SYCL_EXPORT pi_result piextMemSampledImageCreate( + pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, + pi_mem *RetMem, pi_image_handle *RetHandle) { + return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, + ImageDesc, Sampler, RetMem, + RetHandle); +} + +__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( + pi_context Context, const pi_sampler_properties *SamplerProperties, + float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, + pi_sampler *RetSampler) { + return pi2ur::piextBindlessImageSamplerCreate( + Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, + MaxAnisotropy, RetSampler); +} + +__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context Context, + pi_device Device, + pi_image_mem_handle MipMem, + unsigned int Level, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); +} + +__SYCL_EXPORT pi_result piextMemImageFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); +} + +__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); +} + +__SYCL_EXPORT pi_result piextMemImageCopy( + pi_queue Queue, void *DstPtr, void *SrcPtr, + const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, + const pi_image_copy_flags Flags, pi_image_offset SrcOffset, + pi_image_offset DstOffset, pi_image_region CopyExtent, + pi_image_region HostExtent, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextMemImageCopy(Queue, DstPtr, SrcPtr, ImageFormat, ImageDesc, + Flags, SrcOffset, DstOffset, CopyExtent, + HostExtent, NumEventsInWaitList, + EventWaitList, Event); +} + +__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( + pi_context Context, pi_device Device, pi_image_handle Handle) { + return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); +} + +__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( + pi_context Context, pi_device Device, pi_image_handle Handle) { + return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); +} + +__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_image_mem_handle MemHandle, + pi_image_info ParamName, + void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextMemImageGetInfo(MemHandle, ParamName, ParamValue, + ParamValueSizeRet); +} + +__SYCL_EXPORT pi_result +piextMemImportOpaqueFD(pi_context Context, pi_device Device, size_t Size, + int FileDescriptor, pi_interop_mem_handle *RetHandle) { + return pi2ur::piextMemImportOpaqueFD(Context, Device, Size, FileDescriptor, + RetHandle); +} + +__SYCL_EXPORT pi_result piextMemMapExternalArray( + pi_context Context, pi_device Device, pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, pi_interop_mem_handle MemHandle, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, + ImageDesc, MemHandle, RetMem); +} + +__SYCL_EXPORT pi_result piextMemReleaseInterop(pi_context Context, + pi_device Device, + pi_interop_mem_handle ExtMem) { + return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); +} + +__SYCL_EXPORT pi_result piextImportExternalSemaphoreOpaqueFD( + pi_context Context, pi_device Device, int FileDescriptor, + pi_interop_semaphore_handle *RetHandle) { + return pi2ur::piextImportExternalSemaphoreOpaqueFD(Context, Device, + FileDescriptor, RetHandle); +} + +__SYCL_EXPORT pi_result +piextDestroyExternalSemaphore(pi_context Context, pi_device Device, + pi_interop_semaphore_handle SemHandle) { + return pi2ur::piextDestroyExternalSemaphore(Context, Device, SemHandle); +} + +__SYCL_EXPORT pi_result piextWaitExternalSemaphore( + pi_queue Queue, pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextWaitExternalSemaphore( + Queue, SemHandle, NumEventsInWaitList, EventWaitList, Event); +} + +__SYCL_EXPORT pi_result piextSignalExternalSemaphore( + pi_queue Queue, pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextSignalExternalSemaphore( + Queue, SemHandle, NumEventsInWaitList, EventWaitList, Event); +} + pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, pi_kernel_group_info ParamName, size_t ParamValueSize, void *ParamValue, @@ -742,6 +876,16 @@ pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, Size, Alignment); } +__SYCL_EXPORT pi_result piextUSMPitchedAlloc( + void **ResultPtr, size_t *ResultPitch, pi_context Context, pi_device Device, + pi_usm_mem_properties *Properties, size_t WidthInBytes, size_t Height, + unsigned int ElementSizeBytes) { + + return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, + Properties, WidthInBytes, Height, + ElementSizeBytes); +} + pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, pi_usm_mem_properties *Properties, size_t Size, pi_uint32 Alignment) { diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index fb6e850b7329f..297c6cae91cd9 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -4,7 +4,7 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG c7b0caf4e3ce5330bd0669db6e8e498b48e8ad27) + set(UNIFIED_RUNTIME_TAG 3c6f02c7a76a0448a83932d93c2dbeff25af70aa) message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") FetchContent_Declare(unified-runtime @@ -91,6 +91,8 @@ add_sycl_library("ur_adapter_level_zero" SHARED "ur/adapters/level_zero/context.hpp" "ur/adapters/level_zero/device.hpp" "ur/adapters/level_zero/event.hpp" + "ur/adapters/level_zero/image.cpp" + "ur/adapters/level_zero/image.hpp" "ur/adapters/level_zero/memory.hpp" "ur/adapters/level_zero/kernel.hpp" "ur/adapters/level_zero/platform.hpp" @@ -142,6 +144,8 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) "ur/adapters/cuda/enqueue.cpp" "ur/adapters/cuda/event.cpp" "ur/adapters/cuda/event.hpp" + "ur/adapters/cuda/image.cpp" + "ur/adapters/cuda/image.hpp" "ur/adapters/cuda/kernel.cpp" "ur/adapters/cuda/kernel.hpp" "ur/adapters/cuda/memory.cpp" diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index eea9855860bd9..8fca2d3953417 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -786,335 +786,264 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, ur_device_info_t InfoType; switch (ParamName) { - case PI_DEVICE_INFO_TYPE: - InfoType = UR_DEVICE_INFO_TYPE; - break; - case PI_DEVICE_INFO_PARENT_DEVICE: - InfoType = UR_DEVICE_INFO_PARENT_DEVICE; - break; - case PI_DEVICE_INFO_PLATFORM: - InfoType = UR_DEVICE_INFO_PLATFORM; - break; - case PI_DEVICE_INFO_VENDOR_ID: - InfoType = UR_DEVICE_INFO_VENDOR_ID; - break; - case PI_DEVICE_INFO_UUID: - InfoType = UR_DEVICE_INFO_UUID; - break; - case PI_DEVICE_INFO_ATOMIC_64: - InfoType = UR_DEVICE_INFO_ATOMIC_64; - break; - case PI_DEVICE_INFO_EXTENSIONS: - InfoType = UR_DEVICE_INFO_EXTENSIONS; - break; - case PI_DEVICE_INFO_NAME: - InfoType = UR_DEVICE_INFO_NAME; - break; - case PI_DEVICE_INFO_COMPILER_AVAILABLE: - InfoType = UR_DEVICE_INFO_COMPILER_AVAILABLE; - break; - case PI_DEVICE_INFO_LINKER_AVAILABLE: - InfoType = UR_DEVICE_INFO_LINKER_AVAILABLE; - break; - case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: - InfoType = UR_DEVICE_INFO_MAX_COMPUTE_UNITS; - break; - case PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: - InfoType = UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS; - break; - case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: - InfoType = UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE; - break; - case PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES: - InfoType = UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES; - break; - case PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY: - InfoType = UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY; - break; - case PI_DEVICE_INFO_ADDRESS_BITS: - InfoType = UR_DEVICE_INFO_ADDRESS_BITS; - break; - case PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: - InfoType = UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE; - break; - case PI_DEVICE_INFO_GLOBAL_MEM_SIZE: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_SIZE; - break; - case PI_DEVICE_INFO_LOCAL_MEM_SIZE: - InfoType = UR_DEVICE_INFO_LOCAL_MEM_SIZE; - break; - case PI_DEVICE_INFO_IMAGE_SUPPORT: - InfoType = UR_DEVICE_INFO_IMAGE_SUPPORTED; - break; - case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: - InfoType = UR_DEVICE_INFO_HOST_UNIFIED_MEMORY; - break; - case PI_DEVICE_INFO_AVAILABLE: - InfoType = UR_DEVICE_INFO_AVAILABLE; - break; - case PI_DEVICE_INFO_VENDOR: - InfoType = UR_DEVICE_INFO_VENDOR; - break; - case PI_DEVICE_INFO_DRIVER_VERSION: - InfoType = UR_DEVICE_INFO_DRIVER_VERSION; - break; - case PI_DEVICE_INFO_VERSION: - InfoType = UR_DEVICE_INFO_VERSION; - break; - case PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: - InfoType = UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES; - break; - case PI_DEVICE_INFO_REFERENCE_COUNT: - InfoType = UR_DEVICE_INFO_REFERENCE_COUNT; - break; - case PI_DEVICE_INFO_PARTITION_PROPERTIES: - InfoType = UR_DEVICE_INFO_SUPPORTED_PARTITIONS; - break; - case PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: - InfoType = UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN; - break; - case PI_DEVICE_INFO_PARTITION_TYPE: - InfoType = UR_DEVICE_INFO_PARTITION_TYPE; - break; - case PI_DEVICE_INFO_OPENCL_C_VERSION: - InfoType = UR_EXT_DEVICE_INFO_OPENCL_C_VERSION; - break; - case PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: - InfoType = UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC; - break; - case PI_DEVICE_INFO_PRINTF_BUFFER_SIZE: - InfoType = UR_DEVICE_INFO_PRINTF_BUFFER_SIZE; - break; - case PI_DEVICE_INFO_PROFILE: - InfoType = UR_DEVICE_INFO_PROFILE; - break; - case PI_DEVICE_INFO_BUILT_IN_KERNELS: - InfoType = UR_DEVICE_INFO_BUILT_IN_KERNELS; - break; - case PI_DEVICE_INFO_QUEUE_PROPERTIES: - InfoType = UR_DEVICE_INFO_QUEUE_PROPERTIES; - break; - case PI_DEVICE_INFO_EXECUTION_CAPABILITIES: - InfoType = UR_DEVICE_INFO_EXECUTION_CAPABILITIES; - break; - case PI_DEVICE_INFO_ENDIAN_LITTLE: - InfoType = UR_DEVICE_INFO_ENDIAN_LITTLE; - break; - case PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: - InfoType = UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT; - break; - case PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: - InfoType = UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION; - break; - case PI_DEVICE_INFO_LOCAL_MEM_TYPE: - InfoType = UR_DEVICE_INFO_LOCAL_MEM_TYPE; - break; - case PI_DEVICE_INFO_MAX_CONSTANT_ARGS: - InfoType = UR_DEVICE_INFO_MAX_CONSTANT_ARGS; - break; - case PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: - InfoType = UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE; - break; - case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE; - break; - case PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE; - break; - case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE; - break; - case PI_DEVICE_INFO_MAX_PARAMETER_SIZE: - InfoType = UR_DEVICE_INFO_MAX_PARAMETER_SIZE; - break; - case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: - InfoType = UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN; - break; - case PI_DEVICE_INFO_MAX_SAMPLERS: - InfoType = UR_DEVICE_INFO_MAX_SAMPLERS; - break; - case PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS: - InfoType = UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS; - break; - case PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: - InfoType = UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS; - break; - case PI_DEVICE_INFO_SINGLE_FP_CONFIG: - InfoType = UR_DEVICE_INFO_SINGLE_FP_CONFIG; - break; - case PI_DEVICE_INFO_HALF_FP_CONFIG: - InfoType = UR_DEVICE_INFO_HALF_FP_CONFIG; - break; - case PI_DEVICE_INFO_DOUBLE_FP_CONFIG: - InfoType = UR_DEVICE_INFO_DOUBLE_FP_CONFIG; - break; - case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: - InfoType = UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH; - break; - case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: - InfoType = UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT; - break; - case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: - InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH; - break; - case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: - InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT; - break; - case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: - InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH; - break; - case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: - InfoType = UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF; - break; - case PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS: - InfoType = UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS; - break; - case PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: - InfoType = UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS; - break; - case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: - InfoType = UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL; - break; - case PI_DEVICE_INFO_IL_VERSION: - InfoType = UR_DEVICE_INFO_IL_VERSION; - break; - case PI_DEVICE_INFO_USM_HOST_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_HOST_SUPPORT; - break; - case PI_DEVICE_INFO_USM_DEVICE_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_DEVICE_SUPPORT; - break; - case PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT; - break; - case PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT; - break; - case PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT; - break; - case PI_DEVICE_INFO_PCI_ADDRESS: - InfoType = UR_DEVICE_INFO_PCI_ADDRESS; - break; - case PI_DEVICE_INFO_GPU_EU_COUNT: - InfoType = UR_DEVICE_INFO_GPU_EU_COUNT; - break; - case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: - InfoType = UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH; - break; - case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: - InfoType = UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE; - break; - case PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION: - InfoType = UR_DEVICE_INFO_IP_VERSION; - break; - case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: - InfoType = UR_DEVICE_INFO_BUILD_ON_SUBDEVICE; - break; - case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D: - InfoType = UR_DEVICE_INFO_MAX_WORK_GROUPS_3D; - break; - case PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: - InfoType = UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE; - break; - case PI_DEVICE_INFO_DEVICE_ID: - InfoType = UR_DEVICE_INFO_DEVICE_ID; - break; - case PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_FREE; - break; - case PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE: - InfoType = UR_DEVICE_INFO_MEMORY_CLOCK_RATE; - break; - case PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH: - InfoType = UR_DEVICE_INFO_MEMORY_BUS_WIDTH; - break; - case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: - InfoType = UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES; - break; - case PI_DEVICE_INFO_GPU_SLICES: - InfoType = UR_DEVICE_INFO_GPU_EU_SLICES; - break; - case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: - InfoType = UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE; - break; - case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: - InfoType = UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU; - break; - case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: - InfoType = UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH; - break; - case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: - InfoType = UR_DEVICE_INFO_BFLOAT16; - break; - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - InfoType = UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES; - break; - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - InfoType = UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES; - break; - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - InfoType = UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES; - break; - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: - InfoType = UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES; - break; - case PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT: - InfoType = UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT; - break; - case PI_DEVICE_INFO_IMAGE_SRGB: - InfoType = UR_DEVICE_INFO_IMAGE_SRGB; - break; - case PI_DEVICE_INFO_BACKEND_VERSION: { - InfoType = UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION; - break; - } - case PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: { - InfoType = UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP; - break; - } +#define PI_TO_UR_MAP_DEVICE_INFO(FROM, TO) \ + case FROM: { \ + InfoType = TO; \ + break; \ + } + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_TYPE, UR_DEVICE_INFO_TYPE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARENT_DEVICE, + UR_DEVICE_INFO_PARENT_DEVICE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PLATFORM, UR_DEVICE_INFO_PLATFORM) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_VENDOR_ID, UR_DEVICE_INFO_VENDOR_ID) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_UUID, UR_DEVICE_INFO_UUID) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_ATOMIC_64, UR_DEVICE_INFO_ATOMIC_64) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_EXTENSIONS, + UR_DEVICE_INFO_EXTENSIONS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NAME, UR_DEVICE_INFO_NAME) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_COMPILER_AVAILABLE, + UR_DEVICE_INFO_COMPILER_AVAILABLE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_LINKER_AVAILABLE, + UR_DEVICE_INFO_LINKER_AVAILABLE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_COMPUTE_UNITS, + UR_DEVICE_INFO_MAX_COMPUTE_UNITS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, + UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, + UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, + UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY, + UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_ADDRESS_BITS, + UR_DEVICE_INFO_ADDRESS_BITS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, + UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GLOBAL_MEM_SIZE, + UR_DEVICE_INFO_GLOBAL_MEM_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_LOCAL_MEM_SIZE, + UR_DEVICE_INFO_LOCAL_MEM_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE_SUPPORT, + UR_DEVICE_INFO_IMAGE_SUPPORTED) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_HOST_UNIFIED_MEMORY, + UR_DEVICE_INFO_HOST_UNIFIED_MEMORY) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_AVAILABLE, UR_DEVICE_INFO_AVAILABLE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_VENDOR, UR_DEVICE_INFO_VENDOR) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_DRIVER_VERSION, + UR_DEVICE_INFO_DRIVER_VERSION) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_VERSION, UR_DEVICE_INFO_VERSION) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES, + UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_REFERENCE_COUNT, + UR_DEVICE_INFO_REFERENCE_COUNT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARTITION_PROPERTIES, + UR_DEVICE_INFO_SUPPORTED_PARTITIONS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN, + UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARTITION_TYPE, + UR_DEVICE_INFO_PARTITION_TYPE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_OPENCL_C_VERSION, + UR_EXT_DEVICE_INFO_OPENCL_C_VERSION) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC, + UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PRINTF_BUFFER_SIZE, + UR_DEVICE_INFO_PRINTF_BUFFER_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PROFILE, UR_DEVICE_INFO_PROFILE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_BUILT_IN_KERNELS, + UR_DEVICE_INFO_BUILT_IN_KERNELS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_QUEUE_PROPERTIES, + UR_DEVICE_INFO_QUEUE_PROPERTIES) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_EXECUTION_CAPABILITIES, + UR_DEVICE_INFO_EXECUTION_CAPABILITIES) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_ENDIAN_LITTLE, + UR_DEVICE_INFO_ENDIAN_LITTLE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT, + UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION, + UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_LOCAL_MEM_TYPE, + UR_DEVICE_INFO_LOCAL_MEM_TYPE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_CONSTANT_ARGS, + UR_DEVICE_INFO_MAX_CONSTANT_ARGS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE, + UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE, + UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE, + UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE, + UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_PARAMETER_SIZE, + UR_DEVICE_INFO_MAX_PARAMETER_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, + UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_SAMPLERS, + UR_DEVICE_INFO_MAX_SAMPLERS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS, + UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS, + UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_SINGLE_FP_CONFIG, + UR_DEVICE_INFO_SINGLE_FP_CONFIG) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_HALF_FP_CONFIG, + UR_DEVICE_INFO_HALF_FP_CONFIG) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_DOUBLE_FP_CONFIG, + UR_DEVICE_INFO_DOUBLE_FP_CONFIG) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH, + UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT, + UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH, + UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT, + UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH, + UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE, + UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS, + UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS) + PI_TO_UR_MAP_DEVICE_INFO( + PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, + UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, + UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IL_VERSION, + UR_DEVICE_INFO_IL_VERSION) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_HOST_SUPPORT, + UR_DEVICE_INFO_USM_HOST_SUPPORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_DEVICE_SUPPORT, + UR_DEVICE_INFO_USM_DEVICE_SUPPORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT, + UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT, + UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT, + UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PCI_ADDRESS, + UR_DEVICE_INFO_PCI_ADDRESS) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_EU_COUNT, + UR_DEVICE_INFO_GPU_EU_COUNT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, + UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, + UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION, + UR_DEVICE_INFO_IP_VERSION) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_BUILD_ON_SUBDEVICE, + UR_DEVICE_INFO_BUILD_ON_SUBDEVICE) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, + UR_DEVICE_INFO_MAX_WORK_GROUPS_3D) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE, + UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_DEVICE_ID, UR_DEVICE_INFO_DEVICE_ID) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, + UR_DEVICE_INFO_GLOBAL_MEM_FREE) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, + UR_DEVICE_INFO_MEMORY_CLOCK_RATE) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, + UR_DEVICE_INFO_MEMORY_BUS_WIDTH) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES, + UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_SLICES, + UR_DEVICE_INFO_GPU_EU_SLICES) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, + UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, + UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_MEM_BANDWIDTH, + UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS, + UR_DEVICE_INFO_BFLOAT16) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, + UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, + UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, + UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, + UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT, + UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE_SRGB, + UR_DEVICE_INFO_IMAGE_SRGB) + PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_BACKEND_VERSION, + UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP, + UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT, + UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT, + UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT, + UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT, + UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_PITCH_ALIGN, + UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH, + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT, + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH, + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_SUPPORT, + UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT, + UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY, + UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT, + UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT, + UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT, + UR_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT, + UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT, + UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP) +#undef PI_TO_UR_MAP_DEVICE_INFO default: return PI_ERROR_UNKNOWN; }; @@ -2560,131 +2489,83 @@ static void pi2urImageDesc(const pi_image_format *ImageFormat, ur_image_desc_t *UrDesc) { switch (ImageFormat->image_channel_data_type) { - case PI_IMAGE_CHANNEL_TYPE_SNORM_INT8: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SNORM_INT8; - break; - } - case PI_IMAGE_CHANNEL_TYPE_SNORM_INT16: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SNORM_INT16; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT8: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_INT8; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT16: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_INT16; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_INT_101010; - break; - } - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8; - break; - } - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16; - break; - } - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; - break; - } - case PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT; - break; - } - case PI_IMAGE_CHANNEL_TYPE_FLOAT: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_FLOAT; - break; - } +#define PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(FROM, TO) \ + case FROM: { \ + UrFormat->channelType = TO; \ + break; \ + } + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SNORM_INT8, + UR_IMAGE_CHANNEL_TYPE_SNORM_INT8) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SNORM_INT16, + UR_IMAGE_CHANNEL_TYPE_SNORM_INT16) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_INT8, + UR_IMAGE_CHANNEL_TYPE_UNORM_INT8) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_INT16, + UR_IMAGE_CHANNEL_TYPE_UNORM_INT16) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565, + UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555, + UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010, + UR_IMAGE_CHANNEL_TYPE_INT_101010) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8, + UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16, + UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32, + UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, + UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, + UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32, + UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT, + UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT) + PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_FLOAT, + UR_IMAGE_CHANNEL_TYPE_FLOAT) +#undef PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE default: { die("piMemImageCreate: unsuppported image_channel_data_type."); } } switch (ImageFormat->image_channel_order) { - case PI_IMAGE_CHANNEL_ORDER_A: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_A; - break; - } - case PI_IMAGE_CHANNEL_ORDER_R: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_R; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RG: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RG; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RA: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RA; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RGB: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGB; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RGBA: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGBA; - break; - } - case PI_IMAGE_CHANNEL_ORDER_BGRA: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_BGRA; - break; - } - case PI_IMAGE_CHANNEL_ORDER_ARGB: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_ARGB; - break; - } - case PI_IMAGE_CHANNEL_ORDER_ABGR: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_ABGR; - break; - } - case PI_IMAGE_CHANNEL_ORDER_INTENSITY: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_INTENSITY; - break; - } - case PI_IMAGE_CHANNEL_ORDER_LUMINANCE: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_LUMINANCE; - break; - } - case PI_IMAGE_CHANNEL_ORDER_Rx: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RX; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RGx: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGX; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RGBx: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGBX; - break; - } - case PI_IMAGE_CHANNEL_ORDER_sRGBA: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_SRGBA; - break; - } +#define PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(FROM, TO) \ + case FROM: { \ + UrFormat->channelOrder = TO; \ + break; \ + } + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_A, + UR_IMAGE_CHANNEL_ORDER_A) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_R, + UR_IMAGE_CHANNEL_ORDER_R) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RG, + UR_IMAGE_CHANNEL_ORDER_RG) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RA, + UR_IMAGE_CHANNEL_ORDER_RA) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RGB, + UR_IMAGE_CHANNEL_ORDER_RGB) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RGBA, + UR_IMAGE_CHANNEL_ORDER_RGBA) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_BGRA, + UR_IMAGE_CHANNEL_ORDER_BGRA) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_ARGB, + UR_IMAGE_CHANNEL_ORDER_ARGB) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_ABGR, + UR_IMAGE_CHANNEL_ORDER_ABGR) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_INTENSITY, + UR_IMAGE_CHANNEL_ORDER_INTENSITY) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_LUMINANCE, + UR_IMAGE_CHANNEL_ORDER_LUMINANCE) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_Rx, + UR_IMAGE_CHANNEL_ORDER_RX) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RGx, + UR_IMAGE_CHANNEL_ORDER_RGX) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RGBx, + UR_IMAGE_CHANNEL_ORDER_RGBX) + PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_sRGBA, + UR_IMAGE_CHANNEL_ORDER_SRGBA) +#undef PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER default: { die("piMemImageCreate: unsuppported image_channel_data_type."); } @@ -2699,34 +2580,22 @@ static void pi2urImageDesc(const pi_image_format *ImageFormat, UrDesc->rowPitch = ImageDesc->image_row_pitch; UrDesc->slicePitch = ImageDesc->image_slice_pitch; switch (ImageDesc->image_type) { - case PI_MEM_TYPE_BUFFER: { - UrDesc->type = UR_MEM_TYPE_BUFFER; - break; - } - case PI_MEM_TYPE_IMAGE2D: { - UrDesc->type = UR_MEM_TYPE_IMAGE2D; - break; - } - case PI_MEM_TYPE_IMAGE3D: { - UrDesc->type = UR_MEM_TYPE_IMAGE3D; - break; - } - case PI_MEM_TYPE_IMAGE2D_ARRAY: { - UrDesc->type = UR_MEM_TYPE_IMAGE2D_ARRAY; - break; - } - case PI_MEM_TYPE_IMAGE1D: { - UrDesc->type = UR_MEM_TYPE_IMAGE1D; - break; - } - case PI_MEM_TYPE_IMAGE1D_ARRAY: { - UrDesc->type = UR_MEM_TYPE_IMAGE1D_ARRAY; - break; - } - case PI_MEM_TYPE_IMAGE1D_BUFFER: { - UrDesc->type = UR_MEM_TYPE_IMAGE1D_BUFFER; - break; - } +#define PI_TO_UR_MAP_IMAGE_TYPE(FROM, TO) \ + case FROM: { \ + UrDesc->type = TO; \ + break; \ + } + PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_BUFFER, UR_MEM_TYPE_BUFFER) + PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE2D, UR_MEM_TYPE_IMAGE2D) + PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE3D, UR_MEM_TYPE_IMAGE3D) + PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE2D_ARRAY, + UR_MEM_TYPE_IMAGE2D_ARRAY) + PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE1D, UR_MEM_TYPE_IMAGE1D) + PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE1D_ARRAY, + UR_MEM_TYPE_IMAGE1D_ARRAY) + PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE1D_BUFFER, + UR_MEM_TYPE_IMAGE1D_BUFFER) +#undef PI_TO_UR_MAP_IMAGE_TYPE default: { die("piMemImageCreate: unsuppported image_type."); } @@ -2736,6 +2605,93 @@ static void pi2urImageDesc(const pi_image_format *ImageFormat, UrDesc->arraySize = ImageDesc->image_array_size; } +static void ur2piImageFormat(const ur_image_format_t *UrFormat, + pi_image_format *PiFormat) { + switch (UrFormat->channelOrder) { +#define UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(FROM, TO) \ + case FROM: { \ + PiFormat->image_channel_order = TO; \ + break; \ + } + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_A, + PI_IMAGE_CHANNEL_ORDER_A) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_R, + PI_IMAGE_CHANNEL_ORDER_R) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RG, + PI_IMAGE_CHANNEL_ORDER_RG) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RA, + PI_IMAGE_CHANNEL_ORDER_RA) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RGB, + PI_IMAGE_CHANNEL_ORDER_RGB) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RGBA, + PI_IMAGE_CHANNEL_ORDER_RGBA) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_BGRA, + PI_IMAGE_CHANNEL_ORDER_BGRA) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_ARGB, + PI_IMAGE_CHANNEL_ORDER_ARGB) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_ABGR, + PI_IMAGE_CHANNEL_ORDER_ABGR) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_INTENSITY, + PI_IMAGE_CHANNEL_ORDER_INTENSITY) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_LUMINANCE, + PI_IMAGE_CHANNEL_ORDER_LUMINANCE) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RX, + PI_IMAGE_CHANNEL_ORDER_Rx) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RGX, + PI_IMAGE_CHANNEL_ORDER_RGx) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RGBX, + PI_IMAGE_CHANNEL_ORDER_RGBx) + UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_SRGBA, + PI_IMAGE_CHANNEL_ORDER_sRGBA) +#undef UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER + default: { + die("ur2piImageFormat: unsuppported channelOrder."); + } + } + + switch (UrFormat->channelType) { +#define UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(FROM, TO) \ + case FROM: { \ + PiFormat->image_channel_data_type = TO; \ + break; \ + } + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, + PI_IMAGE_CHANNEL_TYPE_SNORM_INT8) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, + PI_IMAGE_CHANNEL_TYPE_SNORM_INT16) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT8) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT16) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565, + PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555, + PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_INT_101010, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT, + PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT) + UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_FLOAT, + PI_IMAGE_CHANNEL_TYPE_FLOAT) +#undef UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE + default: { + die("ur2piImageFormat: unsuppported channelType."); + } + } +} + inline pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, void *HostPtr, @@ -2939,6 +2895,27 @@ inline pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, return PI_SUCCESS; } +inline pi_result piextUSMPitchedAlloc(void **ResultPtr, size_t *ResultPitch, + pi_context Context, pi_device Device, + pi_usm_mem_properties *Properties, + size_t WidthInBytes, size_t Height, + unsigned int ElementSizeBytes) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + std::ignore = Properties; + ur_usm_desc_t USMDesc{}; + ur_usm_pool_handle_t Pool{}; + + HANDLE_ERRORS(urUSMPitchedAllocExp(UrContext, UrDevice, &USMDesc, Pool, + WidthInBytes, Height, ElementSizeBytes, + ResultPtr, ResultPitch)); + + return PI_SUCCESS; +} + inline pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, pi_device Device, pi_usm_mem_properties *Properties, @@ -3922,6 +3899,7 @@ inline pi_result piSamplerCreate(pi_context Context, reinterpret_cast(Context); ur_sampler_desc_t UrProps{}; UrProps.stype = UR_STRUCTURE_TYPE_SAMPLER_DESC; + const pi_sampler_properties *CurProperty = SamplerProperties; while (*CurProperty != 0) { switch (*CurProperty) { @@ -4347,4 +4325,505 @@ piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, // usm-p2p /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// Bindless Images Extension + +inline pi_result piextMemImageAllocate(pi_context Context, pi_device Device, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_image_mem_handle *RetMem) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + + ur_image_format_t UrFormat{}; + ur_image_desc_t UrDesc{}; + pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); + + ur_exp_image_mem_handle_t *UrRetMem = + reinterpret_cast(RetMem); + + HANDLE_ERRORS(urBindlessImagesImageAllocateExp(UrContext, UrDevice, &UrFormat, + &UrDesc, UrRetMem)); + + return PI_SUCCESS; +} + +inline pi_result piextMemUnsampledImageCreate( + pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_mem *RetMem, + pi_image_handle *RetHandle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + PI_ASSERT(RetMem, PI_ERROR_INVALID_MEM_OBJECT); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrImgMem = reinterpret_cast(ImgMem); + + ur_image_format_t UrFormat{}; + ur_image_desc_t UrDesc{}; + pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); + + ur_mem_handle_t *UrRetMem = reinterpret_cast(RetMem); + ur_exp_image_handle_t *UrRetHandle = + reinterpret_cast(RetHandle); + + HANDLE_ERRORS(urBindlessImagesUnsampledImageCreateExp( + UrContext, UrDevice, UrImgMem, &UrFormat, &UrDesc, UrRetMem, + UrRetHandle)); + + return PI_SUCCESS; +} + +inline pi_result piextMemSampledImageCreate( + pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, + pi_mem *RetMem, pi_image_handle *RetHandle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + PI_ASSERT(RetMem, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Sampler, PI_ERROR_INVALID_SAMPLER); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrImgMem = reinterpret_cast(ImgMem); + + ur_image_format_t UrFormat{}; + ur_image_desc_t UrDesc{}; + pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); + + auto UrSampler = reinterpret_cast(Sampler); + ur_mem_handle_t *UrRetMem = reinterpret_cast(RetMem); + ur_exp_image_handle_t *UrRetHandle = + reinterpret_cast(RetHandle); + + HANDLE_ERRORS(urBindlessImagesSampledImageCreateExp( + UrContext, UrDevice, UrImgMem, &UrFormat, &UrDesc, UrSampler, UrRetMem, + UrRetHandle)); + + return PI_SUCCESS; +} + +inline pi_result piextBindlessImageSamplerCreate( + pi_context Context, const pi_sampler_properties *SamplerProperties, + float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, + pi_sampler *RetSampler) { + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(RetSampler, PI_ERROR_INVALID_VALUE); + + auto UrContext = reinterpret_cast(Context); + ur_sampler_desc_t UrProps{}; + UrProps.stype = UR_STRUCTURE_TYPE_SAMPLER_DESC; + + ur_exp_sampler_mip_properties_t UrMipProps{}; + UrMipProps.stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES; + UrMipProps.minMipmapLevelClamp = MinMipmapLevelClamp; + UrMipProps.maxMipmapLevelClamp = MaxMipmapLevelClamp; + UrMipProps.maxAnisotropy = MaxAnisotropy; + UrProps.pNext = &UrMipProps; + + const pi_sampler_properties *CurProperty = SamplerProperties; + while (*CurProperty != 0) { + switch (*CurProperty) { + case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS: { + UrProps.normalizedCoords = ur_cast(*(++CurProperty)); + } break; + + case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE: { + pi_sampler_addressing_mode CurValueAddressingMode = + ur_cast( + ur_cast(*(++CurProperty))); + + if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; + else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_REPEAT) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_REPEAT; + else if (CurValueAddressingMode == + PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_CLAMP) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP; + else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_NONE) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_NONE; + } break; + + case PI_SAMPLER_PROPERTIES_FILTER_MODE: { + pi_sampler_filter_mode CurValueFilterMode = + ur_cast(ur_cast(*(++CurProperty))); + + if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_NEAREST) + UrProps.filterMode = UR_SAMPLER_FILTER_MODE_NEAREST; + else if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_LINEAR) + UrProps.filterMode = UR_SAMPLER_FILTER_MODE_LINEAR; + } break; + + case PI_SAMPLER_PROPERTIES_MIP_FILTER_MODE: { + pi_sampler_filter_mode CurValueFilterMode = + ur_cast(ur_cast(*(++CurProperty))); + + if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_NEAREST) + UrMipProps.mipFilterMode = UR_SAMPLER_FILTER_MODE_NEAREST; + else if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_LINEAR) + UrMipProps.mipFilterMode = UR_SAMPLER_FILTER_MODE_LINEAR; + } break; + + default: + break; + } + CurProperty++; + } + + ur_sampler_handle_t *UrSampler = + reinterpret_cast(RetSampler); + + HANDLE_ERRORS(urSamplerCreate(UrContext, &UrProps, UrSampler)); + + return PI_SUCCESS; +} + +inline pi_result piextMemMipmapGetLevel(pi_context Context, pi_device Device, + pi_image_mem_handle MipMem, + unsigned int Level, + pi_image_mem_handle *RetMem) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrMipMem = reinterpret_cast(MipMem); + ur_exp_image_mem_handle_t *UrRetMem = + reinterpret_cast(RetMem); + + HANDLE_ERRORS(urBindlessImagesMipmapGetLevelExp(UrContext, UrDevice, UrMipMem, + Level, UrRetMem)); + + return PI_SUCCESS; +} + +inline pi_result piextMemImageFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrMemoryHandle = + reinterpret_cast(MemoryHandle); + + HANDLE_ERRORS( + urBindlessImagesImageFreeExp(UrContext, UrDevice, UrMemoryHandle)); + + return PI_SUCCESS; +} + +inline pi_result piextMemMipmapFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrMemoryHandle = + reinterpret_cast(MemoryHandle); + + HANDLE_ERRORS( + urBindlessImagesMipmapFreeExp(UrContext, UrDevice, UrMemoryHandle)); + + return PI_SUCCESS; +} + +static void pi2urImageCopyFlags(const pi_image_copy_flags PiFlags, + ur_exp_image_copy_flags_t *UrFlags) { + switch (PiFlags) { + case PI_IMAGE_COPY_HOST_TO_DEVICE: + *UrFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; + break; + case PI_IMAGE_COPY_DEVICE_TO_HOST: + *UrFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; + break; + case PI_IMAGE_COPY_DEVICE_TO_DEVICE: + *UrFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE; + break; + default: + die("pi2urImageCopyFlags: Unsupported use case"); + } +} + +inline pi_result +piextMemImageCopy(pi_queue Queue, void *DstPtr, void *SrcPtr, + const pi_image_format *ImageFormat, + const pi_image_desc *ImageDesc, + const pi_image_copy_flags Flags, pi_image_offset SrcOffset, + pi_image_offset DstOffset, pi_image_region CopyExtent, + pi_image_region HostExtent, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + auto UrQueue = reinterpret_cast(Queue); + + ur_image_format_t UrFormat{}; + ur_image_desc_t UrDesc{}; + pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); + + ur_exp_image_copy_flags_t UrFlags; + pi2urImageCopyFlags(Flags, &UrFlags); + + ur_rect_offset_t UrSrcOffset{SrcOffset->x, SrcOffset->y, SrcOffset->z}; + ur_rect_offset_t UrDstOffset{DstOffset->x, DstOffset->y, DstOffset->z}; + ur_rect_region_t UrCopyExtent{}; + UrCopyExtent.depth = CopyExtent->depth; + UrCopyExtent.height = CopyExtent->height; + UrCopyExtent.width = CopyExtent->width; + ur_rect_region_t UrHostExtent{}; + UrHostExtent.depth = HostExtent->depth; + UrHostExtent.height = HostExtent->height; + UrHostExtent.width = HostExtent->width; + + const ur_event_handle_t *UrEventWaitList = + reinterpret_cast(EventWaitList); + ur_event_handle_t *UrEvent = reinterpret_cast(Event); + + HANDLE_ERRORS(urBindlessImagesImageCopyExp( + UrQueue, DstPtr, SrcPtr, &UrFormat, &UrDesc, UrFlags, UrSrcOffset, + UrDstOffset, UrCopyExtent, UrHostExtent, NumEventsInWaitList, + UrEventWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piextMemUnsampledImageHandleDestroy(pi_context Context, + pi_device Device, + pi_image_handle Handle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrHandle = reinterpret_cast(Handle); + + HANDLE_ERRORS(urBindlessImagesUnsampledImageHandleDestroyExp( + UrContext, UrDevice, UrHandle)); + + return PI_SUCCESS; +} + +inline pi_result piextMemSampledImageHandleDestroy(pi_context Context, + pi_device Device, + pi_image_handle Handle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrHandle = reinterpret_cast(Handle); + + HANDLE_ERRORS(urBindlessImagesSampledImageHandleDestroyExp( + UrContext, UrDevice, UrHandle)); + + return PI_SUCCESS; +} + +static void pi2urImageInfoFlags(const pi_image_info PiFlags, + ur_image_info_t *UrFlags) { + switch (PiFlags) { +#define PI_TO_UR_IMAGE_INFO(FROM, TO) \ + case FROM: { \ + *UrFlags = TO; \ + return; \ + } + PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_FORMAT, UR_IMAGE_INFO_FORMAT) + PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_ELEMENT_SIZE, UR_IMAGE_INFO_ELEMENT_SIZE) + PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_ROW_PITCH, UR_IMAGE_INFO_ROW_PITCH) + PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_SLICE_PITCH, UR_IMAGE_INFO_SLICE_PITCH) + PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_WIDTH, UR_IMAGE_INFO_WIDTH) + PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_HEIGHT, UR_IMAGE_INFO_HEIGHT) + PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_DEPTH, UR_IMAGE_INFO_DEPTH) +#undef PI_TO_UR_IMAGE_INFO + default: + die("pi2urImageInfoFlags: Unsupported use case"); + } +} + +inline pi_result piextMemImageGetInfo(pi_image_mem_handle MemHandle, + pi_image_info ParamName, void *ParamValue, + size_t *ParamValueSizeRet) { + auto UrMemHandle = reinterpret_cast(MemHandle); + + ur_image_info_t UrParamName{}; + pi2urImageInfoFlags(ParamName, &UrParamName); + + HANDLE_ERRORS(urBindlessImagesImageGetInfoExp(UrMemHandle, UrParamName, + ParamValue, ParamValueSizeRet)); + + if (ParamName == pi_image_info::PI_IMAGE_INFO_FORMAT && ParamValue) { + pi_image_format PiFormat; + ur2piImageFormat(reinterpret_cast(ParamValue), + &PiFormat); + reinterpret_cast(ParamValue)->image_channel_data_type = + PiFormat.image_channel_data_type; + reinterpret_cast(ParamValue)->image_channel_order = + PiFormat.image_channel_order; + if (ParamValueSizeRet) { + *ParamValueSizeRet = sizeof(pi_image_format); + } + } + + return PI_SUCCESS; +} + +inline pi_result piextMemImportOpaqueFD(pi_context Context, pi_device Device, + size_t Size, int FileDescriptor, + pi_interop_mem_handle *RetHandle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + ur_exp_interop_mem_handle_t *UrRetHandle = + reinterpret_cast(RetHandle); + + ur_exp_file_descriptor_t PosixFD{}; + PosixFD.stype = UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR; + PosixFD.fd = FileDescriptor; + + ur_exp_interop_mem_desc_t InteropMemDesc{}; + InteropMemDesc.stype = UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC; + InteropMemDesc.pNext = &PosixFD; + + HANDLE_ERRORS(urBindlessImagesImportOpaqueFDExp( + UrContext, UrDevice, Size, &InteropMemDesc, UrRetHandle)); + + return PI_SUCCESS; +} + +inline pi_result piextMemMapExternalArray(pi_context Context, pi_device Device, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_interop_mem_handle MemHandle, + pi_image_mem_handle *RetMem) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + + ur_image_format_t UrFormat{}; + ur_image_desc_t UrDesc{}; + pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); + + auto UrMemHandle = reinterpret_cast(MemHandle); + ur_exp_image_mem_handle_t *UrRetMem = + reinterpret_cast(RetMem); + + HANDLE_ERRORS(urBindlessImagesMapExternalArrayExp( + UrContext, UrDevice, &UrFormat, &UrDesc, UrMemHandle, UrRetMem)); + + return PI_SUCCESS; +} + +inline pi_result piextMemReleaseInterop(pi_context Context, pi_device Device, + pi_interop_mem_handle ExtMem) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrExtMem = reinterpret_cast(ExtMem); + + HANDLE_ERRORS( + urBindlessImagesReleaseInteropExp(UrContext, UrDevice, UrExtMem)); + + return PI_SUCCESS; +} + +inline pi_result +piextImportExternalSemaphoreOpaqueFD(pi_context Context, pi_device Device, + int FileDescriptor, + pi_interop_semaphore_handle *RetHandle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + ur_exp_interop_semaphore_handle_t *UrRetHandle = + reinterpret_cast(RetHandle); + + ur_exp_file_descriptor_t PosixFD{}; + PosixFD.stype = UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR; + PosixFD.fd = FileDescriptor; + + ur_exp_interop_semaphore_desc_t InteropSemDesc{}; + InteropSemDesc.stype = UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC; + InteropSemDesc.pNext = &PosixFD; + + HANDLE_ERRORS(urBindlessImagesImportExternalSemaphoreOpaqueFDExp( + UrContext, UrDevice, &InteropSemDesc, UrRetHandle)); + + return PI_SUCCESS; +} + +inline pi_result +piextDestroyExternalSemaphore(pi_context Context, pi_device Device, + pi_interop_semaphore_handle SemHandle) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrContext = reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + auto UrSemHandle = + reinterpret_cast(SemHandle); + + HANDLE_ERRORS(urBindlessImagesDestroyExternalSemaphoreExp(UrContext, UrDevice, + UrSemHandle)); + + return PI_SUCCESS; +} + +inline pi_result +piextWaitExternalSemaphore(pi_queue Queue, + pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + auto UrQueue = reinterpret_cast(Queue); + auto UrSemHandle = + reinterpret_cast(SemHandle); + const ur_event_handle_t *UrEventWaitList = + reinterpret_cast(EventWaitList); + ur_event_handle_t *UrEvent = reinterpret_cast(Event); + + HANDLE_ERRORS(urBindlessImagesWaitExternalSemaphoreExp( + UrQueue, UrSemHandle, NumEventsInWaitList, UrEventWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result +piextSignalExternalSemaphore(pi_queue Queue, + pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + auto UrQueue = reinterpret_cast(Queue); + auto UrSemHandle = + reinterpret_cast(SemHandle); + const ur_event_handle_t *UrEventWaitList = + reinterpret_cast(EventWaitList); + ur_event_handle_t *UrEvent = reinterpret_cast(Event); + + HANDLE_ERRORS(urBindlessImagesSignalExternalSemaphoreExp( + UrQueue, UrSemHandle, NumEventsInWaitList, UrEventWaitList, UrEvent)); + + return PI_SUCCESS; +} + +// Bindless Images Extension +/////////////////////////////////////////////////////////////////////////////// + } // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 5fb983122845c..8bd24e7b33133 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -354,6 +354,16 @@ __SYCL_EXPORT pi_result piextUSMSharedAlloc(void **ResultPtr, Size, Alignment); } +__SYCL_EXPORT pi_result piextUSMPitchedAlloc( + void **ResultPtr, size_t *ResultPitch, pi_context Context, pi_device Device, + pi_usm_mem_properties *Properties, size_t WidthInBytes, size_t Height, + unsigned int ElementSizeBytes) { + + return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, + Properties, WidthInBytes, Height, + ElementSizeBytes); +} + __SYCL_EXPORT pi_result piextUSMFree(pi_context Context, void *Ptr) { return pi2ur::piextUSMFree(Context, Ptr); } @@ -1102,6 +1112,140 @@ __SYCL_EXPORT pi_result piextPeerAccessGetInfo( ParamValueSizeRet); } +__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context Context, + pi_device Device, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, + RetMem); +} + +__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( + pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_mem *RetMem, + pi_image_handle *RetHandle) { + return pi2ur::piextMemUnsampledImageCreate( + Context, Device, ImgMem, ImageFormat, ImageDesc, RetMem, RetHandle); +} + +__SYCL_EXPORT pi_result piextMemSampledImageCreate( + pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, + pi_mem *RetMem, pi_image_handle *RetHandle) { + return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, + ImageDesc, Sampler, RetMem, + RetHandle); +} + +__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( + pi_context Context, const pi_sampler_properties *SamplerProperties, + float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, + pi_sampler *RetSampler) { + return pi2ur::piextBindlessImageSamplerCreate( + Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, + MaxAnisotropy, RetSampler); +} + +__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context Context, + pi_device Device, + pi_image_mem_handle MipMem, + unsigned int Level, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); +} + +__SYCL_EXPORT pi_result piextMemImageFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); +} + +__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); +} + +__SYCL_EXPORT pi_result piextMemImageCopy( + pi_queue Queue, void *DstPtr, void *SrcPtr, + const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, + const pi_image_copy_flags Flags, pi_image_offset SrcOffset, + pi_image_offset DstOffset, pi_image_region CopyExtent, + pi_image_region HostExtent, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextMemImageCopy(Queue, DstPtr, SrcPtr, ImageFormat, ImageDesc, + Flags, SrcOffset, DstOffset, CopyExtent, + HostExtent, NumEventsInWaitList, + EventWaitList, Event); +} + +__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( + pi_context Context, pi_device Device, pi_image_handle Handle) { + return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); +} + +__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( + pi_context Context, pi_device Device, pi_image_handle Handle) { + return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); +} + +__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_image_mem_handle MemHandle, + pi_image_info ParamName, + void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextMemImageGetInfo(MemHandle, ParamName, ParamValue, + ParamValueSizeRet); +} + +__SYCL_EXPORT pi_result +piextMemImportOpaqueFD(pi_context Context, pi_device Device, size_t Size, + int FileDescriptor, pi_interop_mem_handle *RetHandle) { + return pi2ur::piextMemImportOpaqueFD(Context, Device, Size, FileDescriptor, + RetHandle); +} + +__SYCL_EXPORT pi_result piextMemMapExternalArray( + pi_context Context, pi_device Device, pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, pi_interop_mem_handle MemHandle, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, + ImageDesc, MemHandle, RetMem); +} + +__SYCL_EXPORT pi_result piextMemReleaseInterop(pi_context Context, + pi_device Device, + pi_interop_mem_handle ExtMem) { + return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); +} + +__SYCL_EXPORT pi_result piextImportExternalSemaphoreOpaqueFD( + pi_context Context, pi_device Device, int FileDescriptor, + pi_interop_semaphore_handle *RetHandle) { + return pi2ur::piextImportExternalSemaphoreOpaqueFD(Context, Device, + FileDescriptor, RetHandle); +} + +__SYCL_EXPORT pi_result +piextDestroyExternalSemaphore(pi_context Context, pi_device Device, + pi_interop_semaphore_handle SemHandle) { + return pi2ur::piextDestroyExternalSemaphore(Context, Device, SemHandle); +} + +__SYCL_EXPORT pi_result piextWaitExternalSemaphore( + pi_queue Queue, pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextWaitExternalSemaphore( + Queue, SemHandle, NumEventsInWaitList, EventWaitList, Event); +} + +__SYCL_EXPORT pi_result piextSignalExternalSemaphore( + pi_queue Queue, pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextSignalExternalSemaphore( + Queue, SemHandle, NumEventsInWaitList, EventWaitList, Event); +} + // This interface is not in Unified Runtime currently __SYCL_EXPORT pi_result piTearDown(void *PluginParameter) { return pi2ur::piTearDown(PluginParameter); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp index 031453ffbd101..756b6ae52e4a3 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp @@ -62,6 +62,29 @@ ur_result_t checkErrorUR(CUresult Result, const char *Function, int Line, throw mapErrorUR(Result); } +ur_result_t checkErrorUR(ur_result_t Result, const char *Function, int Line, + const char *File) { + if (Result == UR_RESULT_SUCCESS) { + return UR_RESULT_SUCCESS; + } + + if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr) { + std::stringstream SS; + SS << "\nUR ERROR:" + << "\n\tValue: " << Result + << "\n\tFunction: " << Function << "\n\tSource Location: " << File + << ":" << Line << "\n" + << std::endl; + std::cerr << SS.str(); + } + + if (std::getenv("PI_CUDA_ABORT") != nullptr) { + std::abort(); + } + + throw Result; +} + std::string getCudaVersionString() { int driver_version = 0; cuDriverGetVersion(&driver_version); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp index db428a7441a46..69e9ac9b37dec 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp @@ -22,6 +22,9 @@ ur_result_t mapErrorUR(CUresult Result); ur_result_t checkErrorUR(CUresult Result, const char *Function, int Line, const char *File); +ur_result_t checkErrorUR(ur_result_t Result, const char *Function, int Line, + const char *File); + #define UR_CHECK_ERROR(Result) \ checkErrorUR(Result, __func__, __LINE__, __FILE__) diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp index 98ee2f1fbad54..9144f5f5f0eec 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp @@ -871,6 +871,89 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { return ReturnValue(int32_t{1}); } + case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { + // On CUDA bindless images are supported. + return ReturnValue(true); + } + case UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP: { + // On CUDA bindless images can be backed by shared (managed) USM. + return ReturnValue(true); + } + case UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP: { + // On CUDA 1D bindless image USM is not supported. + // More specifically, linear filtering is not supported. + return ReturnValue(false); + } + case UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP: { + // On CUDA 2D bindless image USM is supported. + return ReturnValue(true); + } + case UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP: { + int32_t tex_pitch_align = 0; + detail::ur::assertion( + cuDeviceGetAttribute(&tex_pitch_align, + CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, + hDevice->get()) == CUDA_SUCCESS); + return ReturnValue(tex_pitch_align); + } + case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP: { + int32_t tex_max_linear_width = 0; + detail::ur::assertion( + cuDeviceGetAttribute(&tex_max_linear_width, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, + hDevice->get()) == CUDA_SUCCESS); + return ReturnValue(tex_max_linear_width); + } + case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP: { + int32_t tex_max_linear_height = 0; + detail::ur::assertion( + cuDeviceGetAttribute( + &tex_max_linear_height, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, + hDevice->get()) == CUDA_SUCCESS); + return ReturnValue(tex_max_linear_height); + } + case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP: { + int32_t tex_max_linear_pitch = 0; + detail::ur::assertion( + cuDeviceGetAttribute(&tex_max_linear_pitch, + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, + hDevice->get()) == CUDA_SUCCESS); + return ReturnValue(tex_max_linear_pitch); + } + case UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP: { + // CUDA supports mipmaps. + return ReturnValue(true); + } + case UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP: { + // CUDA supports anisotropic filtering. + return ReturnValue(true); + } + case UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP: { + // CUDA has no query for this, but documentation states max value is 16. + return ReturnValue(16.f); + } + case UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP: { + // CUDA supports creation of images from individual mipmap levels. + return ReturnValue(true); + } + + case UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP: { + // CUDA supports importing external memory. + return ReturnValue(true); + } + case UR_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP: { + // CUDA does not support exporting it's own device memory. + return ReturnValue(false); + } + case UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP: { + // CUDA supports importing external semaphores. + return ReturnValue(true); + } + case UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP: { + // CUDA does not support exporting semaphores or events. + return ReturnValue(false); + } case UR_DEVICE_INFO_DEVICE_ID: { int Value = 0; detail::ur::assertion( diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp index b411f8ce98a65..3136a3b6c4bff 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp @@ -6,6 +6,7 @@ // //===-----------------------------------------------------------------===// +#include "enqueue.hpp" #include "common.hpp" #include "context.hpp" #include "event.hpp" diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.hpp new file mode 100644 index 0000000000000..393085ce42eb2 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.hpp @@ -0,0 +1,16 @@ +//===--------- enqueue.hpp - CUDA Adapter ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include +#include +#include + +ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue, CUstream Stream, + uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/image.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/image.cpp new file mode 100644 index 0000000000000..85fbdd092d941 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/image.cpp @@ -0,0 +1,960 @@ +//===--------- image.cpp - CUDA Adapter ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include + +#include "common.hpp" +#include "context.hpp" +#include "enqueue.hpp" +#include "event.hpp" +#include "image.hpp" +#include "memory.hpp" +#include "queue.hpp" +#include "sampler.hpp" +#include "ur/ur.hpp" +#include "ur_api.h" + +ur_result_t urCalculateNumChannels(ur_image_channel_order_t order, + unsigned int *NumChannels) { + switch (order) { + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_A: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_R: + *NumChannels = 1; + return UR_RESULT_SUCCESS; + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RG: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RA: + *NumChannels = 2; + return UR_RESULT_SUCCESS; + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RGB: + return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RGBA: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_ARGB: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_BGRA: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_ABGR: + *NumChannels = 4; + return UR_RESULT_SUCCESS; + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RX: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RGX: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RGBX: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_SRGBA: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_INTENSITY: + case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_LUMINANCE: + default: + return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; + } +} + +/// Convert a UR image format to a CUDA image format and +/// get the pixel size in bytes. +/// /param image_channel_type is the ur_image_channel_type_t. +/// /param return_cuda_format will be set to the equivalent cuda +/// format if not nullptr. +/// /param return_pixel_types_size_bytes will be set to the pixel +/// byte size if not nullptr. +ur_result_t +urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type, + CUarray_format *return_cuda_format, + size_t *return_pixel_types_size_bytes) { + + CUarray_format cuda_format; + size_t PixelTypeSizeBytes; + + switch (image_channel_type) { +#define CASE(FROM, TO, SIZE) \ + case FROM: { \ + cuda_format = TO; \ + PixelTypeSizeBytes = SIZE; \ + break; \ + } + CASE(UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, CU_AD_FORMAT_UNORM_INT8X1, 1) + CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, CU_AD_FORMAT_UNSIGNED_INT8, 1) + CASE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8, CU_AD_FORMAT_SIGNED_INT8, 1) + CASE(UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, CU_AD_FORMAT_UNORM_INT16X1, 2) + CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, CU_AD_FORMAT_UNSIGNED_INT16, 2) + CASE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16, CU_AD_FORMAT_SIGNED_INT16, 2) + CASE(UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT, CU_AD_FORMAT_HALF, 2) + CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32, CU_AD_FORMAT_UNSIGNED_INT32, 4) + CASE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32, CU_AD_FORMAT_SIGNED_INT32, 4) + CASE(UR_IMAGE_CHANNEL_TYPE_FLOAT, CU_AD_FORMAT_FLOAT, 4) +#undef CASE + default: + return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; + } + + if (return_cuda_format) { + *return_cuda_format = cuda_format; + } + if (return_pixel_types_size_bytes) { + *return_pixel_types_size_bytes = PixelTypeSizeBytes; + } + return UR_RESULT_SUCCESS; +} + +ur_result_t +cudaToUrImageChannelFormat(CUarray_format cuda_format, + ur_image_channel_type_t *return_image_channel_type) { + + switch (cuda_format) { +#define CUDA_TO_UR_IMAGE_CHANNEL_TYPE(FROM, TO) \ + case FROM: { \ + *return_image_channel_type = TO; \ + return UR_RESULT_SUCCESS; \ + } + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNSIGNED_INT8, + UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNSIGNED_INT16, + UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNSIGNED_INT32, + UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SIGNED_INT8, + UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SIGNED_INT16, + UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SIGNED_INT32, + UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_HALF, + UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_FLOAT, + UR_IMAGE_CHANNEL_TYPE_FLOAT); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT8X1, + UR_IMAGE_CHANNEL_TYPE_UNORM_INT8); + CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT16X1, + UR_IMAGE_CHANNEL_TYPE_UNORM_INT16); +#undef MAP + default: + return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; + } +} + +ur_result_t urTextureCreate(ur_context_handle_t hContext, + ur_sampler_handle_t hSampler, + const ur_image_desc_t *pImageDesc, + CUDA_RESOURCE_DESC ResourceDesc, + ur_exp_image_handle_t *phRetImage) { + + try { + /// pi_sampler_properties + /// | | + /// ----------------------------------- + /// | 31 30 ... 6 | N/A + /// | 5 | mip filter mode + /// | 4 3 2 | addressing mode + /// | 1 | filter mode + /// | 0 | normalize coords + CUDA_TEXTURE_DESC ImageTexDesc = {}; + CUaddress_mode AddrMode; + ur_sampler_addressing_mode_t AddrModeProp = hSampler->getAddressingMode(); + if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE - + UR_SAMPLER_ADDRESSING_MODE_NONE)) { + AddrMode = CU_TR_ADDRESS_MODE_CLAMP; + } else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP - + UR_SAMPLER_ADDRESSING_MODE_NONE)) { + AddrMode = CU_TR_ADDRESS_MODE_BORDER; + } else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT - + UR_SAMPLER_ADDRESSING_MODE_NONE)) { + AddrMode = CU_TR_ADDRESS_MODE_WRAP; + } else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT - + UR_SAMPLER_ADDRESSING_MODE_NONE)) { + AddrMode = CU_TR_ADDRESS_MODE_MIRROR; + } + CUfilter_mode FilterMode; + ur_sampler_filter_mode_t FilterModeProp = hSampler->getFilterMode(); + FilterMode = + FilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT; + ImageTexDesc.filterMode = FilterMode; + + // Mipmap attributes + CUfilter_mode MipFilterMode; + ur_sampler_filter_mode_t MipFilterModeProp = hSampler->getMipFilterMode(); + MipFilterMode = + MipFilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT; + ImageTexDesc.mipmapFilterMode = MipFilterMode; + ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp; + ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp; + ImageTexDesc.maxAnisotropy = hSampler->MaxAnisotropy; + + // The address modes can interfere with other dimensionsenqueueEventsWait + // e.g. 1D texture sampling can be interfered with when setting other + // dimension address modes despite their nonexistence. + ImageTexDesc.addressMode[0] = AddrMode; // 1D + ImageTexDesc.addressMode[1] = + pImageDesc->height > 0 ? AddrMode : ImageTexDesc.addressMode[1]; // 2D + ImageTexDesc.addressMode[2] = + pImageDesc->depth > 0 ? AddrMode : ImageTexDesc.addressMode[2]; // 3D + + // flags takes the normalized coordinates setting -- unnormalized is default + ImageTexDesc.flags = (hSampler->isNormalizedCoords()) + ? CU_TRSF_NORMALIZED_COORDINATES + : ImageTexDesc.flags; + + // CUDA default promotes 8-bit and 16-bit integers to float between [0,1] + // This flag prevents this behaviour. + ImageTexDesc.flags |= CU_TRSF_READ_AS_INTEGER; + + CUtexObject Texture; + UR_CHECK_ERROR( + cuTexObjectCreate(&Texture, &ResourceDesc, &ImageTexDesc, nullptr)); + *phRetImage = (ur_exp_image_handle_t)Texture; + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, + size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, + size_t *pResultPitch) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + std::ignore = pUSMDesc; + std::ignore = pool; + + UR_ASSERT((widthInBytes > 0), UR_RESULT_ERROR_INVALID_VALUE); + UR_ASSERT((height > 0), UR_RESULT_ERROR_INVALID_VALUE); + UR_ASSERT((elementSizeBytes > 0), UR_RESULT_ERROR_INVALID_VALUE); + + // elementSizeBytes can only take on values of 4, 8, or 16. + // small data types need to be minimised to 4. + if (elementSizeBytes < 4) { + elementSizeBytes = 4; + } + UR_ASSERT((elementSizeBytes == 4 || elementSizeBytes == 8 || + elementSizeBytes == 16), + UR_RESULT_ERROR_INVALID_VALUE); + ur_result_t Result = UR_RESULT_SUCCESS; + try { + ScopedContext Active(hDevice->getContext()); + Result = + UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, + widthInBytes, height, elementSizeBytes)); + } catch (ur_result_t error) { + Result = error; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return Result; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesUnsampledImageHandleDestroyExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_handle_t hImage) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + return UR_CHECK_ERROR(cuSurfObjectDestroy((CUsurfObject)hImage)); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesSampledImageHandleDestroyExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_handle_t hImage) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + return UR_CHECK_ERROR(cuTexObjectDestroy((CUtexObject)hImage)); +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_mem_handle_t *phImageMem) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + // Populate descriptor + CUDA_ARRAY3D_DESCRIPTOR array_desc = {}; + + UR_CHECK_ERROR(urCalculateNumChannels(pImageFormat->channelOrder, + &array_desc.NumChannels)); + + UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, + &array_desc.Format, nullptr)); + + array_desc.Flags = 0; // No flags required + array_desc.Width = pImageDesc->width; + if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { + array_desc.Height = 0; + array_desc.Depth = 0; + } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { + array_desc.Height = pImageDesc->height; + array_desc.Depth = 0; + } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { + array_desc.Height = pImageDesc->height; + array_desc.Depth = pImageDesc->depth; + } + + ScopedContext Active(hDevice->getContext()); + + // Allocate a cuArray + if (pImageDesc->numMipLevel == 1) { + CUarray ImageArray; + + try { + UR_CHECK_ERROR(cuArray3DCreate(&ImageArray, &array_desc)); + *phImageMem = (ur_exp_image_mem_handle_t)ImageArray; + } catch (ur_result_t Err) { + cuArrayDestroy(ImageArray); + return Err; + } catch (...) { + cuArrayDestroy(ImageArray); + return UR_RESULT_ERROR_UNKNOWN; + } + } else // Allocate a cuMipmappedArray + { + CUmipmappedArray mip_array; + array_desc.Flags = CUDA_ARRAY3D_SURFACE_LDST; + + try { + UR_CHECK_ERROR(cuMipmappedArrayCreate(&mip_array, &array_desc, + pImageDesc->numMipLevel)); + *phImageMem = (ur_exp_image_mem_handle_t)mip_array; + } catch (ur_result_t Err) { + cuMipmappedArrayDestroy(mip_array); + return Err; + } catch (...) { + cuMipmappedArrayDestroy(mip_array); + return UR_RESULT_ERROR_UNKNOWN; + } + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hImageMem) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + ScopedContext Active(hDevice->getContext()); + try { + UR_CHECK_ERROR(cuArrayDestroy((CUarray)hImageMem)); + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hImageMem, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, ur_mem_handle_t *phMem, + ur_exp_image_handle_t *phImage) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + unsigned int NumChannels = 0; + UR_CHECK_ERROR( + urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels)); + + CUarray_format format; + size_t PixelTypeSizeBytes; + UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, &format, + &PixelTypeSizeBytes)); + + try { + + ScopedContext Active(hDevice->getContext()); + + CUDA_RESOURCE_DESC image_res_desc = {}; + + // We have a CUarray + image_res_desc.resType = CU_RESOURCE_TYPE_ARRAY; + image_res_desc.res.array.hArray = (CUarray)hImageMem; + + // We create surfaces in the unsampled images case as it conforms to how + // CUDA deals with unsampled images. + CUsurfObject surface; + UR_CHECK_ERROR(cuSurfObjectCreate(&surface, &image_res_desc)); + *phImage = (ur_exp_image_handle_t)surface; + + auto urMemObj = std::unique_ptr(new ur_mem_handle_t_{ + hContext, (CUarray)hImageMem, surface, pImageDesc->type}); + + if (urMemObj == nullptr) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + *phMem = urMemObj.release(); + + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hImageMem, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, ur_sampler_handle_t hSampler, + ur_mem_handle_t *phMem, ur_exp_image_handle_t *phImage) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + ScopedContext Active(hDevice->getContext()); + + unsigned int NumChannels = 0; + UR_CHECK_ERROR( + urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels)); + + CUarray_format format; + size_t PixelTypeSizeBytes; + UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, &format, + &PixelTypeSizeBytes)); + + try { + CUDA_RESOURCE_DESC image_res_desc = {}; + + unsigned int mem_type; + // If this function doesn't return successfully, we assume that hImageMem is + // a CUarray or CUmipmappedArray. If this function returns successfully, we + // check whether hImageMem is device memory (even managed memory isn't + // considered shared). + CUresult Err = cuPointerGetAttribute( + &mem_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)hImageMem); + if (Err != CUDA_SUCCESS) { + // We have a CUarray + if (pImageDesc->numMipLevel == 1) { + image_res_desc.resType = CU_RESOURCE_TYPE_ARRAY; + image_res_desc.res.array.hArray = (CUarray)hImageMem; + } + // We have a CUmipmappedArray + else { + image_res_desc.resType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; + image_res_desc.res.mipmap.hMipmappedArray = (CUmipmappedArray)hImageMem; + } + } else if (mem_type == CU_MEMORYTYPE_DEVICE) { + // We have a USM pointer + if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { + image_res_desc.resType = CU_RESOURCE_TYPE_LINEAR; + image_res_desc.res.linear.devPtr = (CUdeviceptr)hImageMem; + image_res_desc.res.linear.format = format; + image_res_desc.res.linear.numChannels = NumChannels; + image_res_desc.res.linear.sizeInBytes = + pImageDesc->width * PixelTypeSizeBytes * NumChannels; + } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { + image_res_desc.resType = CU_RESOURCE_TYPE_PITCH2D; + image_res_desc.res.pitch2D.devPtr = (CUdeviceptr)hImageMem; + image_res_desc.res.pitch2D.format = format; + image_res_desc.res.pitch2D.numChannels = NumChannels; + image_res_desc.res.pitch2D.width = pImageDesc->width; + image_res_desc.res.pitch2D.height = pImageDesc->height; + image_res_desc.res.pitch2D.pitchInBytes = pImageDesc->rowPitch; + } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { + // Cannot create 3D image from USM. + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + // Unknown image memory type. + return UR_RESULT_ERROR_INVALID_VALUE; + } + + UR_CHECK_ERROR(urTextureCreate(hContext, hSampler, pImageDesc, + image_res_desc, phImage)); + + auto urMemObj = std::unique_ptr(new ur_mem_handle_t_{ + hContext, (CUarray)hImageMem, (CUtexObject)*phImage, hSampler, + pImageDesc->type}); + + if (urMemObj == nullptr) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + *phMem = urMemObj.release(); + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, void *pDst, void *pSrc, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, + ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, + ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + UR_ASSERT((imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || + imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || + imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE), + UR_RESULT_ERROR_INVALID_VALUE); + + unsigned int NumChannels = 0; + size_t PixelTypeSizeBytes = 0; + + UR_CHECK_ERROR( + urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels)); + + // We need to get this now in bytes for calculating the total image size + // later. + UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, nullptr, + &PixelTypeSizeBytes)); + + size_t PixelSizeBytes = PixelTypeSizeBytes * NumChannels; + + try { + ScopedContext Active(hQueue->getContext()); + CUstream Stream = hQueue->getNextTransferStream(); + enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList); + // We have to use a different copy function for each image dimensionality. + + if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { + if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { + size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width; + char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes); + UR_CHECK_ERROR( + cuMemcpyHtoAAsync((CUarray)pDst, dstOffset.x * PixelSizeBytes, + (void *)SrcWithOffset, CopyExtentBytes, Stream)); + } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { + CUDA_MEMCPY2D cpy_desc = {}; + cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; + cpy_desc.srcHost = pSrc; + cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes; + cpy_desc.srcY = srcOffset.y; + cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes; + cpy_desc.dstY = dstOffset.y; + cpy_desc.srcPitch = hostExtent.width * PixelSizeBytes; + if (pImageDesc->rowPitch == 0) { + cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; + cpy_desc.dstArray = (CUarray)pDst; + } else { + // Pitched memory + cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE; + cpy_desc.dstDevice = (CUdeviceptr)pDst; + cpy_desc.dstPitch = pImageDesc->rowPitch; + } + cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width; + cpy_desc.Height = copyExtent.height; + UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream)); + } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { + CUDA_MEMCPY3D cpy_desc = {}; + cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes; + cpy_desc.srcY = srcOffset.y; + cpy_desc.srcZ = srcOffset.z; + cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes; + cpy_desc.dstY = dstOffset.y; + cpy_desc.dstZ = dstOffset.z; + cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; + cpy_desc.srcHost = pSrc; + cpy_desc.srcPitch = hostExtent.width * PixelSizeBytes; + cpy_desc.srcHeight = hostExtent.height; + cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; + cpy_desc.dstArray = (CUarray)pDst; + cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width; + cpy_desc.Height = copyExtent.height; + cpy_desc.Depth = copyExtent.depth; + UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); + } + } else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { + if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { + size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width; + size_t src_offset_bytes = PixelSizeBytes * srcOffset.x; + void *dst_with_offset = + (void *)((char *)pDst + (PixelSizeBytes * dstOffset.x)); + UR_CHECK_ERROR(cuMemcpyAtoHAsync(dst_with_offset, (CUarray)pSrc, + src_offset_bytes, CopyExtentBytes, + Stream)); + } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { + CUDA_MEMCPY2D cpy_desc = {}; + cpy_desc.srcXInBytes = srcOffset.x; + cpy_desc.srcY = srcOffset.y; + cpy_desc.dstXInBytes = dstOffset.x; + cpy_desc.dstY = dstOffset.y; + cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; + cpy_desc.dstHost = pDst; + if (pImageDesc->rowPitch == 0) { + cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; + cpy_desc.srcArray = (CUarray)pSrc; + } else { + // Pitched memory + cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE; + cpy_desc.srcPitch = pImageDesc->rowPitch; + cpy_desc.srcDevice = (CUdeviceptr)pSrc; + } + cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width; + cpy_desc.Height = copyExtent.height; + UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream)); + } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { + CUDA_MEMCPY3D cpy_desc = {}; + cpy_desc.srcXInBytes = srcOffset.x; + cpy_desc.srcY = srcOffset.y; + cpy_desc.srcZ = srcOffset.z; + cpy_desc.dstXInBytes = dstOffset.x; + cpy_desc.dstY = dstOffset.y; + cpy_desc.dstZ = dstOffset.z; + cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; + cpy_desc.srcArray = (CUarray)pSrc; + cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; + cpy_desc.dstHost = pDst; + cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width; + cpy_desc.Height = copyExtent.height; + cpy_desc.Depth = copyExtent.depth; + UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); + } + } else { + /// imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE + /// TODO: implemet device to device copy + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + if (phEvent) { + auto NewEvent = ur_event_handle_t_::makeNative(UR_COMMAND_MEM_IMAGE_COPY, + hQueue, Stream); + NewEvent->record(); + *phEvent = NewEvent; + } + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( + ur_exp_image_mem_handle_t hImageMem, ur_image_info_t propName, + void *pPropValue, size_t *pPropSizeRet) { + + CUDA_ARRAY3D_DESCRIPTOR ArrayDesc; + UR_CHECK_ERROR(cuArray3DGetDescriptor(&ArrayDesc, (CUarray)hImageMem)); + switch (propName) { + case UR_IMAGE_INFO_WIDTH: + if (pPropValue) { + *(size_t *)pPropValue = ArrayDesc.Width; + } + if (pPropSizeRet) { + *pPropSizeRet = sizeof(size_t); + } + return UR_RESULT_SUCCESS; + case UR_IMAGE_INFO_HEIGHT: + if (pPropValue) { + *(size_t *)pPropValue = ArrayDesc.Height; + } + if (pPropSizeRet) { + *pPropSizeRet = sizeof(size_t); + } + return UR_RESULT_SUCCESS; + case UR_IMAGE_INFO_DEPTH: + if (pPropValue) { + *(size_t *)pPropValue = ArrayDesc.Depth; + } + if (pPropSizeRet) { + *pPropSizeRet = sizeof(size_t); + } + return UR_RESULT_SUCCESS; + case UR_IMAGE_INFO_FORMAT: + ur_image_channel_type_t ChannelType; + ur_image_channel_order_t ChannelOrder; + UR_CHECK_ERROR(cudaToUrImageChannelFormat(ArrayDesc.Format, &ChannelType)); + // CUDA does not have a notion of channel "order" in the same way that + // SYCL 1.2.1 does. + switch (ArrayDesc.NumChannels) { + case 1: + ChannelOrder = UR_IMAGE_CHANNEL_ORDER_R; + break; + case 2: + ChannelOrder = UR_IMAGE_CHANNEL_ORDER_RG; + break; + case 4: + ChannelOrder = UR_IMAGE_CHANNEL_ORDER_RGBA; + break; + } + if (pPropValue) { + ((ur_image_format_t *)pPropValue)->channelType = ChannelType; + ((ur_image_format_t *)pPropValue)->channelOrder = ChannelOrder; + } + if (pPropSizeRet) { + *pPropSizeRet = sizeof(ur_image_format_t); + } + return UR_RESULT_SUCCESS; + default: + return UR_RESULT_ERROR_INVALID_VALUE; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hImageMem, uint32_t mipmapLevel, + ur_exp_image_mem_handle_t *phImageMem) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + try { + ScopedContext Active(hDevice->getContext()); + CUarray ImageArray; + UR_CHECK_ERROR(cuMipmappedArrayGetLevel( + &ImageArray, (CUmipmappedArray)hImageMem, mipmapLevel)); + *phImageMem = (ur_exp_image_mem_handle_t)ImageArray; + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hMem) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + ScopedContext Active(hDevice->getContext()); + try { + UR_CHECK_ERROR(cuMipmappedArrayDestroy((CUmipmappedArray)hMem)); + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportOpaqueFDExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + ur_exp_interop_mem_desc_t *pInteropMemDesc, + ur_exp_interop_mem_handle_t *phInteropMem) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + try { + ScopedContext Active(hDevice->getContext()); + + CUDA_EXTERNAL_MEMORY_HANDLE_DESC extMemDesc = {}; + extMemDesc.size = size; + + void *pNext = const_cast(pInteropMemDesc->pNext); + while (pNext != nullptr) { + const ur_base_desc_t *BaseDesc = + reinterpret_cast(pNext); + if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR) { + const ur_exp_file_descriptor_t *FileDescriptor = + reinterpret_cast(pNext); + + extMemDesc.handle.fd = FileDescriptor->fd; + extMemDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD; + } else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + pNext = const_cast(BaseDesc->pNext); + } + + CUexternalMemory extMem; + UR_CHECK_ERROR(cuImportExternalMemory(&extMem, &extMemDesc)); + *phInteropMem = (ur_exp_interop_mem_handle_t)extMem; + + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_interop_mem_handle_t hInteropMem, + ur_exp_image_mem_handle_t *phImageMem) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + unsigned int NumChannels = 0; + UR_CHECK_ERROR( + urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels)); + + CUarray_format format; + UR_CHECK_ERROR( + urToCudaImageChannelFormat(pImageFormat->channelType, &format, nullptr)); + + try { + ScopedContext Active(hDevice->getContext()); + + CUDA_ARRAY3D_DESCRIPTOR ArrayDesc = {}; + ArrayDesc.Width = pImageDesc->width; + ArrayDesc.Height = pImageDesc->height; + ArrayDesc.Depth = pImageDesc->depth; + ArrayDesc.NumChannels = NumChannels; + ArrayDesc.Format = format; + + CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapDesc = {}; + mipmapDesc.numLevels = 1; + mipmapDesc.arrayDesc = ArrayDesc; + + CUmipmappedArray memMipMap; + UR_CHECK_ERROR(cuExternalMemoryGetMappedMipmappedArray( + &memMipMap, (CUexternalMemory)hInteropMem, &mipmapDesc)); + + CUarray memArray; + UR_CHECK_ERROR(cuMipmappedArrayGetLevel(&memArray, memMipMap, 0)); + + *phImageMem = (ur_exp_image_mem_handle_t)memArray; + + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_interop_mem_handle_t hInteropMem) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + try { + ScopedContext Active(hDevice->getContext()); + UR_CHECK_ERROR(cuDestroyExternalMemory((CUexternalMemory)hInteropMem)); + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesImportExternalSemaphoreOpaqueFDExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_interop_semaphore_desc_t *pInteropSemaphoreDesc, + ur_exp_interop_semaphore_handle_t *phInteropSemaphoreHandle) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + try { + ScopedContext Active(hDevice->getContext()); + + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC extSemDesc = {}; + + void *pNext = const_cast(pInteropSemaphoreDesc->pNext); + while (pNext != nullptr) { + const ur_base_desc_t *BaseDesc = + reinterpret_cast(pNext); + if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR) { + const ur_exp_file_descriptor_t *FileDescriptor = + reinterpret_cast(pNext); + + extSemDesc.handle.fd = FileDescriptor->fd; + extSemDesc.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD; + } else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + pNext = const_cast(BaseDesc->pNext); + } + + CUexternalSemaphore semaphore; + UR_CHECK_ERROR(cuImportExternalSemaphore(&semaphore, &extSemDesc)); + + *phInteropSemaphoreHandle = (ur_exp_interop_semaphore_handle_t)semaphore; + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_interop_semaphore_handle_t hInteropSemaphore) { + UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), + UR_RESULT_ERROR_INVALID_CONTEXT); + + try { + ScopedContext Active(hDevice->getContext()); + UR_CHECK_ERROR( + cuDestroyExternalSemaphore((CUexternalSemaphore)hInteropSemaphore)); + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + try { + ScopedContext Active(hQueue->getContext()); + CUstream Stream = hQueue->getNextTransferStream(); + + enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList); + + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS SemWaitParams = {}; + + // Wait for one external semaphore + UR_CHECK_ERROR(cuWaitExternalSemaphoresAsync( + (CUexternalSemaphore *)&hSemaphore, &SemWaitParams, 1 /* numExtSems */, + Stream)); + + if (phEvent) { + auto NewEvent = ur_event_handle_t_::makeNative( + UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP, hQueue, Stream); + NewEvent->record(); + *phEvent = NewEvent; + } + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + try { + ScopedContext Active(hQueue->getContext()); + CUstream Stream = hQueue->getNextTransferStream(); + + enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList); + + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS SemSignalParams = {}; + + // Signal one external semaphore + UR_CHECK_ERROR(cuSignalExternalSemaphoresAsync( + (CUexternalSemaphore *)&hSemaphore, &SemSignalParams, + 1 /* numExtSems */, Stream)); + + if (phEvent) { + auto NewEvent = ur_event_handle_t_::makeNative( + UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP, hQueue, Stream); + NewEvent->record(); + *phEvent = NewEvent; + } + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/image.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/image.hpp new file mode 100644 index 0000000000000..35d71b01e2022 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/image.hpp @@ -0,0 +1,31 @@ +//===--------- image.hpp - CUDA Adapter ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include +#include +#include + +#include "common.hpp" +ur_result_t urCalculateNumChannels(ur_image_channel_order_t order, + unsigned int *num_channels); + +ur_result_t +urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type, + CUarray_format *return_cuda_format, + size_t *return_pixel_types_size_bytes); + +ur_result_t +cudaToUrImageChannelFormat(CUarray_format cuda_format, + ur_image_channel_type_t *return_image_channel_type); + +ur_result_t urTextureCreate(ur_context_handle_t hContext, + ur_sampler_desc_t SamplerDesc, + const ur_image_desc_t *pImageDesc, + CUDA_RESOURCE_DESC ResourceDesc, + ur_exp_image_handle_t *phRetImage); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.hpp index a986607a65d5e..0aa1e90f75ad6 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.hpp @@ -23,7 +23,7 @@ struct ur_mem_handle_t_ { /// Reference counting of the handler std::atomic_uint32_t RefCount; - enum class Type { Buffer, Surface } MemType; + enum class Type { Buffer, Surface, Texture } MemType; // Original mem flags passed ur_mem_flags_t MemFlags; @@ -122,6 +122,21 @@ struct ur_mem_handle_t_ { ur_mem_type_t getImageType() const noexcept { return ImageType; } } SurfaceMem; + + struct ImageMem { + CUarray Array; + void *Handle; + ur_mem_type_t ImageType; + ur_sampler_handle_t Sampler; + + CUarray get_array() const noexcept { return Array; } + + void *get_handle() const noexcept { return Handle; } + + ur_mem_type_t get_image_type() const noexcept { return ImageType; } + + ur_sampler_handle_t get_sampler() const noexcept { return Sampler; } + } ImageMem; } Mem; /// Constructs the UR mem handler for a non-typed allocation ("buffer") @@ -159,6 +174,30 @@ struct ur_mem_handle_t_ { urContextRetain(Context); } + /// Constructs the UR allocation for an unsampled image object + ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, + CUsurfObject Surf, ur_mem_type_t ImageType) + : Context{Context}, RefCount{1}, MemType{Type::Surface} { + + Mem.ImageMem.Array = Array; + Mem.ImageMem.Handle = (void *)Surf; + Mem.ImageMem.ImageType = ImageType; + Mem.ImageMem.Sampler = nullptr; + urContextRetain(Context); + } + + /// Constructs the UR allocation for a sampled image object + ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, CUtexObject Tex, + ur_sampler_handle_t Sampler, ur_mem_type_t ImageType) + : Context{Context}, RefCount{1}, MemType{Type::Texture} { + + Mem.ImageMem.Array = Array; + Mem.ImageMem.Handle = (void *)Tex; + Mem.ImageMem.ImageType = ImageType; + Mem.ImageMem.Sampler = Sampler; + urContextRetain(Context); + } + ~ur_mem_handle_t_() { if (isBuffer() && isSubBuffer()) { urMemRelease(Mem.BufferMem.Parent); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp index 1de5786ca2e03..0fe6e2c0a3cea 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp @@ -25,6 +25,21 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc, Sampler->Props |= UR_SAMPLER_ADDRESSING_MODE_CLAMP << 2; } + void *pNext = const_cast(pDesc->pNext); + while (pNext != nullptr) { + const ur_base_desc_t *BaseDesc = + reinterpret_cast(pNext); + if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES) { + const ur_exp_sampler_mip_properties_t *SamplerMipProperties = + reinterpret_cast(pNext); + Sampler->MaxMipmapLevelClamp = SamplerMipProperties->maxMipmapLevelClamp; + Sampler->MinMipmapLevelClamp = SamplerMipProperties->minMipmapLevelClamp; + Sampler->MaxAnisotropy = SamplerMipProperties->maxAnisotropy; + Sampler->Props |= SamplerMipProperties->mipFilterMode << 5; + } + pNext = const_cast(BaseDesc->pNext); + } + *phSampler = Sampler.release(); return UR_RESULT_SUCCESS; } @@ -40,17 +55,15 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, case UR_SAMPLER_INFO_CONTEXT: return ReturnValue(hSampler->Context); case UR_SAMPLER_INFO_NORMALIZED_COORDS: { - bool NormCoordsProp = static_cast(hSampler->Props); + bool NormCoordsProp = hSampler->isNormalizedCoords(); return ReturnValue(NormCoordsProp); } case UR_SAMPLER_INFO_FILTER_MODE: { - auto FilterProp = - static_cast((hSampler->Props >> 1) & 0x1); + ur_sampler_filter_mode_t FilterProp = hSampler->getFilterMode(); return ReturnValue(FilterProp); } case UR_SAMPLER_INFO_ADDRESSING_MODE: { - auto AddressingProp = - static_cast(hSampler->Props >> 2); + ur_sampler_addressing_mode_t AddressingProp = hSampler->getAddressingMode(); return ReturnValue(AddressingProp); } default: diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp index 6dbbb124ffc3e..47af74296cb95 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp @@ -11,19 +11,44 @@ /// Implementation of samplers for CUDA /// /// Sampler property layout: -/// | 31 30 ... 6 5 | 4 3 2 | 1 | 0 | -/// | N/A | addressing mode | fiter mode | normalize coords | +/// | | +/// ----------------------------------- +/// | 31 30 ... 6 | N/A +/// | 5 | mip filter mode +/// | 4 3 2 | addressing mode +/// | 1 | filter mode +/// | 0 | normalize coords struct ur_sampler_handle_t_ { std::atomic_uint32_t RefCount; uint32_t Props; + float MinMipmapLevelClamp; + float MaxMipmapLevelClamp; + float MaxAnisotropy; ur_context_handle_t Context; ur_sampler_handle_t_(ur_context_handle_t Context) - : RefCount(1), Props(0), Context(Context) {} + : RefCount(1), Props(0), MinMipmapLevelClamp(0.0f), + MaxMipmapLevelClamp(0.0f), MaxAnisotropy(0.0f), Context(Context) {} uint32_t incrementReferenceCount() noexcept { return ++RefCount; } uint32_t decrementReferenceCount() noexcept { return --RefCount; } uint32_t getReferenceCount() const noexcept { return RefCount; } + + ur_bool_t isNormalizedCoords() const noexcept { + return static_cast(Props & 0b1); + } + + ur_sampler_filter_mode_t getFilterMode() const noexcept { + return static_cast((Props >> 1) & 0b1); + } + + ur_sampler_addressing_mode_t getAddressingMode() const noexcept { + return static_cast((Props >> 2) & 0b111); + } + + ur_sampler_filter_mode_t getMipFilterMode() const noexcept { + return static_cast((Props >> 5) & 0b1); + } }; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp index 9c5934c0ae9c1..119bde5955f5c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp @@ -301,6 +301,49 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnUnsampledImageHandleDestroyExp = + urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportOpaqueFDExp = urBindlessImagesImportOpaqueFDExp; + pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; + pDdiTable->pfnReleaseInteropExp = urBindlessImagesReleaseInteropExp; + pDdiTable->pfnImportExternalSemaphoreOpaqueFDExp = + urBindlessImagesImportExternalSemaphoreOpaqueFDExp; + pDdiTable->pfnDestroyExternalSemaphoreExp = + urBindlessImagesDestroyExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + urBindlessImagesSignalExternalSemaphoreExp; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( + ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; + return UR_RESULT_SUCCESS; +} + #if defined(__cplusplus) } // extern "C" #endif diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/image.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/image.cpp new file mode 100644 index 0000000000000..a2e55b9fb5c3a --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/image.cpp @@ -0,0 +1,251 @@ +//===--------- image.cpp - Level Zero Adapter ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "image.hpp" +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, + size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, + size_t *pResultPitch) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = pUSMDesc; + std::ignore = pool; + std::ignore = widthInBytes; + std::ignore = height; + std::ignore = elementSizeBytes; + std::ignore = ppMem; + std::ignore = pResultPitch; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesUnsampledImageHandleDestroyExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_handle_t hImage) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hImage; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesSampledImageHandleDestroyExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_handle_t hImage) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hImage; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_mem_handle_t *phImageMem) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = pImageFormat; + std::ignore = pImageDesc; + std::ignore = phImageMem; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hImageMem) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hImageMem; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hImageMem, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, ur_mem_handle_t *phMem, + ur_exp_image_handle_t *phImage) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hImageMem; + std::ignore = pImageFormat; + std::ignore = pImageDesc; + std::ignore = phMem; + std::ignore = phImage; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hImageMem, const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, ur_sampler_handle_t hSampler, + ur_mem_handle_t *phMem, ur_exp_image_handle_t *phImage) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hImageMem; + std::ignore = pImageFormat; + std::ignore = pImageDesc; + std::ignore = hSampler; + std::ignore = phMem; + std::ignore = phImage; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, void *pDst, void *pSrc, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, + ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, + ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pDst; + std::ignore = pSrc; + std::ignore = pImageFormat; + std::ignore = pImageDesc; + std::ignore = imageCopyFlags; + std::ignore = srcOffset; + std::ignore = dstOffset; + std::ignore = copyExtent; + std::ignore = hostExtent; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( + ur_exp_image_mem_handle_t hImageMem, ur_image_info_t propName, + void *pPropValue, size_t *pPropSizeRet) { + std::ignore = hImageMem; + std::ignore = propName; + std::ignore = pPropValue; + std::ignore = pPropSizeRet; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hImageMem, uint32_t mipmapLevel, + ur_exp_image_mem_handle_t *phImageMem) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hImageMem; + std::ignore = mipmapLevel; + std::ignore = phImageMem; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_handle_t hMem) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hMem; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportOpaqueFDExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + ur_exp_interop_mem_desc_t *pInteropMemDesc, + ur_exp_interop_mem_handle_t *phInteropMem) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = size; + std::ignore = pInteropMemDesc; + std::ignore = phInteropMem; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_interop_mem_handle_t hInteropMem, + ur_exp_image_mem_handle_t *phImageMem) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = pImageFormat; + std::ignore = pImageDesc; + std::ignore = hInteropMem; + std::ignore = phImageMem; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_interop_mem_handle_t hInteropMem) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hInteropMem; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesImportExternalSemaphoreOpaqueFDExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_interop_semaphore_desc_t *pInteropSemaphoreDesc, + ur_exp_interop_semaphore_handle_t *phInteropSemaphoreHandle) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = pInteropSemaphoreDesc; + std::ignore = phInteropSemaphoreHandle; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_interop_semaphore_handle_t hInteropSemaphore) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hInteropSemaphore; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hSemaphore; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hSemaphore; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/image.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/image.hpp new file mode 100644 index 0000000000000..9ef417318aa00 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/image.hpp @@ -0,0 +1,13 @@ +//===--------- image.hpp - Level Zero Adapter ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include +#include +#include +#include diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_interface_loader.cpp index d56448ca35e12..9c330b5b20bfb 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_interface_loader.cpp @@ -343,3 +343,46 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( return retVal; } + +UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnUnsampledImageHandleDestroyExp = + urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportOpaqueFDExp = urBindlessImagesImportOpaqueFDExp; + pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; + pDdiTable->pfnReleaseInteropExp = urBindlessImagesReleaseInteropExp; + pDdiTable->pfnImportExternalSemaphoreOpaqueFDExp = + urBindlessImagesImportExternalSemaphoreOpaqueFDExp; + pDdiTable->pfnDestroyExternalSemaphoreExp = + urBindlessImagesDestroyExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + urBindlessImagesSignalExternalSemaphoreExp; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( + ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp index 1c503134a0e86..28db7d9c79ade 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp @@ -24,6 +24,7 @@ #include "context.hpp" #include "device.hpp" #include "event.hpp" +#include "image.hpp" #include "kernel.hpp" #include "memory.hpp" #include "platform.hpp" diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index bbb0003a1f012..dc008b9f54330 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -81,6 +81,7 @@ piSamplerGetInfo piSamplerRelease piSamplerRetain piTearDown +piextBindlessImageSamplerCreate piextCommandBufferCreate piextCommandBufferFinalize piextCommandBufferMemBufferCopy @@ -96,6 +97,7 @@ piextCommandBufferRetain piextContextCreateWithNativeHandle piextContextGetNativeHandle piextContextSetExtendedDeleter +piextDestroyExternalSemaphore piextDeviceCreateWithNativeHandle piextDeviceGetNativeHandle piextDeviceSelectBinary @@ -107,6 +109,7 @@ piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle piextEventGetNativeHandle piextGetDeviceFunctionPointer +piextImportExternalSemaphoreOpaqueFD piextKernelCreateWithNativeHandle piextKernelGetNativeHandle piextKernelSetArgMemObj @@ -114,7 +117,20 @@ piextKernelSetArgPointer piextKernelSetArgSampler piextMemCreateWithNativeHandle piextMemGetNativeHandle +piextMemImageAllocate +piextMemImageCopy piextMemImageCreateWithNativeHandle +piextMemImageFree +piextMemImageGetInfo +piextMemImportOpaqueFD +piextMemMapExternalArray +piextMemMipmapFree +piextMemMipmapGetLevel +piextMemReleaseInterop +piextMemSampledImageCreate +piextMemSampledImageHandleDestroy +piextMemUnsampledImageCreate +piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle @@ -125,6 +141,7 @@ piextProgramSetSpecializationConstant piextQueueCreate piextQueueCreateWithNativeHandle piextQueueGetNativeHandle +piextSignalExternalSemaphore piextUSMDeviceAlloc piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise @@ -136,4 +153,6 @@ piextUSMEnqueuePrefetch piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc +piextUSMPitchedAlloc piextUSMSharedAlloc +piextWaitExternalSemaphore diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index b6f8842d2188b..a8e01a2996d2a 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -432,6 +432,145 @@ inline pi_result mock_piMemImageCreate(pi_context context, pi_mem_flags flags, return PI_SUCCESS; } +inline pi_result +mock_piextMemUnsampledImageHandleDestroy(pi_context context, pi_device device, + pi_image_handle handle) { + return PI_SUCCESS; +} + +inline pi_result +mock_piextMemSampledImageHandleDestroy(pi_context context, pi_device device, + pi_image_handle handle) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemImageAllocate(pi_context context, + pi_device device, + pi_image_format *image_format, + pi_image_desc *image_desc, + pi_image_mem_handle *ret_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemMipmapGetLevel(pi_context context, + pi_device device, + pi_image_mem_handle mip_mem, + unsigned int level, + pi_image_mem_handle *ret_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemImageFree(pi_context context, pi_device device, + pi_image_mem_handle memory_handle) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemMipmapFree(pi_context context, pi_device device, + pi_image_mem_handle memory_handle) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemUnsampledImageCreate( + pi_context context, pi_device device, pi_image_mem_handle img_mem, + pi_image_format *image_format, pi_image_desc *desc, pi_mem *ret_mem, + pi_image_handle *ret_handle) { + return PI_SUCCESS; +} + +inline pi_result +mock_piextMemImportOpaqueFD(pi_context context, pi_device device, size_t size, + int file_descriptor, + pi_interop_mem_handle *ret_handle) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemMapExternalArray(pi_context context, + pi_device device, + pi_image_format *image_format, + pi_image_desc *image_desc, + pi_interop_mem_handle mem_handle, + pi_image_mem_handle *ret_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemReleaseInterop(pi_context context, + pi_device device, + pi_interop_mem_handle ext_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextImportExternalSemaphoreOpaqueFD( + pi_context context, pi_device device, int file_descriptor, + pi_interop_semaphore_handle *ret_handle) { + return PI_SUCCESS; +} + +inline pi_result +mock_piextDestroyExternalSemaphore(pi_context context, pi_device device, + pi_interop_semaphore_handle sem_handle) { + return PI_SUCCESS; +} + +inline pi_result mock_piextWaitExternalSemaphore( + pi_queue command_queue, pi_interop_semaphore_handle sem_handle, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event) { + return PI_SUCCESS; +} + +inline pi_result mock_piextSignalExternalSemaphore( + pi_queue command_queue, pi_interop_semaphore_handle sem_handle, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemUnsampledImageCreateInterop( + pi_context context, pi_device device, pi_image_format *image_format, + pi_image_desc *desc, pi_interop_mem_handle ext_mem_handle, + pi_image_handle *ret_img_handle) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemSampledImageCreateInterop( + pi_context context, pi_device device, pi_image_format *image_format, + pi_image_desc *desc, pi_sampler sampler, + pi_interop_mem_handle ext_mem_handle, pi_image_handle *ret_img_handle) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemSampledImageCreate( + pi_context context, pi_device device, pi_image_mem_handle img_mem, + pi_image_format *image_format, pi_image_desc *desc, pi_sampler sampler, + pi_mem *ret_mem, pi_image_handle *ret_handle) { + return PI_SUCCESS; +} + +inline pi_result mock_piextBindlessImageSamplerCreate( + pi_context context, const pi_sampler_properties *sampler_properties, + const float minMipmapLevelClamp, const float maxMipmapLevelClamp, + const float maxAnisotropy, pi_sampler *result_sampler) { + *result_sampler = createDummyHandle(); + return PI_SUCCESS; +} + +inline pi_result mock_piextMemImageCopy( + pi_queue command_queue, void *dst_ptr, void *src_ptr, + const pi_image_format *image_format, const pi_image_desc *image_desc, + const pi_image_copy_flags flags, pi_image_offset src_offset, + pi_image_offset dst_offset, pi_image_region copy_extent, + pi_image_region host_extent, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { + return PI_SUCCESS; +} + +inline pi_result mock_piextMemImageGetInfo(const pi_image_mem_handle mem_handle, + pi_image_info param_name, + void *param_value, + size_t *param_value_size_ret) { + return PI_SUCCESS; +} + inline pi_result mock_piMemGetInfo(pi_mem mem, pi_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { @@ -1022,6 +1161,16 @@ inline pi_result mock_piextUSMSharedAlloc(void **result_ptr, pi_context context, return PI_SUCCESS; } +inline pi_result mock_piextUSMPitchedAlloc(void **result_ptr, + size_t *result_pitch, + pi_context context, pi_device device, + pi_usm_mem_properties *properties, + size_t width_in_bytes, size_t height, + unsigned int element_size_bytes) { + *result_ptr = createDummyHandle(width_in_bytes * height); + return PI_SUCCESS; +} + inline pi_result mock_piextUSMFree(pi_context context, void *ptr) { return PI_SUCCESS; }