diff --git a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp index 024d265e860af..b57afab1518c1 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp @@ -7,8 +7,176 @@ #pragma clang diagnostic ignored "-Waddress-of-temporary" #include "read_write_unsampled.h" +#include "../helpers/common.hpp" + +static DXGI_FORMAT toDXGIFormat(int NChannels, + sycl::image_channel_type channelType) { + switch (channelType) { + case sycl::image_channel_type::snorm_int8: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R8_SNORM; + case 2: + return DXGI_FORMAT_R8G8_SNORM; + case 4: + return DXGI_FORMAT_R8G8B8A8_SNORM; + default: + break; + } + case sycl::image_channel_type::snorm_int16: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R16_SNORM; + case 2: + return DXGI_FORMAT_R16G16_SNORM; + case 4: + return DXGI_FORMAT_R16G16B16A16_SNORM; + default: + break; + } + case sycl::image_channel_type::unorm_int8: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R8_UNORM; + case 2: + return DXGI_FORMAT_R8G8_UNORM; + case 4: + return DXGI_FORMAT_R8G8B8A8_UNORM; + default: + break; + } + case sycl::image_channel_type::unorm_int16: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R16_UNORM; + case 2: + return DXGI_FORMAT_R16G16_UNORM; + case 4: + return DXGI_FORMAT_R16G16B16A16_UNORM; + default: + break; + } + case sycl::image_channel_type::unorm_short_565: + return DXGI_FORMAT_B5G6R5_UNORM; + case sycl::image_channel_type::unorm_short_555: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case sycl::image_channel_type::unorm_int_101010: + return DXGI_FORMAT_R10G10B10A2_UNORM; + case sycl::image_channel_type::signed_int8: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R8_SINT; + case 2: + return DXGI_FORMAT_R8G8_SINT; + case 4: + return DXGI_FORMAT_R8G8B8A8_SINT; + default: + break; + } + case sycl::image_channel_type::signed_int16: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R16_SINT; + case 2: + return DXGI_FORMAT_R16G16_SINT; + case 4: + return DXGI_FORMAT_R16G16B16A16_SINT; + default: + break; + } + case sycl::image_channel_type::signed_int32: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R32_SINT; + case 2: + return DXGI_FORMAT_R32G32_SINT; + case 4: + return DXGI_FORMAT_R32G32B32A32_SINT; + default: + break; + } + case sycl::image_channel_type::unsigned_int8: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R8_UINT; + case 2: + return DXGI_FORMAT_R8G8_UINT; + case 4: + return DXGI_FORMAT_R8G8B8A8_UINT; + default: + break; + } + case sycl::image_channel_type::unsigned_int16: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R16_UINT; + case 2: + return DXGI_FORMAT_R16G16_UINT; + case 4: + return DXGI_FORMAT_R16G16B16A16_UINT; + default: + break; + } + case sycl::image_channel_type::unsigned_int32: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R32_UINT; + case 2: + return DXGI_FORMAT_R32G32_UINT; + case 4: + return DXGI_FORMAT_R32G32B32A32_UINT; + default: + break; + } + case sycl::image_channel_type::fp16: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R16_FLOAT; + case 2: + return DXGI_FORMAT_R16G16_FLOAT; + case 4: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + default: + break; + } + case sycl::image_channel_type::fp32: + switch (NChannels) { + case 1: + return DXGI_FORMAT_R32_FLOAT; + case 2: + return DXGI_FORMAT_R32G32_FLOAT; + case 4: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + default: + break; + } + default: + break; + } + std::cerr << "Unsupported image_channel_type in toDXGIFormat\n"; + exit(-1); +} + +template +DX12InteropTest::DX12InteropTest( + sycl::image_channel_type channelType, sycl::range globalSize, + sycl::range localSize) + : m_channelType(channelType), m_globalSize(globalSize), + m_localSize(localSize) { + m_width = m_globalSize[0]; + m_height = 1; + m_depth = 1; + if constexpr (NDims > 1) { + m_height = m_globalSize[1]; + if constexpr (NDims > 2) + m_depth = m_globalSize[2]; + } + m_numElems = m_width * m_height * m_depth * NChannels; + m_syclQueue = sycl::queue{m_syclDevice, {sycl::property::queue::in_order{}}}; +} -void DX12InteropTest::initDX12Device() { +template +void DX12InteropTest::initDX12Device() { // Create DXGI factory. ThrowIfFailed(CreateDXGIFactory2(0 /* dxgiFactoryFlags */, IID_PPV_ARGS(&m_dx12Factory))); @@ -22,7 +190,8 @@ void DX12InteropTest::initDX12Device() { IID_PPV_ARGS(&m_dx12Device))); } -void DX12InteropTest::initDX12CommandList() { +template +void DX12InteropTest::initDX12CommandList() { // Describe and create the command queue. D3D12_COMMAND_QUEUE_DESC queueDesc = {D3D12_COMMAND_LIST_TYPE_DIRECT, 0, D3D12_COMMAND_QUEUE_FLAG_NONE, 0}; @@ -39,7 +208,8 @@ void DX12InteropTest::initDX12CommandList() { IID_PPV_ARGS(&m_dx12CommandList))); } -void DX12InteropTest::initDX12Resources() { +template +void DX12InteropTest::initDX12Resources() { // Define default heap properties. D3D12_HEAP_PROPERTIES defaultHeapProperties = {}; @@ -49,15 +219,20 @@ void DX12InteropTest::initDX12Resources() { defaultHeapProperties.CreationNodeMask = 1; defaultHeapProperties.VisibleNodeMask = 1; - // Define texture resource descriptor (1D, 32-bit integer). + // Define texture resource descriptor. D3D12_RESOURCE_DESC textureResourceDesc = {}; - textureResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D; + if constexpr (NDims == 1) + textureResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D; + else if constexpr (NDims == 2) + textureResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + else + textureResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; textureResourceDesc.Alignment = 0; textureResourceDesc.Width = m_width; - textureResourceDesc.Height = 1; - textureResourceDesc.DepthOrArraySize = 1; + textureResourceDesc.Height = m_height; + textureResourceDesc.DepthOrArraySize = m_depth; textureResourceDesc.MipLevels = 0; - textureResourceDesc.Format = DXGI_FORMAT_R32_UINT; + textureResourceDesc.Format = toDXGIFormat(NChannels, m_channelType); textureResourceDesc.SampleDesc = DXGI_SAMPLE_DESC{1, 0}; textureResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; textureResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; @@ -83,6 +258,7 @@ void DX12InteropTest::initDX12Resources() { // Create the DX12 fence and map to a SYCL semaphore. ThrowIfFailed(m_dx12Device->CreateFence( m_sharedFenceValue, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_dx12Fence))); + m_sharedFenceValue++; #ifdef TEST_SEMAPHORE_IMPORT ThrowIfFailed(m_dx12Device->CreateSharedHandle(m_dx12Fence.Get(), nullptr, @@ -102,7 +278,9 @@ void DX12InteropTest::initDX12Resources() { populateDX12Texture(); } -void DX12InteropTest::importDX12SharedMemoryHandle(size_t allocationSize) { +template +void DX12InteropTest::importDX12SharedMemoryHandle( + size_t allocationSize) { syclexp::external_mem_descriptor extMemDesc{ m_sharedMemoryHandle, syclexp::external_mem_handle_type::win32_nt_dx12_resource, @@ -111,14 +289,18 @@ void DX12InteropTest::importDX12SharedMemoryHandle(size_t allocationSize) { m_syclExternalMemHandle = syclexp::import_external_memory(extMemDesc, m_syclQueue); + syclexp::image_descriptor syclImageDesc{m_globalSize, NChannels, + m_channelType}; m_syclImageMemHandle = syclexp::map_external_image_memory( - m_syclExternalMemHandle, m_syclImageDesc, m_syclQueue); + m_syclExternalMemHandle, syclImageDesc, m_syclQueue); m_syclImageHandle = - syclexp::create_image(m_syclImageMemHandle, m_syclImageDesc, m_syclQueue); + syclexp::create_image(m_syclImageMemHandle, syclImageDesc, m_syclQueue); } -void DX12InteropTest::importDX12SharedSemaphoreHandle() { +template +void DX12InteropTest::importDX12SharedSemaphoreHandle() { syclexp::external_semaphore_descriptor extSemDesc{m_sharedSemaphoreHandle, syclexp::external_semaphore_handle_type::win32_nt_dx12_fence}; @@ -127,7 +309,8 @@ void DX12InteropTest::importDX12SharedSemaphoreHandle() { syclexp::import_external_semaphore(extSemDesc, m_syclQueue); } -void DX12InteropTest::callSYCLKernel() { +template +void DX12InteropTest::callSYCLKernel() { #ifdef TEST_SEMAPHORE_IMPORT // Wait for imported semaphore. This semaphore was signalled at the // end of `populateDX12Texture`. @@ -139,19 +322,40 @@ void DX12InteropTest::callSYCLKernel() { // If we do the kernel will crash. auto imgHandle = m_syclImageHandle; + using VecType = sycl::vec; + // Submit our SYCL kernel. All we do is double the value of each pixel in the // texture. try { m_syclQueue.submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<1>{{m_width}, {1}}, [=](sycl::nd_item<1> it) { - size_t dim0 = it.get_global_id(0); - - uint32_t px = syclexp::fetch_image(imgHandle, int(dim0)); - - px *= 2; - - syclexp::write_image(imgHandle, int(dim0), px); + cgh.parallel_for( + sycl::nd_range{m_globalSize, m_localSize}, + [=](sycl::nd_item it) { + if constexpr (NDims == 3) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + auto px = syclexp::fetch_image< + std::conditional_t>( + imgHandle, sycl::int3(dim0, dim1, dim2)); + px *= static_cast(2); + syclexp::write_image(imgHandle, sycl::int3(dim0, dim1, dim2), px); + } else if constexpr (NDims == 2) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + auto px = syclexp::fetch_image< + std::conditional_t>( + imgHandle, sycl::int2(dim0, dim1)); + px *= static_cast(2); + syclexp::write_image(imgHandle, sycl::int2(dim0, dim1), px); + } else { + size_t dim0 = it.get_global_id(0); + auto px = syclexp::fetch_image< + std::conditional_t>( + imgHandle, int(dim0)); + px *= static_cast(2); + syclexp::write_image(imgHandle, int(dim0), px); + } }); }); } catch (sycl::exception e) { @@ -179,12 +383,13 @@ void DX12InteropTest::callSYCLKernel() { #endif } -void DX12InteropTest::populateDX12Texture() { +template +void DX12InteropTest::populateDX12Texture() { // Set our texture data to upload. - std::vector uploadData(m_width); - for (int i = 0; i < m_width; ++i) { - uploadData[i] = i; + std::vector uploadData(m_numElems); + for (int i = 0; i < m_numElems; ++i) { + uploadData[i] = static_cast(i); } // Get required staging buffer size. @@ -223,12 +428,12 @@ void DX12InteropTest::populateDX12Texture() { // Map the upload staging buffer to host visible memory. D3D12_RANGE stagingBufferRange{0, stagingBufferSize}; - uint32_t *pStagingBufferData{}; + DType *pStagingBufferData{}; ThrowIfFailed(stagingBuffer->Map( 0, &stagingBufferRange, reinterpret_cast(&pStagingBufferData))); // Populate the staging buffer with our upload data. - for (int i = 0; i < m_width; ++i) { + for (int i = 0; i < m_numElems; ++i) { pStagingBufferData[i] = uploadData[i]; } @@ -242,10 +447,10 @@ void DX12InteropTest::populateDX12Texture() { // Set the copy source and destination footprint/locations. D3D12_PLACED_SUBRESOURCE_FOOTPRINT bufferFootprint = {}; bufferFootprint.Footprint.Width = m_width; - bufferFootprint.Footprint.Height = 1; - bufferFootprint.Footprint.Depth = 1; - bufferFootprint.Footprint.RowPitch = static_cast(stagingBufferSize); - bufferFootprint.Footprint.Format = DXGI_FORMAT_R32_UINT; + bufferFootprint.Footprint.Height = m_height; + bufferFootprint.Footprint.Depth = m_depth; + bufferFootprint.Footprint.RowPitch = m_width * sizeof(DType) * NChannels; + bufferFootprint.Footprint.Format = toDXGIFormat(NChannels, m_channelType); D3D12_TEXTURE_COPY_LOCATION copyDest = {}; copyDest.pResource = m_dx12Texture.Get(); @@ -281,11 +486,17 @@ void DX12InteropTest::populateDX12Texture() { ThrowIfFailed( m_dx12CommandQueue->Signal(m_dx12Fence.Get(), m_sharedFenceValue)); +#ifdef TEST_SEMAPHORE_IMPORT // Don't wait for the fence here. We will use the SYCL API to wait for this // fence in `callSYCLKernel`. +#else + waitDX12Fence(); + m_sharedFenceValue++; +#endif } -bool DX12InteropTest::validateOutput() { +template +bool DX12InteropTest::validateOutput() { // Reset the command list. ThrowIfFailed( @@ -328,11 +539,10 @@ bool DX12InteropTest::validateOutput() { // Set the copy source and destination footprint/locations. D3D12_PLACED_SUBRESOURCE_FOOTPRINT bufferFootprint = {}; bufferFootprint.Footprint.Width = m_width; - bufferFootprint.Footprint.Height = 1; - bufferFootprint.Footprint.Depth = 1; - bufferFootprint.Footprint.RowPitch = - static_cast(readbackBufferSize); - bufferFootprint.Footprint.Format = DXGI_FORMAT_R32_UINT; + bufferFootprint.Footprint.Height = m_height; + bufferFootprint.Footprint.Depth = m_depth; + bufferFootprint.Footprint.RowPitch = m_width * sizeof(DType) * NChannels; + bufferFootprint.Footprint.Format = toDXGIFormat(NChannels, m_channelType); D3D12_TEXTURE_COPY_LOCATION copyDest = {}; copyDest.pResource = readbackBuffer.Get(); @@ -361,8 +571,8 @@ bool DX12InteropTest::validateOutput() { m_sharedFenceValue++; // Map the readback buffer to host visible memory. - D3D12_RANGE readbackBufferRange{0, m_width}; - uint32_t *pReadbackBufferData{}; + D3D12_RANGE readbackBufferRange{0, m_numElems}; + DType *pReadbackBufferData{}; ThrowIfFailed( readbackBuffer->Map(0, &readbackBufferRange, reinterpret_cast(&pReadbackBufferData))); @@ -376,9 +586,9 @@ bool DX12InteropTest::validateOutput() { // Read back the updated texture data and validate it. bool validated = true; - for (int i = 0; i < m_width; ++i) { + for (int i = 0; i < m_numElems; ++i) { bool mismatch = false; - auto expected = i * 2; + auto expected = static_cast(i * 2); auto actual = pReadbackBufferData[i]; if (actual != expected) { @@ -407,7 +617,9 @@ bool DX12InteropTest::validateOutput() { return validated; } -void DX12InteropTest::waitDX12Fence(DWORD timeoutMilliseconds) { +template +void DX12InteropTest::waitDX12Fence( + DWORD timeoutMilliseconds) { // Check the current value of the fence to check if // GPU has finished executing the command list. if (m_dx12Fence->GetCompletedValue() < m_sharedFenceValue) { @@ -419,7 +631,8 @@ void DX12InteropTest::waitDX12Fence(DWORD timeoutMilliseconds) { } } -void DX12InteropTest::cleanupDX12() { +template +void DX12InteropTest::cleanupDX12() { // Wait for the command list to finish execution. waitDX12Fence(); @@ -432,8 +645,9 @@ void DX12InteropTest::cleanupDX12() { // ComPtr handles will be destroyed automatically. } -void DX12InteropTest::getDX12Adapter(IDXGIFactory2 *pFactory, - IDXGIAdapter1 **ppAdapter) { +template +void DX12InteropTest::getDX12Adapter( + IDXGIFactory2 *pFactory, IDXGIAdapter1 **ppAdapter) { ComPtr adapter; *ppAdapter = nullptr; @@ -464,23 +678,85 @@ void DX12InteropTest::getDX12Adapter(IDXGIFactory2 *pFactory, *ppAdapter = adapter.Detach(); } -int main() { - - bool validated = false; - - DX12InteropTest interopTestInstance(1024); +template +static bool runTest(sycl::image_channel_type channelType, + sycl::range globalSize, + sycl::range localSize) { + DX12InteropTest interopTestInstance( + channelType, globalSize, localSize); interopTestInstance.initDX12Device(); interopTestInstance.initDX12CommandList(); interopTestInstance.initDX12Resources(); interopTestInstance.callSYCLKernel(); - validated = interopTestInstance.validateOutput(); + bool validated = interopTestInstance.validateOutput(); interopTestInstance.cleanupDX12(); +#ifdef VERBOSE_PRINT + if (!validated) { + std::cerr << "\tTest failed: NDims " << NDims << " NChannels " << NChannels + << " image_channel_type " + << bindless_helpers::channelTypeToString(channelType) + << ", exiting\n"; + exit(-1); + } else { + std::cout << "\tTest passed: NDims " << NDims << " NChannels " << NChannels + << " image_channel_type " + << bindless_helpers::channelTypeToString(channelType) << "\n"; + } +#endif + + return validated; +} + +int main() { + + bool validated = true; + + sycl::range<1> globalSize1{1024}; + sycl::range<1> localSize1{1024}; + validated &= runTest<1, uint32_t, 1>(sycl::image_channel_type::unsigned_int32, + globalSize1, localSize1); + validated &= runTest<1, uint8_t, 4>(sycl::image_channel_type::unorm_int8, + globalSize1, localSize1); + validated &= runTest<1, float, 1>(sycl::image_channel_type::fp32, globalSize1, + localSize1); + validated &= runTest<1, sycl::half, 2>(sycl::image_channel_type::fp16, + globalSize1, localSize1); + validated &= runTest<1, sycl::half, 4>(sycl::image_channel_type::fp16, + globalSize1, localSize1); + + sycl::range<2> globalSize2{64, 64}; + sycl::range<2> localSize2{16, 16}; + validated &= runTest<2, uint32_t, 1>(sycl::image_channel_type::unsigned_int32, + globalSize2, localSize2); + validated &= runTest<2, uint8_t, 4>(sycl::image_channel_type::unorm_int8, + globalSize2, localSize2); + validated &= runTest<2, float, 1>(sycl::image_channel_type::fp32, globalSize2, + localSize2); + validated &= runTest<2, sycl::half, 2>(sycl::image_channel_type::fp16, + globalSize2, localSize2); + validated &= runTest<2, sycl::half, 4>(sycl::image_channel_type::fp16, + globalSize2, localSize2); + + sycl::range<3> globalSize3{64, 16, 4}; + sycl::range<3> localSize3{16, 16, 1}; + validated &= runTest<3, uint32_t, 1>(sycl::image_channel_type::unsigned_int32, + globalSize3, localSize3); + validated &= runTest<3, uint8_t, 4>(sycl::image_channel_type::unorm_int8, + globalSize3, localSize3); + validated &= runTest<3, float, 1>(sycl::image_channel_type::fp32, globalSize3, + localSize3); + validated &= runTest<3, sycl::half, 2>(sycl::image_channel_type::fp16, + globalSize3, localSize3); + validated &= runTest<3, sycl::half, 4>(sycl::image_channel_type::fp16, + globalSize3, localSize3); + if (validated) { std::cout << "Test passed!" << std::endl; return 0; } std::cerr << "Test failed!" << std::endl; + return 1; } diff --git a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.h b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.h index 7985072780a2c..1dd3c16ce75f1 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.h +++ b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.h @@ -37,15 +37,11 @@ inline void ThrowIfFailed(HRESULT result) { } } -class DX12InteropTest { +template class DX12InteropTest { public: - DX12InteropTest(uint32_t width) - : m_width(width), m_sharedFenceValue(1), - m_syclImageDesc({m_width}, 1, - sycl::image_channel_type::unsigned_int32) { - m_syclQueue = - sycl::queue{m_syclDevice, {sycl::property::queue::in_order{}}}; - } + DX12InteropTest(sycl::image_channel_type channelType, + sycl::range globalSize, sycl::range localSize); + ~DX12InteropTest() {} void initDX12Device(); @@ -66,6 +62,14 @@ class DX12InteropTest { // Dimensions of image uint32_t m_width; + uint32_t m_height; + uint32_t m_depth; + uint32_t m_numElems; + + sycl::image_channel_type m_channelType; + + sycl::range m_globalSize; + sycl::range m_localSize; // DX12 Objects ComPtr m_dx12Factory; @@ -79,14 +83,13 @@ class DX12InteropTest { HANDLE m_dx12FenceEvent; // Shared handles and values - uint64_t m_sharedFenceValue; - HANDLE m_sharedMemoryHandle; + uint64_t m_sharedFenceValue = 0; + HANDLE m_sharedMemoryHandle = INVALID_HANDLE_VALUE; HANDLE m_sharedSemaphoreHandle = INVALID_HANDLE_VALUE; // SYCL Objects sycl::queue m_syclQueue; sycl::device m_syclDevice; - syclexp::image_descriptor m_syclImageDesc; syclexp::external_mem m_syclExternalMemHandle; syclexp::external_semaphore m_syclExternalSemaphoreHandle; syclexp::image_mem_handle m_syclImageMemHandle; diff --git a/sycl/test-e2e/bindless_images/helpers/common.hpp b/sycl/test-e2e/bindless_images/helpers/common.hpp index b1c122867345a..e5ab1e02a8c65 100644 --- a/sycl/test-e2e/bindless_images/helpers/common.hpp +++ b/sycl/test-e2e/bindless_images/helpers/common.hpp @@ -1,6 +1,7 @@ #pragma once #include #include +#include template std::ostream &operator<<(std::ostream &os, @@ -37,6 +38,44 @@ static void printTestName(std::string name, sycl::range globalSize, #endif } +const char *channelTypeToString(sycl::image_channel_type type) { + switch (type) { + case sycl::image_channel_type::snorm_int8: + return "sycl::image_channel_type::snorm_int8"; + case sycl::image_channel_type::snorm_int16: + return "sycl::image_channel_type::snorm_int16"; + case sycl::image_channel_type::unorm_int8: + return "sycl::image_channel_type::unorm_int8"; + case sycl::image_channel_type::unorm_int16: + return "sycl::image_channel_type::unorm_int16"; + case sycl::image_channel_type::unorm_short_565: + return "sycl::image_channel_type::unorm_short_565"; + case sycl::image_channel_type::unorm_short_555: + return "sycl::image_channel_type::unorm_short_555"; + case sycl::image_channel_type::unorm_int_101010: + return "sycl::image_channel_type::unorm_int_101010"; + case sycl::image_channel_type::signed_int8: + return "sycl::image_channel_type::signed_int8"; + case sycl::image_channel_type::signed_int16: + return "sycl::image_channel_type::signed_int16"; + case sycl::image_channel_type::signed_int32: + return "sycl::image_channel_type::signed_int32"; + case sycl::image_channel_type::unsigned_int8: + return "sycl::image_channel_type::unsigned_int8"; + case sycl::image_channel_type::unsigned_int16: + return "sycl::image_channel_type::unsigned_int16"; + case sycl::image_channel_type::unsigned_int32: + return "sycl::image_channel_type::unsigned_int32"; + case sycl::image_channel_type::fp16: + return "sycl::image_channel_type::fp16"; + case sycl::image_channel_type::fp32: + return "sycl::image_channel_type::fp32"; + default: + std::cerr << "Unsupported image_channel_type in channelTypeToString\n"; + exit(-1); + } +} + template constexpr sycl::vec init_vector(DType val) { if constexpr (NChannel == 1) { diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp index 56514b1ecf30e..42ec0888ce091 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -1,7 +1,7 @@ // REQUIRES: cuda || (windows && level_zero && gpu-intel-dg2) // REQUIRES: vulkan -// RUN: %{build} %link-vulkan -o %t.out +// RUN: %{build} %link-vulkan -o %t.out %if any-device-is-level_zero %{ -Wno-ignored-attributes -DTEST_L0_SUPPORTED_VK_FORMAT %} // RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out // Uncomment to print additional test information @@ -20,6 +20,14 @@ struct handles_t { syclexp::external_mem inputExternalMem; }; +template struct OutputType { + using type = DType; +}; + +template <> struct OutputType { + using type = float; +}; + template handles_t create_test_handles(sycl::context &ctxt, sycl::device &dev, const syclexp::bindless_image_sampler &samp, @@ -73,18 +81,23 @@ bool run_sycl(InteropHandleT inputInteropMemHandle, const size_t img_size = numElems * sizeof(DType) * NChannels; auto width = globalSize[0]; - auto height = globalSize[1]; + auto height = 1UL; auto depth = 1UL; sycl::range outBufferRange; if constexpr (NDims == 3) { + height = globalSize[1]; depth = globalSize[2]; outBufferRange = sycl::range{depth, height, width}; - } else { + } else if constexpr (NDims == 2) { + height = globalSize[1]; outBufferRange = sycl::range{height, width}; + } else { + outBufferRange = sycl::range{width}; } - using VecType = sycl::vec; + using OutType = typename OutputType::type; + using VecType = sycl::vec; auto handles = create_test_handles(ctxt, dev, samp, inputInteropMemHandle, desc, img_size); @@ -111,12 +124,12 @@ bool run_sycl(InteropHandleT inputInteropMemHandle, // Extension: sample image data from handle (Vulkan imported) VecType pixel; pixel = syclexp::sample_image< - std::conditional_t>( + std::conditional_t>( handles.imgInput, sycl::float3(fdim0, fdim1, fdim2)); - pixel *= static_cast(10.1f); + pixel /= static_cast(2.f); outAcc[sycl::id{dim2, dim1, dim0}] = pixel; - } else { + } else if constexpr (NDims == 2) { size_t dim0 = it.get_global_id(0); size_t dim1 = it.get_global_id(1); @@ -126,11 +139,24 @@ bool run_sycl(InteropHandleT inputInteropMemHandle, // Extension: sample image data from handle (Vulkan imported) VecType pixel = syclexp::sample_image< - std::conditional_t>( + std::conditional_t>( handles.imgInput, sycl::float2(fdim0, fdim1)); - pixel *= static_cast(10.1f); + pixel /= static_cast(2.f); outAcc[sycl::id{dim1, dim0}] = pixel; + } else { + size_t dim0 = it.get_global_id(0); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5f) / (float)width; + + // Extension: sample image data from handle (Vulkan imported) + VecType pixel = syclexp::sample_image< + std::conditional_t>( + handles.imgInput, fdim0); + + pixel /= static_cast(2.f); + outAcc[dim0] = pixel; } }); }); @@ -152,9 +178,10 @@ bool run_sycl(InteropHandleT inputInteropMemHandle, bool validated = true; for (int i = 0; i < globalSize.size(); i++) { bool mismatch = false; - VecType expected = bindless_helpers::init_vector(i) * - static_cast(10.1f); - if (!bindless_helpers::equal_vec(out[i], expected)) { + VecType expected = + bindless_helpers::init_vector(static_cast( + CType == sycl::image_channel_type::unorm_int8 ? 0.5f : (i / 2.f))); + if (!bindless_helpers::equal_vec(out[i], expected)) { mismatch = true; validated = false; } @@ -169,7 +196,11 @@ bool run_sycl(InteropHandleT inputInteropMemHandle, } } if (validated) { - printString("Results are correct!\n"); +#ifdef VERBOSE_PRINT + std::cout << "\tTest passed: NDims " << NDims << " NChannels " << NChannels + << " image_channel_type " + << bindless_helpers::channelTypeToString(CType) << "\n"; +#endif } return validated; @@ -238,7 +269,9 @@ bool run_test(sycl::range dims, sycl::range localSize, imageSizeBytes, 0 /*flags*/, (void **)&inputStagingData)); for (int i = 0; i < numElems; ++i) { - inputStagingData[i] = bindless_helpers::init_vector(i); + inputStagingData[i] = + bindless_helpers::init_vector(static_cast( + CType == sycl::image_channel_type::unorm_int8 ? 255 : i)); } vkUnmapMemory(vk_device, inputStagingMemory); @@ -321,17 +354,58 @@ bool run_test(sycl::range dims, sycl::range localSize, } bool run_tests() { - bool valid = run_test<2, float, 4, sycl::image_channel_type::fp32, - sycl::image_channel_order::rgba, class float_2d>( - {16, 16}, {2, 2}, 0); + bool valid = true; +#ifdef TEST_L0_SUPPORTED_VK_FORMAT + valid &= + run_test<1, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class fp32_1d_c1>({1024}, {4}, 0); + valid &= + run_test<1, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class fp16_1d_c2>({1024}, {4}, 0); + valid &= run_test<1, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class fp16_1d_c4>({1024}, + {4}, 0); + valid &= run_test<1, uint8_t, 4, sycl::image_channel_type::unorm_int8, + sycl::image_channel_order::rgba, class unorm_int8_1d_c4>( + {1024}, {4}, 0); + + valid &= run_test<2, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class fp32_2d_c1>({32, 32}, + {2, 2}, 0); + valid &= run_test<2, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class fp16_2d_c2>({32, 32}, + {2, 2}, 0); + valid &= run_test<2, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class fp16_2d_c4>( + {32, 32}, {2, 2}, 0); + valid &= run_test<2, uint8_t, 4, sycl::image_channel_type::unorm_int8, + sycl::image_channel_order::rgba, class unorm_int8_2d_c4>( + {32, 32}, {2, 2}, 0); + + valid &= run_test<3, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class fp32_3d_c1>( + {64, 16, 2}, {2, 2, 2}, 0); + valid &= run_test<3, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class fp16_3d_c2>( + {64, 16, 2}, {2, 2, 2}, 0); + valid &= run_test<3, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class fp16_3d_c4>( + {64, 16, 2}, {2, 2, 2}, 0); + valid &= run_test<3, uint8_t, 4, sycl::image_channel_type::unorm_int8, + sycl::image_channel_order::rgba, class unorm_int8_3d_c4>( + {64, 16, 2}, {2, 2, 2}, 0); +#else + valid &= run_test<2, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float_2d>({16, 16}, + {2, 2}, 0); valid &= run_test<2, float, 2, sycl::image_channel_type::fp32, sycl::image_channel_order::rg, class float_2d_large>( {1024, 1024}, {4, 2}, 0); valid &= run_test<3, char, 2, sycl::image_channel_type::signed_int8, - sycl::image_channel_order::rg, class float_3d>( - {256, 16, 2}, {2, 2, 2}, 0); + sycl::image_channel_order::rg, class int8_3d>({256, 16, 2}, + {2, 2, 2}, 0); valid &= run_test<2, uint32_t, 1, sycl::image_channel_type::unsigned_int32, sycl::image_channel_order::r, class uint32_2d>({64, 32}, @@ -352,7 +426,7 @@ bool run_tests() { valid &= run_test<3, int16_t, 1, sycl::image_channel_type::signed_int16, sycl::image_channel_order::r, class int16_3d>({64, 32, 64}, {4, 2, 4}, 0); - +#endif return valid; } diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index 50881b721f848..e79b633a8d67d 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -1,8 +1,8 @@ -// REQUIRES: cuda +// REQUIRES: cuda || (windows && level_zero && gpu-intel-dg2) // REQUIRES: vulkan -// RUN: %{build} %link-vulkan -o %t.out -// RUN: %{run} %t.out +// RUN: %{build} %link-vulkan -o %t.out %if any-device-is-level_zero %{ -Wno-ignored-attributes -DTEST_L0_SUPPORTED_VK_FORMAT %} +// RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -29,15 +29,15 @@ struct handles_t { }; template -handles_t -create_test_handles(sycl::context &ctxt, sycl::device &dev, - InteropMemHandleT img_in_interop_handle_1, - InteropMemHandleT img_in_interop_handle_2, - InteropMemHandleT img_out_interop_handle, - InteropSemHandleT sycl_wait_semaphore_handle, - InteropSemHandleT sycl_done_semaphore_handle, - const size_t img_size, - sycl::ext::oneapi::experimental::image_descriptor &desc) { +handles_t create_test_handles( + sycl::context &ctxt, sycl::device &dev, + InteropMemHandleT img_in_interop_handle_1, + InteropMemHandleT img_in_interop_handle_2, + InteropMemHandleT img_out_interop_handle, + [[maybe_unused]] InteropSemHandleT sycl_wait_semaphore_handle, + [[maybe_unused]] InteropSemHandleT sycl_done_semaphore_handle, + const size_t img_size, + sycl::ext::oneapi::experimental::image_descriptor &desc) { // Extension: map the external memory descriptors #ifdef _WIN32 @@ -89,6 +89,7 @@ create_test_handles(sycl::context &ctxt, sycl::device &dev, syclexp::unsampled_image_handle output = syclexp::create_image(output_mapped_mem_handle, desc, dev, ctxt); +#ifdef TEST_SEMAPHORE_IMPORT // Extension: import semaphores #ifdef _WIN32 syclexp::external_semaphore_descriptor @@ -116,6 +117,10 @@ create_test_handles(sycl::context &ctxt, sycl::device &dev, syclexp::external_semaphore sycl_done_external_semaphore = syclexp::import_external_semaphore(sycl_done_external_semaphore_desc, dev, ctxt); +#else // #ifdef TEST_SEMAPHORE_IMPORT + syclexp::external_semaphore sycl_wait_external_semaphore{}; + syclexp::external_semaphore sycl_done_external_semaphore{}; +#endif // #ifdef TEST_SEMAPHORE_IMPORT return {input_external_mem_1, input_external_mem_2, @@ -131,10 +136,12 @@ create_test_handles(sycl::context &ctxt, sycl::device &dev, } void cleanup_test(sycl::context &ctxt, sycl::device &dev, handles_t handles) { +#ifdef TEST_SEMAPHORE_IMPORT syclexp::release_external_semaphore(handles.sycl_wait_external_semaphore, dev, ctxt); syclexp::release_external_semaphore(handles.sycl_done_external_semaphore, dev, ctxt); +#endif syclexp::destroy_image_handle(handles.input_1, dev, ctxt); syclexp::destroy_image_handle(handles.input_2, dev, ctxt); syclexp::destroy_image_handle(handles.output, dev, ctxt); @@ -175,8 +182,10 @@ void run_ndim_test(sycl::range global_size, img_out_interop_handle, sycl_wait_semaphore_handle, sycl_done_semaphore_handle, img_size, desc); +#ifdef TEST_SEMAPHORE_IMPORT // Extension: wait for imported semaphore q.ext_oneapi_wait_external_semaphore(handles.sycl_wait_external_semaphore); +#endif try { q.submit([&](sycl::handler &cgh) { @@ -236,11 +245,13 @@ void run_ndim_test(sycl::range global_size, }); }); +#ifdef TEST_SEMAPHORE_IMPORT // Extension: signal imported semaphore q.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_signal_external_semaphore( handles.sycl_done_external_semaphore); }); +#endif // Wait for kernel completion before destroying external objects q.wait_and_throw(); @@ -360,6 +371,7 @@ bool run_test(sycl::range dims, sycl::range local_size, VK_CHECK_CALL(vkQueueWaitIdle(vk_compute_queue)); } +#ifdef TEST_SEMAPHORE_IMPORT // Create semaphore to later import in SYCL printString("Creating semaphores\n"); VkSemaphore syclWaitSemaphore; @@ -395,6 +407,7 @@ bool run_test(sycl::range dims, sycl::range local_size, VK_CHECK_CALL( vkCreateSemaphore(vk_device, &sci, nullptr, &syclDoneSemaphore)); } +#endif // #ifdef TEST_SEMAPHORE_IMPORT printString("Copying staging memory to images\n"); // Copy staging to main image memory @@ -424,12 +437,17 @@ bool run_test(sycl::range dims, sycl::range local_size, submission.commandBufferCount = 1; submission.pCommandBuffers = &vk_transferCmdBuffers[0]; +#ifdef TEST_SEMAPHORE_IMPORT submission.signalSemaphoreCount = 1; submission.pSignalSemaphores = &syclWaitSemaphore; +#endif submission.pWaitDstStageMask = stages.data(); VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, &submission, VK_NULL_HANDLE /*fence*/)); +#ifndef TEST_SEMAPHORE_IMPORT + VK_CHECK_CALL(vkQueueWaitIdle(vk_transfer_queue)); +#endif } printString("Getting memory interop handles\n"); @@ -451,6 +469,7 @@ bool run_test(sycl::range dims, sycl::range local_size, printString("Getting semaphore interop handles\n"); +#ifdef TEST_SEMAPHORE_IMPORT // Pass semaphores to SYCL for synchronization #ifdef _WIN32 auto sycl_wait_semaphore_handle = @@ -463,6 +482,10 @@ bool run_test(sycl::range dims, sycl::range local_size, auto sycl_done_semaphore_handle = vkutil::getSemaphoreOpaqueFD(syclDoneSemaphore); #endif +#else // #ifdef TEST_SEMAPHORE_IMPORT + void *sycl_wait_semaphore_handle = nullptr; + void *sycl_done_semaphore_handle = nullptr; +#endif // #ifdef TEST_SEMAPHORE_IMPORT printString("Calling into SYCL with interop memory and semaphore handles\n"); @@ -498,8 +521,10 @@ bool run_test(sycl::range dims, sycl::range local_size, submission.commandBufferCount = 1; submission.pCommandBuffers = &vk_transferCmdBuffers[1]; +#ifdef TEST_SEMAPHORE_IMPORT submission.waitSemaphoreCount = 1; submission.pWaitSemaphores = &syclDoneSemaphore; +#endif submission.pWaitDstStageMask = stages.data(); VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, @@ -536,19 +561,60 @@ bool run_test(sycl::range dims, sycl::range local_size, printString(" Results are correct!\n"); } +#ifdef TEST_SEMAPHORE_IMPORT // Cleanup vkDestroySemaphore(vk_device, syclWaitSemaphore, nullptr); vkDestroySemaphore(vk_device, syclDoneSemaphore, nullptr); +#endif return validated; } bool run_all() { unsigned int seed = 0; + bool valid = true; +#ifdef TEST_L0_SUPPORTED_VK_FORMAT + printString("Running 3D float\n"); + valid &= run_test<3, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class fp32_3d_c1>( + {64, 16, 2}, {2, 2, 2}, seed); + + printString("Running 3D half2\n"); + valid &= run_test<3, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class fp16_3d_c2>( + {64, 16, 2}, {2, 2, 2}, seed); + + printString("Running 3D half4\n"); + valid &= run_test<3, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class fp16_3d_c4>( + {64, 16, 2}, {2, 2, 2}, seed); + printString("Running 3D unorm_int8_c4\n"); + valid &= run_test<3, uint8_t, 4, sycl::image_channel_type::unorm_int8, + sycl::image_channel_order::rgba, class unorm_int8_3d_c4>( + {64, 16, 2}, {2, 2, 2}, seed); + + printString("Running 2D float\n"); + valid &= run_test<2, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class fp32_2d_c1>( + {32, 32}, {2, 2}, seed); + printString("Running 2D half2\n"); + valid &= run_test<2, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class fp16_2d_c2>( + {32, 32}, {2, 2}, seed); + printString("Running 2D half4\n"); + valid &= run_test<2, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class fp16_2d_c4>( + {32, 32}, {2, 2}, seed); + + printString("Running 2D unorm_int8_c4\n"); + valid &= run_test<2, uint8_t, 4, sycl::image_channel_type::unorm_int8, + sycl::image_channel_order::rgba, class unorm_int8_2d_c4>( + {32, 32}, {2, 2}, seed); +#else printString("Running 3D uint4\n"); - bool valid = run_test<3, uint32_t, 4, sycl::image_channel_type::signed_int32, - sycl::image_channel_order::rgba, class uint4_3d>( + valid &= run_test<3, uint32_t, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class uint4_3d>( {272, 144, 4}, {16, 16, 4}, seed); printString("Running 3D uint2\n"); @@ -604,7 +670,7 @@ bool run_all() { valid &= run_test<2, float, 1, sycl::image_channel_type::fp32, sycl::image_channel_order::r, class float1_2d>( {32, 32}, {2, 2}, seed); - +#endif return valid; } diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp new file mode 100644 index 0000000000000..54fc2d7bfb984 --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp @@ -0,0 +1,8 @@ +// REQUIRES: cuda +// REQUIRES: vulkan + +// RUN: %{build} %link-vulkan -o %t.out +// RUN: %{run} %t.out + +#define TEST_SEMAPHORE_IMPORT +#include "unsampled_images.cpp" diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp index 4a70f2a8e9edc..52e13441e4200 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp @@ -791,7 +791,20 @@ Vulkan format. */ VkFormat to_vulkan_format(sycl::image_channel_order order, sycl::image_channel_type channel_type) { - if (channel_type == sycl::image_channel_type::signed_int8) { + if (channel_type == sycl::image_channel_type::unorm_int8) { + switch (order) { + case sycl::image_channel_order::r: + return VK_FORMAT_R8_UNORM; + case sycl::image_channel_order::rg: + return VK_FORMAT_R8G8_UNORM; + case sycl::image_channel_order::rgba: + return VK_FORMAT_R8G8B8A8_UNORM; + default: { + std::cerr << "error in converting to vulkan format\n"; + exit(-1); + } + } + } else if (channel_type == sycl::image_channel_type::signed_int8) { switch (order) { case sycl::image_channel_order::r: @@ -845,6 +858,19 @@ VkFormat to_vulkan_format(sycl::image_channel_order order, exit(-1); } } + } else if (channel_type == sycl::image_channel_type::fp16) { + switch (order) { + case sycl::image_channel_order::r: + return VK_FORMAT_R16_SFLOAT; + case sycl::image_channel_order::rg: + return VK_FORMAT_R16G16_SFLOAT; + case sycl::image_channel_order::rgba: + return VK_FORMAT_R16G16B16A16_SFLOAT; + default: { + std::cerr << "error in converting to vulkan format\n"; + exit(-1); + } + } } else if (channel_type == sycl::image_channel_type::fp32) { switch (order) { case sycl::image_channel_order::r: