From c9aabcbfa7291328766562c40ef5f6955765a649 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Thu, 29 Aug 2024 23:13:49 +0000 Subject: [PATCH 1/2] [OclRuntime] Added gpuMemCopy() function --- .../OpenCLRuntime/OpenCLRuntimeWrappers.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/gc/ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp b/lib/gc/ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp index f63d94316..6d4e67200 100644 --- a/lib/gc/ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp +++ b/lib/gc/ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp @@ -21,7 +21,7 @@ #ifdef _WIN32 #define OCL_RUNTIME_EXPORT __declspec(dllexport) #else -#define OCL_RUNTIME_EXPORT +#define OCL_RUNTIME_EXPORT __attribute__((visibility("default"))) #endif // _WIN32 namespace { @@ -390,6 +390,14 @@ extern "C" OCL_RUNTIME_EXPORT void gpuMemFree(GPUCLQUEUE *queue, void *ptr) { } } +extern "C" OCL_RUNTIME_EXPORT void gpuMemCopy(GPUCLQUEUE *queue, void *dst, + void *src, uint64_t size) { + auto func = queue->ext_table_ ? queue->ext_table_->enqueneMemcpy + : (clEnqueueMemcpyINTEL_fn)queryCLExtFunc( + queue->device_, EnqueueMemcpyName); + CL_SAFE_CALL(func(queue->queue_, true, dst, src, size, 0, nullptr, nullptr)); +} + extern "C" OCL_RUNTIME_EXPORT cl_program gpuModuleLoad(GPUCLQUEUE *queue, const unsigned char *data, size_t dataSize) { if (queue) { From 4ca71c3dc259b561f9edb3de67942f2844241e34 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Mon, 2 Sep 2024 12:30:16 +0200 Subject: [PATCH 2/2] Fixed typo --- .../ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/gc/ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp b/lib/gc/ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp index 6d4e67200..6adaac2bd 100644 --- a/lib/gc/ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp +++ b/lib/gc/ExecutionEngine/OpenCLRuntime/OpenCLRuntimeWrappers.cpp @@ -73,7 +73,7 @@ struct CLExtTable { clSharedMemAllocINTEL_fn allocShared; clMemBlockingFreeINTEL_fn blockingFree; clSetKernelArgMemPointerINTEL_fn setKernelArgMemPtr; - clEnqueueMemcpyINTEL_fn enqueneMemcpy; + clEnqueueMemcpyINTEL_fn enqueueMemcpy; CLExtTable() = default; CLExtTable(cl_platform_id plat) { allocDev = @@ -84,7 +84,7 @@ struct CLExtTable { (clMemBlockingFreeINTEL_fn)queryCLExtFunc(plat, MemBlockingFreeName); setKernelArgMemPtr = (clSetKernelArgMemPointerINTEL_fn)queryCLExtFunc( plat, SetKernelArgMemPointerName); - enqueneMemcpy = + enqueueMemcpy = (clEnqueueMemcpyINTEL_fn)queryCLExtFunc(plat, EnqueueMemcpyName); } }; @@ -392,7 +392,7 @@ extern "C" OCL_RUNTIME_EXPORT void gpuMemFree(GPUCLQUEUE *queue, void *ptr) { extern "C" OCL_RUNTIME_EXPORT void gpuMemCopy(GPUCLQUEUE *queue, void *dst, void *src, uint64_t size) { - auto func = queue->ext_table_ ? queue->ext_table_->enqueneMemcpy + auto func = queue->ext_table_ ? queue->ext_table_->enqueueMemcpy : (clEnqueueMemcpyINTEL_fn)queryCLExtFunc( queue->device_, EnqueueMemcpyName); CL_SAFE_CALL(func(queue->queue_, true, dst, src, size, 0, nullptr, nullptr));