From 06b119a177132b05a5a50d87b5c26fb08cabaae8 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 14 Oct 2024 02:30:25 +0300 Subject: [PATCH 01/15] fix: use `vm_allocate` to allocate CPU backend buffer on macOS --- ggml/src/ggml-backend.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 15d650150a5f3..ef701e61da043 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -24,6 +24,8 @@ #ifdef __APPLE__ #include #include +#include +#include #endif @@ -770,12 +772,21 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty } static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { +#ifdef TARGET_OS_OSX + void * data = NULL; + kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &data, size, VM_FLAGS_ANYWHERE); + if (alloc_status != KERN_SUCCESS) { + GGML_LOG_ERROR("%s: failed to allocate buffer using vm_allocate with size %zu\n", __func__, size); + return NULL; + } +#else size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h) if (data == NULL) { GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size); return NULL; } +#endif return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, size); } From cec2d4e2659ff652877ef5f53983f9da5d57869c Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 14 Oct 2024 03:40:25 +0300 Subject: [PATCH 02/15] fix: switch to `posix_memalign` to keep existing `free()` usages work --- ggml/src/ggml-backend.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index ef701e61da043..263b3e75dffae 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -24,7 +24,7 @@ #ifdef __APPLE__ #include #include -#include +#include #include #endif @@ -772,11 +772,11 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty } static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { -#ifdef TARGET_OS_OSX +#if defined(GGML_USE_METAL) || defined(TARGET_OS_OSX) void * data = NULL; - kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &data, size, VM_FLAGS_ANYWHERE); - if (alloc_status != KERN_SUCCESS) { - GGML_LOG_ERROR("%s: failed to allocate buffer using vm_allocate with size %zu\n", __func__, size); + int result = posix_memalign(&data, sysconf(_SC_PAGESIZE), size); + if (result != 0) { + GGML_LOG_ERROR("%s: failed to allocate buffer using posix_memalign with size %zu\n", __func__, size); return NULL; } #else From 51ccdebc848f61284cc9f02efd97ff134dce8443 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 01:08:53 +0300 Subject: [PATCH 03/15] feat: move `GGML_ALIGNED_MALLOC` to `ggml-backend-impl.h`, add support for `vm_allocate` on macOS --- ggml/src/ggml-backend-impl.h | 89 ++++++++++++++++++++++++++++++++++++ ggml/src/ggml-backend.cpp | 17 ++----- ggml/src/ggml.c | 63 ++----------------------- 3 files changed, 97 insertions(+), 72 deletions(-) diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h index fd3deae009799..b61ba0454b6d2 100644 --- a/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h @@ -1,5 +1,25 @@ #pragma once +#include "ggml-impl.h" + +#if defined(_MSC_VER) || defined(__MINGW32__) +#include // using malloc.h with MSC/MINGW +#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) +#include +#endif + +#include + +#ifdef GGML_USE_CPU_HBM +#include +#endif + +#if defined(__APPLE__) +#include +#include +#include +#endif + // ggml-backend internal header #include "ggml-backend.h" @@ -222,6 +242,75 @@ extern "C" { // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function // typedef ggml_backend_register_t * (*ggml_backend_init)(void); + + // + // Memory allocation + // + +#if defined(_MSC_VER) || defined(__MINGW32__) +#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) +#define GGML_ALIGNED_FREE(ptr, size) _aligned_free(ptr) +#else +inline static void * ggml_aligned_malloc(size_t size) { + if (size == 0) { + GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); + return NULL; + } + void * aligned_memory = NULL; +#ifdef GGML_USE_CPU_HBM + int result = hbw_posix_memalign(&aligned_memory, 16, size); +#elif TARGET_OS_OSX + kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); + int result = EFAULT; + switch (alloc_status) { + case KERN_SUCCESS: + result = 0; + break; + + case KERN_INVALID_ADDRESS: + result = EINVAL; + break; + + case KERN_NO_SPACE: + result = ENOMEM; + break; + + default: + result = EFAULT; + break; + } +#elif GGML_USE_METAL + int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); +#else + int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); +#endif + if (result != 0) { + // Handle allocation failure + const char *error_desc = "unknown allocation error"; + switch (result) { + case EINVAL: + error_desc = "invalid alignment value"; + break; + case ENOMEM: + error_desc = "insufficient memory"; + break; + } + GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); + GGML_ABORT("fatal error"); + return NULL; + } + return aligned_memory; +} +#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) +#ifdef GGML_USE_CPU_HBM +#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) hbw_free(ptr) +#elif TARGET_OS_OSX +#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size) +#else +#define GGML_ALIGNED_FREE(ptr, size) free(ptr) +#endif +#endif + #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 263b3e75dffae..ad2512c535baa 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -24,8 +24,6 @@ #ifdef __APPLE__ #include #include -#include -#include #endif @@ -704,7 +702,7 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) { } static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) { - free(buffer->context); + GGML_ALIGNED_FREE(buffer->context, buffer->size); } static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { @@ -772,21 +770,12 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty } static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { -#if defined(GGML_USE_METAL) || defined(TARGET_OS_OSX) - void * data = NULL; - int result = posix_memalign(&data, sysconf(_SC_PAGESIZE), size); - if (result != 0) { - GGML_LOG_ERROR("%s: failed to allocate buffer using posix_memalign with size %zu\n", __func__, size); - return NULL; - } -#else - size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned - void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h) + void * data = GGML_ALIGNED_MALLOC(size); + if (data == NULL) { GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size); return NULL; } -#endif return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, size); } diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 3f01092d9f59a..48437b1e04033 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3,17 +3,12 @@ #include "ggml-backend.h" #include "ggml-impl.h" +#include "ggml-backend-impl.h" #include "ggml-cpu-impl.h" #include "ggml-quants.h" #include "ggml.h" #include "ggml-aarch64.h" -#if defined(_MSC_VER) || defined(__MINGW32__) -#include // using malloc.h with MSC/MINGW -#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) -#include -#endif - #include #include #include @@ -35,10 +30,6 @@ #include #endif -#ifdef GGML_USE_METAL -#include -#endif - #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8) #undef GGML_USE_LLAMAFILE #endif @@ -184,10 +175,6 @@ typedef void * thread_ret_t; typedef pthread_t ggml_thread_t; -#ifdef GGML_USE_CPU_HBM -#include -#endif - #if defined(__APPLE__) #include #endif @@ -386,47 +373,6 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi //#define GGML_SOFT_MAX_ACCELERATE #endif -#if defined(_MSC_VER) || defined(__MINGW32__) -#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) -#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr) -#else -inline static void * ggml_aligned_malloc(size_t size) { - if (size == 0) { - GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); - return NULL; - } - void * aligned_memory = NULL; -#ifdef GGML_USE_CPU_HBM - int result = hbw_posix_memalign(&aligned_memory, 16, size); -#elif GGML_USE_METAL - int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); -#else - int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); -#endif - if (result != 0) { - // Handle allocation failure - const char *error_desc = "unknown allocation error"; - switch (result) { - case EINVAL: - error_desc = "invalid alignment value"; - break; - case ENOMEM: - error_desc = "insufficient memory"; - break; - } - GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); - GGML_ABORT("fatal error"); - return NULL; - } - return aligned_memory; -} -#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) -#ifdef GGML_USE_CPU_HBM -#define GGML_ALIGNED_FREE(ptr) if(NULL != ptr) hbw_free(ptr) -#else -#define GGML_ALIGNED_FREE(ptr) free(ptr) -#endif -#endif inline static void * ggml_malloc(size_t size) { if (size == 0) { @@ -3909,7 +3855,7 @@ void ggml_free(struct ggml_context * ctx) { __func__, i, ggml_used_mem(ctx)); if (ctx->mem_buffer_owned) { - GGML_ALIGNED_FREE(ctx->mem_buffer); + GGML_ALIGNED_FREE(ctx->mem_buffer, ctx->mem_size); } found = true; @@ -19630,8 +19576,9 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) { ggml_cond_destroy(&threadpool->cond); #endif // GGML_USE_OPENMP - GGML_ALIGNED_FREE(threadpool->workers); - GGML_ALIGNED_FREE(threadpool); + const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads; + GGML_ALIGNED_FREE(threadpool->workers, workers_size); + GGML_ALIGNED_FREE(threadpool, sizeof(struct ggml_threadpool)); } #ifndef GGML_USE_OPENMP From c2259e3cd12b2b06ff8cfd63ffa4ef4e374e9374 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 01:14:36 +0300 Subject: [PATCH 04/15] style: formatting --- ggml/src/ggml-backend-impl.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h index b61ba0454b6d2..06f962ee61319 100644 --- a/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h @@ -266,15 +266,12 @@ inline static void * ggml_aligned_malloc(size_t size) { case KERN_SUCCESS: result = 0; break; - case KERN_INVALID_ADDRESS: result = EINVAL; break; - case KERN_NO_SPACE: result = ENOMEM; break; - default: result = EFAULT; break; From 55298d2530b961cabeebb70bb51d3df5fd483d74 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 01:21:11 +0300 Subject: [PATCH 05/15] fix: move const outside of `#ifndef` --- ggml/src/ggml.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 48437b1e04033..4419ee95119b7 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -19554,9 +19554,10 @@ static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask void ggml_threadpool_free(struct ggml_threadpool* threadpool) { if (!threadpool) return; + const int n_threads = threadpool->n_threads_max; + #ifndef GGML_USE_OPENMP struct ggml_compute_state* workers = threadpool->workers; - const int n_threads = threadpool->n_threads_max; ggml_mutex_lock(&threadpool->mutex); From fa79e0d0dd5c821b10e32e7bcbecef427127e808 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 01:23:01 +0300 Subject: [PATCH 06/15] style: formatting --- ggml/src/ggml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 4419ee95119b7..b6ef146ba4fee 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -19555,7 +19555,7 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) { if (!threadpool) return; const int n_threads = threadpool->n_threads_max; - + #ifndef GGML_USE_OPENMP struct ggml_compute_state* workers = threadpool->workers; From 19a820fceda5d8010a1c7563d12e8f7476e3bff9 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 01:28:44 +0300 Subject: [PATCH 07/15] fix: unused var --- ggml/src/ggml.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index b6ef146ba4fee..a2a82fd5cd3f9 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -19577,8 +19577,7 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) { ggml_cond_destroy(&threadpool->cond); #endif // GGML_USE_OPENMP - const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads; - GGML_ALIGNED_FREE(threadpool->workers, workers_size); + GGML_ALIGNED_FREE(threadpool->workers, sizeof(struct ggml_compute_state) * n_threads); GGML_ALIGNED_FREE(threadpool, sizeof(struct ggml_threadpool)); } From 8352354aa21410adce6ab5cfc13e167c41f51b5c Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 01:56:07 +0300 Subject: [PATCH 08/15] fix: transform `GGML_ALIGNED_MALLOC` and `GGML_ALIGNED_FREE` into functions and add them to `ggml-impl.h` --- ggml/src/ggml-backend-impl.h | 86 -------------------------------- ggml/src/ggml-backend.cpp | 4 +- ggml/src/ggml-impl.h | 5 ++ ggml/src/ggml.c | 95 +++++++++++++++++++++++++++++++++--- 4 files changed, 95 insertions(+), 95 deletions(-) diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h index 06f962ee61319..fd3deae009799 100644 --- a/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h @@ -1,25 +1,5 @@ #pragma once -#include "ggml-impl.h" - -#if defined(_MSC_VER) || defined(__MINGW32__) -#include // using malloc.h with MSC/MINGW -#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) -#include -#endif - -#include - -#ifdef GGML_USE_CPU_HBM -#include -#endif - -#if defined(__APPLE__) -#include -#include -#include -#endif - // ggml-backend internal header #include "ggml-backend.h" @@ -242,72 +222,6 @@ extern "C" { // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function // typedef ggml_backend_register_t * (*ggml_backend_init)(void); - - // - // Memory allocation - // - -#if defined(_MSC_VER) || defined(__MINGW32__) -#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) -#define GGML_ALIGNED_FREE(ptr, size) _aligned_free(ptr) -#else -inline static void * ggml_aligned_malloc(size_t size) { - if (size == 0) { - GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); - return NULL; - } - void * aligned_memory = NULL; -#ifdef GGML_USE_CPU_HBM - int result = hbw_posix_memalign(&aligned_memory, 16, size); -#elif TARGET_OS_OSX - kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); - int result = EFAULT; - switch (alloc_status) { - case KERN_SUCCESS: - result = 0; - break; - case KERN_INVALID_ADDRESS: - result = EINVAL; - break; - case KERN_NO_SPACE: - result = ENOMEM; - break; - default: - result = EFAULT; - break; - } -#elif GGML_USE_METAL - int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); -#else - int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); -#endif - if (result != 0) { - // Handle allocation failure - const char *error_desc = "unknown allocation error"; - switch (result) { - case EINVAL: - error_desc = "invalid alignment value"; - break; - case ENOMEM: - error_desc = "insufficient memory"; - break; - } - GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); - GGML_ABORT("fatal error"); - return NULL; - } - return aligned_memory; -} -#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) -#ifdef GGML_USE_CPU_HBM -#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) hbw_free(ptr) -#elif TARGET_OS_OSX -#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size) -#else -#define GGML_ALIGNED_FREE(ptr, size) free(ptr) -#endif -#endif - #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index ad2512c535baa..378bfd68a44b5 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -702,7 +702,7 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) { } static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) { - GGML_ALIGNED_FREE(buffer->context, buffer->size); + ggml_aligned_free(buffer->context, buffer->size); } static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { @@ -770,7 +770,7 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty } static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { - void * data = GGML_ALIGNED_MALLOC(size); + void * data = ggml_aligned_malloc(size); if (data == NULL) { GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size); diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index d3f4bad8c0a84..c88ae6069959b 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -196,6 +196,11 @@ struct ggml_cgraph { struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1); +// Memory allocation + +void * ggml_aligned_malloc(size_t size); +void ggml_aligned_free(void * ptr, size_t size); + #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index a2a82fd5cd3f9..ce041db91d993 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3,12 +3,17 @@ #include "ggml-backend.h" #include "ggml-impl.h" -#include "ggml-backend-impl.h" #include "ggml-cpu-impl.h" #include "ggml-quants.h" #include "ggml.h" #include "ggml-aarch64.h" +#if defined(_MSC_VER) || defined(__MINGW32__) +#include // using malloc.h with MSC/MINGW +#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) +#include +#endif + #include #include #include @@ -175,7 +180,13 @@ typedef void * thread_ret_t; typedef pthread_t ggml_thread_t; +#ifdef GGML_USE_CPU_HBM +#include +#endif + #if defined(__APPLE__) +#include +#include #include #endif @@ -374,6 +385,75 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi #endif +void * ggml_aligned_malloc(size_t size) { +#if defined(_MSC_VER) || defined(__MINGW32__) + return _aligned_malloc(size, GGML_MEM_ALIGN); +#else + if (size == 0) { + GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); + return NULL; + } + void * aligned_memory = NULL; +#ifdef GGML_USE_CPU_HBM + int result = hbw_posix_memalign(&aligned_memory, 16, size); +#elif TARGET_OS_OSX + kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); + int result = EFAULT; + switch (alloc_status) { + case KERN_SUCCESS: + result = 0; + break; + case KERN_INVALID_ADDRESS: + result = EINVAL; + break; + case KERN_NO_SPACE: + result = ENOMEM; + break; + default: + result = EFAULT; + break; + } +#elif GGML_USE_METAL + int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); +#else + int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); +#endif + if (result != 0) { + // Handle allocation failure + const char *error_desc = "unknown allocation error"; + switch (result) { + case EINVAL: + error_desc = "invalid alignment value"; + break; + case ENOMEM: + error_desc = "insufficient memory"; + break; + } + GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); + GGML_ABORT("fatal error"); + return NULL; + } + return aligned_memory; +#endif +} + +void ggml_aligned_free(void * ptr, size_t size) { +#if defined(_MSC_VER) || defined(__MINGW32__) + _aligned_free(ptr); +#elif GGML_USE_CPU_HBM + if(ptr != NULL) { + hbw_free(ptr); + } +#elif TARGET_OS_OSX + if(ptr != NULL) { + vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size); + } +#else + free(ptr); +#endif +} + + inline static void * ggml_malloc(size_t size) { if (size == 0) { GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n"); @@ -3815,7 +3895,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { *ctx = (struct ggml_context) { /*.mem_size =*/ mem_size, - /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size), + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size), /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, /*.no_alloc =*/ params.no_alloc, /*.no_alloc_save =*/ params.no_alloc, @@ -3855,7 +3935,7 @@ void ggml_free(struct ggml_context * ctx) { __func__, i, ggml_used_mem(ctx)); if (ctx->mem_buffer_owned) { - GGML_ALIGNED_FREE(ctx->mem_buffer, ctx->mem_size); + ggml_aligned_free(ctx->mem_buffer, ctx->mem_size); } found = true; @@ -19577,8 +19657,9 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) { ggml_cond_destroy(&threadpool->cond); #endif // GGML_USE_OPENMP - GGML_ALIGNED_FREE(threadpool->workers, sizeof(struct ggml_compute_state) * n_threads); - GGML_ALIGNED_FREE(threadpool, sizeof(struct ggml_threadpool)); + const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads; + ggml_aligned_free(threadpool->workers, workers_size); + ggml_aligned_free(threadpool, sizeof(struct ggml_threadpool)); } #ifndef GGML_USE_OPENMP @@ -20010,7 +20091,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl( struct ggml_cplan * cplan) { struct ggml_threadpool * threadpool = - GGML_ALIGNED_MALLOC(sizeof(struct ggml_threadpool)); + ggml_aligned_malloc(sizeof(struct ggml_threadpool)); { threadpool->cgraph = cgraph; threadpool->cplan = cplan; @@ -20031,7 +20112,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl( // Allocate and init workers state const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads; - struct ggml_compute_state * workers = GGML_ALIGNED_MALLOC(workers_size); + struct ggml_compute_state * workers = ggml_aligned_malloc(workers_size); memset(workers, 0, workers_size); for (int j = 0; j < tpp->n_threads; j++) { From df67ae380e09f56e7513fe964c21c5ea779c7c6f Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 02:01:02 +0300 Subject: [PATCH 09/15] fix: unused var --- ggml/src/ggml.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index ce041db91d993..8d968d67c3b75 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -438,6 +438,7 @@ void * ggml_aligned_malloc(size_t size) { } void ggml_aligned_free(void * ptr, size_t size) { + GGML_UNUSED(size); #if defined(_MSC_VER) || defined(__MINGW32__) _aligned_free(ptr); #elif GGML_USE_CPU_HBM From 33b430810bfc97b5387429469201ea20f54ef407 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 02:51:39 +0300 Subject: [PATCH 10/15] fix: page align to `GGUF_DEFAULT_ALIGNMENT` --- ggml/src/ggml.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 8d968d67c3b75..8c4766469b2b1 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -387,7 +387,7 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi void * ggml_aligned_malloc(size_t size) { #if defined(_MSC_VER) || defined(__MINGW32__) - return _aligned_malloc(size, GGML_MEM_ALIGN); + return _aligned_malloc(size, GGUF_DEFAULT_ALIGNMENT); #else if (size == 0) { GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); @@ -416,7 +416,7 @@ void * ggml_aligned_malloc(size_t size) { #elif GGML_USE_METAL int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); #else - int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); + int result = posix_memalign(&aligned_memory, GGUF_DEFAULT_ALIGNMENT, size); #endif if (result != 0) { // Handle allocation failure @@ -442,11 +442,11 @@ void ggml_aligned_free(void * ptr, size_t size) { #if defined(_MSC_VER) || defined(__MINGW32__) _aligned_free(ptr); #elif GGML_USE_CPU_HBM - if(ptr != NULL) { + if (ptr != NULL) { hbw_free(ptr); } #elif TARGET_OS_OSX - if(ptr != NULL) { + if (ptr != NULL) { vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size); } #else From cb224645e17dca23f966735927c3143e5979198d Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 02:55:13 +0300 Subject: [PATCH 11/15] fix: page align to `TENSOR_ALIGNMENT` --- ggml/src/ggml.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 8c4766469b2b1..0deb2f90ea4e9 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -387,7 +387,7 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi void * ggml_aligned_malloc(size_t size) { #if defined(_MSC_VER) || defined(__MINGW32__) - return _aligned_malloc(size, GGUF_DEFAULT_ALIGNMENT); + return _aligned_malloc(size, TENSOR_ALIGNMENT); #else if (size == 0) { GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); @@ -416,7 +416,7 @@ void * ggml_aligned_malloc(size_t size) { #elif GGML_USE_METAL int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); #else - int result = posix_memalign(&aligned_memory, GGUF_DEFAULT_ALIGNMENT, size); + int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size); #endif if (result != 0) { // Handle allocation failure From 1516f7b7903728a5f447a9bbfe6255504564fde2 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 03:06:30 +0300 Subject: [PATCH 12/15] fix: convert `TENSOR_ALIGNMENT` to a macro --- ggml/src/ggml-backend.cpp | 2 -- ggml/src/ggml-impl.h | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 378bfd68a44b5..6d6ffeb4efe12 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -682,8 +682,6 @@ ggml_backend_t ggml_backend_init_best(void) { // backend CPU -static const size_t TENSOR_ALIGNMENT = 32; // required for mmap as gguf only guarantees 32-byte alignment - static const char * ggml_backend_cpu_buffer_get_name(ggml_backend_buffer_t buffer) { return "CPU"; diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index c88ae6069959b..65c4f81195be3 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -19,6 +19,9 @@ extern "C" { #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) +// required for mmap as gguf only guarantees 32-byte alignment +#define TENSOR_ALIGNMENT 32 + // static_assert should be a #define, but if it's not, // fall back to the _Static_assert C11 keyword. // if C99 - static_assert is noop From 4455c7f0737f15180d2f386f344c3cb214af3aba Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 03:29:23 +0300 Subject: [PATCH 13/15] fix: increase page size to `32` on iOS --- ggml/src/ggml.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 0deb2f90ea4e9..a92600e016ae9 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -414,7 +414,8 @@ void * ggml_aligned_malloc(size_t size) { break; } #elif GGML_USE_METAL - int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); + const auto page_size = sysconf(_SC_PAGESIZE); + int result = posix_memalign(&aligned_memory, MAX(TENSOR_ALIGNMENT, page_size), sysconf(_SC_PAGESIZE), size); #else int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size); #endif From de7836a4e34eb142ff8255ed975e998379f240a8 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 15 Oct 2024 03:38:47 +0300 Subject: [PATCH 14/15] fix: iOS page size --- ggml/src/ggml.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index a92600e016ae9..48e9aa70d023f 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -414,8 +414,8 @@ void * ggml_aligned_malloc(size_t size) { break; } #elif GGML_USE_METAL - const auto page_size = sysconf(_SC_PAGESIZE); - int result = posix_memalign(&aligned_memory, MAX(TENSOR_ALIGNMENT, page_size), sysconf(_SC_PAGESIZE), size); + const long page_size = sysconf(_SC_PAGESIZE); + int result = posix_memalign(&aligned_memory, MAX(TENSOR_ALIGNMENT, page_size), size); #else int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size); #endif From ecddb2452ab03c2890a066e244d5cf0c904ae468 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Wed, 16 Oct 2024 02:03:03 +0300 Subject: [PATCH 15/15] fix: `hbw_posix_memalign` alignment --- ggml/src/ggml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 48e9aa70d023f..779b38d1213f2 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -395,7 +395,7 @@ void * ggml_aligned_malloc(size_t size) { } void * aligned_memory = NULL; #ifdef GGML_USE_CPU_HBM - int result = hbw_posix_memalign(&aligned_memory, 16, size); + int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size); #elif TARGET_OS_OSX kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); int result = EFAULT;