From 43838d86d703f778c251563e6adc22af523379df Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 5 Dec 2025 19:59:47 +0200 Subject: [PATCH 1/2] metal : fix build --- ggml/src/ggml-metal/ggml-metal-device.m | 28 ++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index c53ec506cd1..2118efb614e 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -574,22 +574,26 @@ ggml_metal_rsets_t ggml_metal_rsets_init(void) { // the requests stop after a certain amount of time (keep_alive_s) of inactivity dispatch_queue_t d_queue = dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0); dispatch_group_async(res->d_group, d_queue, ^{ - while (!atomic_load_explicit(&res->d_stop, memory_order_relaxed)) { - if (atomic_load_explicit(&res->d_loop, memory_order_relaxed) > 0) { - [res->lock lock]; +#if defined(GGML_METAL_HAS_RESIDENCY_SETS) + if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { + while (!atomic_load_explicit(&res->d_stop, memory_order_relaxed)) { + if (atomic_load_explicit(&res->d_loop, memory_order_relaxed) > 0) { + [res->lock lock]; - for (int i = 0; i < (int) res->data.count; ++i) { - [res->data[i] requestResidency]; - } + for (int i = 0; i < (int) res->data.count; ++i) { + [res->data[i] requestResidency]; + } - atomic_fetch_sub_explicit(&res->d_loop, 1, memory_order_relaxed); + atomic_fetch_sub_explicit(&res->d_loop, 1, memory_order_relaxed); - [res->lock unlock]; - } + [res->lock unlock]; + } - // half a second - usleep(500 * 1000); - } + // half a second + usleep(500 * 1000); + } + } +#endif }); return res; From 2a3625801ad2873905b5e6500601f6c5aefae7fd Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 5 Dec 2025 22:11:16 +0200 Subject: [PATCH 2/2] tests : fix context destruction --- examples/save-load-state/save-load-state.cpp | 6 ++++++ ggml/src/ggml-metal/ggml-metal-device.m | 4 +--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index 1065ec6bb00..4cd3071f762 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -241,6 +241,12 @@ int main(int argc, char ** argv) { llama_batch_free(batch); + // this one is managed by common_init_result + //llama_free(ctx); + + llama_free(ctx2); + llama_free(ctx3); + if (result0 != result2) { fprintf(stderr, "\n%s : error : the seq restore generation is different\n", __func__); return 1; diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index 2118efb614e..64fd9bdbf61 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -604,6 +604,7 @@ void ggml_metal_rsets_free(ggml_metal_rsets_t rsets) { return; } + // note: if you hit this assert, most likely you haven't deallocated all Metal resources before exiting GGML_ASSERT([rsets->data count] == 0); atomic_store_explicit(&rsets->d_stop, true, memory_order_relaxed); @@ -791,9 +792,6 @@ ggml_metal_device_t ggml_metal_device_init(void) { dev->rsets = nil; } - - // -------------------------------------------------- - // print MTL GPU family: GGML_LOG_INFO("%s: GPU name: %s\n", __func__, dev->props.name);