Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions examples/save-load-state/save-load-state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,12 @@ int main(int argc, char ** argv) {

llama_batch_free(batch);

// this one is managed by common_init_result
//llama_free(ctx);

llama_free(ctx2);
llama_free(ctx3);

if (result0 != result2) {
fprintf(stderr, "\n%s : error : the seq restore generation is different\n", __func__);
return 1;
Expand Down
32 changes: 17 additions & 15 deletions ggml/src/ggml-metal/ggml-metal-device.m
Original file line number Diff line number Diff line change
Expand Up @@ -574,22 +574,26 @@ ggml_metal_rsets_t ggml_metal_rsets_init(void) {
// the requests stop after a certain amount of time (keep_alive_s) of inactivity
dispatch_queue_t d_queue = dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0);
dispatch_group_async(res->d_group, d_queue, ^{
while (!atomic_load_explicit(&res->d_stop, memory_order_relaxed)) {
if (atomic_load_explicit(&res->d_loop, memory_order_relaxed) > 0) {
[res->lock lock];
#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) {
while (!atomic_load_explicit(&res->d_stop, memory_order_relaxed)) {
if (atomic_load_explicit(&res->d_loop, memory_order_relaxed) > 0) {
[res->lock lock];

for (int i = 0; i < (int) res->data.count; ++i) {
[res->data[i] requestResidency];
}
for (int i = 0; i < (int) res->data.count; ++i) {
[res->data[i] requestResidency];
}

atomic_fetch_sub_explicit(&res->d_loop, 1, memory_order_relaxed);
atomic_fetch_sub_explicit(&res->d_loop, 1, memory_order_relaxed);

[res->lock unlock];
}
[res->lock unlock];
}

// half a second
usleep(500 * 1000);
}
// half a second
usleep(500 * 1000);
}
}
#endif
});

return res;
Expand All @@ -600,6 +604,7 @@ void ggml_metal_rsets_free(ggml_metal_rsets_t rsets) {
return;
}

// note: if you hit this assert, most likely you haven't deallocated all Metal resources before exiting
GGML_ASSERT([rsets->data count] == 0);

atomic_store_explicit(&rsets->d_stop, true, memory_order_relaxed);
Expand Down Expand Up @@ -787,9 +792,6 @@ ggml_metal_device_t ggml_metal_device_init(void) {
dev->rsets = nil;
}


// --------------------------------------------------

// print MTL GPU family:
GGML_LOG_INFO("%s: GPU name: %s\n", __func__, dev->props.name);

Expand Down
Loading