Merged: ggml/src/ggml-vulkan/ggml-vulkan.cpp (19 additions, 0 deletions)
@@ -513,6 +513,7 @@ struct vk_device_struct {
     vk_queue compute_queue;
     vk_queue transfer_queue;
     bool single_queue;
+    bool support_async;
     uint32_t subgroup_size;
     uint32_t shader_core_count;
     bool uma;
@@ -4222,6 +4223,16 @@ static vk_device ggml_vk_get_device(size_t idx) {
     device->vendor_id = device->properties.vendorID;
     device->driver_id = driver_props.driverID;

+    // Implementing the async backend interfaces seems broken on older Intel HW,
+    // see https://github.com/ggml-org/llama.cpp/issues/17302.
+    device->support_async = (device->vendor_id != VK_VENDOR_ID_INTEL ||
+                             std::string(device->properties.deviceName.data()).find("(DG1)") == std::string::npos) &&
+                            getenv("GGML_VK_DISABLE_ASYNC") == nullptr;
+
+    if (!device->support_async) {
+        GGML_LOG_DEBUG("ggml_vulkan: WARNING: Async execution disabled on certain Intel devices.\n");
+    }
+
     const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");

     if (GGML_VK_FORCE_MAX_ALLOCATION_SIZE != nullptr) {
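Usage note: the getenv check above also gives a manual override. Setting GGML_VK_DISABLE_ASYNC to any value in the environment forces support_async to false on every device, not just Intel DG1, which can help when bisecting suspected synchronization bugs on other hardware.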
@@ -12923,6 +12934,10 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
         ctx->device->perf_logger->print_timings();
     }

+    if (!ctx->device->support_async) {
+        ggml_vk_synchronize(ctx);
+    }
+
     return GGML_STATUS_SUCCESS;

     UNUSED(backend);
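For context, here is a minimal caller-side sketch (an illustration, not part of the patch) of why this extra synchronization is sufficient: with support_async forced off, ggml_backend_vk_graph_compute returns only after ggml_vk_synchronize has drained the device, so a plain blocking read afterwards sees finished results. The variables backend, gf, out, and dst_buf are assumed to be set up by the caller:

    // Hedged illustration using the public ggml-backend API; with
    // support_async == false the compute call below synchronizes internally.
    ggml_status st = ggml_backend_graph_compute(backend, gf);  // returns after the GPU is idle
    if (st == GGML_STATUS_SUCCESS) {
        // Blocking copy; no get_tensor_async needed, results are already visible.
        ggml_backend_tensor_get(out, dst_buf, 0, ggml_nbytes(out));
    }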
@@ -13216,6 +13231,10 @@ ggml_backend_t ggml_backend_vk_init(size_t dev_num) {
         /* .context = */ ctx,
     };

+    if (!ctx->device->support_async) {
+        vk_backend->iface.get_tensor_async = nullptr;
+    }
+
     return vk_backend;
 }

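Finally, clearing iface.get_tensor_async works because the generic ggml-backend layer treats a null interface pointer as "synchronous only" and falls back to the blocking path. A paraphrased sketch of that dispatch (assumed to match the upstream fallback, not copied verbatim from the repo):

    // Sketch of the generic async-get dispatch in ggml-backend (paraphrased):
    // a backend that leaves get_tensor_async unset, as the Vulkan backend now
    // does on Intel DG1, transparently degrades to a blocking copy.
    void ggml_backend_tensor_get_async(ggml_backend_t backend, const ggml_tensor * tensor,
                                       void * data, size_t offset, size_t size) {
        if (backend->iface.get_tensor_async == NULL) {
            ggml_backend_tensor_get(tensor, data, offset, size);  // synchronous fallback
        } else {
            backend->iface.get_tensor_async(backend, tensor, data, offset, size);
        }
    }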