Skip to content

ggml_vulkan crashes if running 2 threads which allocate their own context for the same GPU device #1087

@philipag

Description

@philipag

I am using Whisper.net which in turn uses whisper.cpp which in turn uses ggml.

Running a single thread with the Vulkan backend works perfectly (but does not saturate the GPU — unlike when using the CUDA backend, which is a separate issue), so I tried adding a second thread that does everything the first thread does for the same GPU (allocates a context and pushes a separate audio stream). This crashes with an invalid memory access, and the two threads' call stacks always look like this:

				Thread
Not Flagged		4488	50	Worker Thread	SpeechToTextJob.serial: 1/2	msvcp140d.dll!mtx_do_lock
 	 	 	 	 	 	[External Code]
 	 	 	 	 	 	ggml-vulkan-whisper.dll!ggml_pipeline_allocate_descriptor_sets(std::shared_ptr<vk_device_struct> & device={...}) Line 909
 	 	 	 	 	 	ggml-vulkan-whisper.dll!ggml_backend_vk_graph_compute(ggml_backend * backend=0x0000032c7918ccd0, ggml_cgraph * cgraph=0x0000032bb8c1a138) Line 7589
 	 	 	 	 	 	ggml-base-whisper.dll!ggml_backend_graph_compute_async(ggml_backend * backend=0x0000032c7918ccd0, ggml_cgraph * cgraph=0x0000032bb8c1a138) Line 333
 	 	 	 	 	 	ggml-base-whisper.dll!ggml_backend_sched_compute_splits(ggml_backend_sched * sched=0x0000032c72339bb0) Line 1397
 	 	 	 	 	 	ggml-base-whisper.dll!ggml_backend_sched_graph_compute_async(ggml_backend_sched * sched=0x0000032c72339bb0, ggml_cgraph * graph=0x0000032c7a1a0060) Line 1589
 	 	 	 	 	 	ggml-base-whisper.dll!ggml_backend_sched_graph_compute(ggml_backend_sched * sched=0x0000032c72339bb0, ggml_cgraph * graph=0x0000032c7a1a0060) Line 1572
 	 	 	 	 	 	whisper.dll!ggml_graph_compute_helper(ggml_backend_sched * sched=0x0000032c72339bb0, ggml_cgraph * graph=0x0000032c7a1a0060, int n_threads=4) Line 183
 	 	 	 	 	 	whisper.dll!whisper_encode_internal(whisper_context & wctx={...}, whisper_state & wstate={...}, const int mel_offset=0, const int n_threads=4, bool(*)(void *) abort_callback=0x00007ff87a5f3318, void * abort_callback_data=0x0000000000000001) Line 2350
 	 	 	 	 	 	whisper.dll!whisper_full_with_state(whisper_context * ctx=0x0000032bb8ad0980, whisper_state * state=0x0000032c72c95d70, whisper_full_params params={...}, const float * samples=0x000002eb0af9fa28, int n_samples=462400) Line 5573
 	 	 	 	 	 	[External Code]
 	 	 	 	 	 	Whisper.net.dll!Whisper.net.WhisperProcessor.Process(System.ReadOnlySpan<float> samples = "System.ReadOnlySpan<Single>[462400]", System.DateTime firstSampleTime = "2020-10-01 10:00:28.300", bool audioWasSkipped = false) Line 192
 	 	 	 	 	 	Whisper.net.dll!Whisper.net.WhisperProcessor.Process(float[] samples = {float[462400]}, System.DateTime firstSampleTime = "2020-10-01 10:00:28.300", bool audioWasSkipped = false) Line 160
 	 	 	 	 	 	[External Code]

Not Flagged	>	36004	51	Worker Thread	SpeechToTextJob.serial: 2/2	ggml-vulkan-whisper.dll!std::string::size
 	 	 	 	 	 	ggml-vulkan-whisper.dll!std::string::size() Line 2346
 	 	 	 	 	 	ggml-vulkan-whisper.dll!std::hash<std::string>::_Do_hash(const std::string & _Keyval) Line 3354
 	 	 	 	 	 	ggml-vulkan-whisper.dll!std::_Conditionally_enabled_hash<std::string,1>::operator()(const std::string & _Keyval) Line 2339
 	 	 	 	 	 	ggml-vulkan-whisper.dll!std::_Uhash_compare<std::string,std::hash<std::string>,std::equal_to<std::string>>::operator()<std::string>(const std::string & _Keyval) Line 151
 	 	 	 	 	 	ggml-vulkan-whisper.dll!std::unordered_map<std::string,std::weak_ptr<vk_pipeline_struct>,std::hash<std::string>,std::equal_to<std::string>,std::allocator<std::pair<std::string const ,std::weak_ptr<vk_pipeline_struct>>>>::at(const std::string & _Keyval) Line 437
 	 	 	 	 	 	ggml-vulkan-whisper.dll!ggml_pipeline_allocate_descriptor_sets(std::shared_ptr<vk_device_struct> & device={...}) Line 912
 	 	 	 	 	 	ggml-vulkan-whisper.dll!ggml_backend_vk_graph_compute(ggml_backend * backend=0x0000032c791b0250, ggml_cgraph * cgraph=0x0000032bb8c20d38) Line 7589
 	 	 	 	 	 	ggml-base-whisper.dll!ggml_backend_graph_compute_async(ggml_backend * backend=0x0000032c791b0250, ggml_cgraph * cgraph=0x0000032bb8c20d38) Line 333
 	 	 	 	 	 	ggml-base-whisper.dll!ggml_backend_sched_compute_splits(ggml_backend_sched * sched=0x0000032bb871d490) Line 1397
 	 	 	 	 	 	ggml-base-whisper.dll!ggml_backend_sched_graph_compute_async(ggml_backend_sched * sched=0x0000032bb871d490, ggml_cgraph * graph=0x0000032cbd850060) Line 1589
 	 	 	 	 	 	ggml-base-whisper.dll!ggml_backend_sched_graph_compute(ggml_backend_sched * sched=0x0000032bb871d490, ggml_cgraph * graph=0x0000032cbd850060) Line 1572
 	 	 	 	 	 	whisper.dll!ggml_graph_compute_helper(ggml_backend_sched * sched=0x0000032bb871d490, ggml_cgraph * graph=0x0000032cbd850060, int n_threads=4) Line 183
 	 	 	 	 	 	whisper.dll!whisper_encode_internal(whisper_context & wctx={...}, whisper_state & wstate={...}, const int mel_offset=0, const int n_threads=4, bool(*)(void *) abort_callback=0x00007ff87a5f3318, void * abort_callback_data=0x0000000000000002) Line 2311
 	 	 	 	 	 	whisper.dll!whisper_full_with_state(whisper_context * ctx=0x0000032bb8ad0980, whisper_state * state=0x0000032bac961410, whisper_full_params params={...}, const float * samples=0x000002eb0b93b5e0, int n_samples=256000) Line 5573
 	 	 	 	 	 	[External Code]
 	 	 	 	 	 	Whisper.net.dll!Whisper.net.WhisperProcessor.Process(System.ReadOnlySpan<float> samples = "System.ReadOnlySpan<Single>[256000]", System.DateTime firstSampleTime = "2020-10-01 10:13:00.000", bool audioWasSkipped = false) Line 192
 	 	 	 	 	 	Whisper.net.dll!Whisper.net.WhisperProcessor.Process(float[] samples = {float[256000]}, System.DateTime firstSampleTime = "2020-10-01 10:13:00.000", bool audioWasSkipped = false) Line 160
 	 	 	 	 	 	[External Code]


Running multiple concurrent threads+contexts with CUDA, CPU, or even the Vulkan CPU backend works as expected. Only the Vulkan GPU backend causes the crash. If I add a lock to prevent the two threads from calling WhisperProcessor.Process() at the same time, the crash goes away. This happens with the 1.7.3 release from a few weeks ago as well as with whisper.cpp+ggml that I pulled and built yesterday.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions