-
Notifications
You must be signed in to change notification settings - Fork 256
Description
A little while ago I got levelzero working quite well, and the install was fairly straightforward. See #832 for the steps I followed. The most notable thing about it is probably that I'm using the Intel packages from the https://apt.repos.intel.com/oneapi repository. I haven't changed the install at all since then, only run system updates through APT; however, when I recently tried loading an app that depends on levelzero, I found it segfaults.
Running sycl-ls I found it segfaults too:
$ source /opt/intel/oneapi/setvars.sh
:: initializing oneAPI environment ...
bash: BASH_VERSION = 5.2.21(1)-release
args: Using "$@" for setvars.sh arguments:
:: advisor -- latest
:: ccl -- latest
:: compiler -- latest
:: dal -- latest
:: debugger -- latest
:: dev-utilities -- latest
:: dnnl -- latest
:: dpcpp-ct -- latest
:: dpl -- latest
:: ipp -- latest
:: ippcp -- latest
:: mkl -- latest
:: mpi -- latest
:: tbb -- latest
:: umf -- latest
:: vtune -- latest
:: oneAPI environment initialized ::
$ ZE_ENABLE_LOADER_DEBUG_TRACE=1 sycl-ls
ZE_LOADER_DEBUG_TRACE:Static Loader Using Loader Library Path:
ZE_LOADER_DEBUG_TRACE:Loading Driver libze_intel_gpu.so.1
ZE_LOADER_DEBUG_TRACE:Loading Driver libze_intel_vpu.so.1
ZE_LOADER_DEBUG_TRACE:Load Library of libze_intel_vpu.so.1 failed with libze_intel_vpu.so.1: cannot open shared object file: No such file or directory
ZE_LOADER_DEBUG_TRACE:ze_lib Context Init() Static Loader Found Loader Version v1.16.0
ZE_LOADER_DEBUG_TRACE:ze_lib Context Init() Version Does not support zeInitDrivers
ZE_LOADER_DEBUG_TRACE:ze_lib Context Init() zelLoaderGetContext missing
ZE_LOADER_DEBUG_TRACE:Loader API Version to be requested is v1.9
ZE_LOADER_DEBUG_TRACE:getTableWithCheck Failed for 27_ze_rtas_builder_dditable_t with ZE_RESULT_ERROR_UNINITIALIZED
ZE_LOADER_DEBUG_TRACE:getTableWithCheck Failed for 38_ze_rtas_parallel_operation_dditable_t with ZE_RESULT_ERROR_UNINITIALIZED
ZE_LOADER_DEBUG_TRACE:getTableWithCheck Failed for 34_zet_metric_decoder_exp_dditable_t with ZE_RESULT_ERROR_UNINITIALIZED
ZE_LOADER_DEBUG_TRACE:getTableWithCheck Failed for 33_zet_metric_tracer_exp_dditable_t with ZE_RESULT_ERROR_UNINITIALIZED
ZE_LOADER_DEBUG_TRACE:getTableWithCheck Failed for 26_zet_device_exp_dditable_t with ZE_RESULT_ERROR_UNINITIALIZED
ZE_LOADER_DEBUG_TRACE:getTableWithCheck Failed for 32_zet_command_list_exp_dditable_t with ZE_RESULT_ERROR_UNINITIALIZED
Segmentation fault (core dumped)
Whereas previously it worked (copied from the referenced issue):
$ ZE_ENABLE_LOADER_DEBUG_TRACE=1 sycl-ls
ZE_LOADER_DEBUG_TRACE:Loading Driver libze_intel_gpu.so.1
ZE_LOADER_DEBUG_TRACE:Loading Driver libze_intel_vpu.so.1
ZE_LOADER_DEBUG_TRACE:Load Library of libze_intel_vpu.so.1 failed with libze_intel_vpu.so.1: cannot open shared object file: No such file or directory
ZE_LOADER_DEBUG_TRACE:check_drivers(flags=ZE_INIT_FLAG_GPU_ONLY)
ZE_LOADER_DEBUG_TRACE:init driver libze_intel_gpu.so.1 zeInit(ZE_INIT_FLAG_GPU_ONLY) returning ZE_RESULT_SUCCESS
[level_zero:gpu][level_zero:0] Intel(R) oneAPI Unified Runtime over Level-Zero, Intel(R) UHD Graphics 750 12.1.0 [1.3.27642]
[opencl:cpu][opencl:0] Intel(R) OpenCL, 11th Gen Intel(R) Core(TM) i7-11700K @ 3.60GHz OpenCL 3.0 (Build 0) [2025.19.4.0.18_160000.xmain-hotfix]
[opencl:gpu][opencl:1] Intel(R) OpenCL Graphics, Intel(R) UHD Graphics 750 OpenCL 3.0 NEO [23.43.027642]
This seems vaguely similar to oneapi-src/level-zero#265 and I've confirmed that i915 is indeed loaded and running:
$ lsmod
Module Size Used by
tls 155648 0
socwatch2_16 131072 0
vtsspp 565248 0
rfcomm 102400 4
sep5 3559424 0
snd_seq_dummy 12288 0
snd_hrtimer 12288 1
xt_CHECKSUM 12288 1
xt_MASQUERADE 16384 3
xt_conntrack 12288 1
ipt_REJECT 12288 2
nf_reject_ipv4 12288 1 ipt_REJECT
xt_tcpudp 16384 0
nft_compat 20480 7
nft_chain_nat 12288 2
nf_nat 61440 2 nft_chain_nat,xt_MASQUERADE
nf_conntrack 196608 3 xt_conntrack,nf_nat,xt_MASQUERADE
nf_defrag_ipv6 24576 1 nf_conntrack
nf_defrag_ipv4 12288 1 nf_conntrack
nf_tables 376832 156 nft_compat,nft_chain_nat
libcrc32c 12288 3 nf_conntrack,nf_nat,nf_tables
bridge 421888 0
stp 12288 1 bridge
llc 16384 2 bridge,stp
pax 16384 0
qrtr 53248 2
cmac 12288 2
algif_hash 12288 1
algif_skcipher 16384 1
af_alg 32768 6 algif_hash,algif_skcipher
bnep 32768 2
binfmt_misc 24576 1
intel_rapl_msr 20480 0
intel_rapl_common 40960 1 intel_rapl_msr
intel_uncore_frequency 16384 0
intel_uncore_frequency_common 16384 1 intel_uncore_frequency
x86_pkg_temp_thermal 20480 0
snd_sof_pci_intel_tgl 12288 0
snd_sof_intel_hda_common 217088 1 snd_sof_pci_intel_tgl
soundwire_intel 73728 1 snd_sof_intel_hda_common
snd_sof_intel_hda_mlink 45056 2 soundwire_intel,snd_sof_intel_hda_common
soundwire_cadence 45056 1 soundwire_intel
snd_sof_intel_hda 24576 1 snd_sof_intel_hda_common
snd_sof_pci 24576 2 snd_sof_intel_hda_common,snd_sof_pci_intel_tgl
snd_sof_xtensa_dsp 12288 1 snd_sof_intel_hda_common
snd_sof 380928 3 snd_sof_pci,snd_sof_intel_hda_common,snd_sof_intel_hda
snd_sof_utils 16384 1 snd_sof
snd_soc_hdac_hda 24576 1 snd_sof_intel_hda_common
snd_hda_ext_core 32768 4 snd_sof_intel_hda_common,snd_soc_hdac_hda,snd_sof_intel_hda_mlink,snd_sof_intel_hda
snd_soc_acpi_intel_match 98304 2 snd_sof_intel_hda_common,snd_sof_pci_intel_tgl
snd_soc_acpi 16384 2 snd_soc_acpi_intel_match,snd_sof_intel_hda_common
soundwire_generic_allocation 12288 1 soundwire_intel
soundwire_bus 110592 3 soundwire_intel,soundwire_generic_allocation,soundwire_cadence
intel_powerclamp 24576 0
snd_soc_core 442368 4 soundwire_intel,snd_sof,snd_sof_intel_hda_common,snd_soc_hdac_hda
snd_compress 28672 1 snd_soc_core
ac97_bus 12288 1 snd_soc_core
xe 2723840 0
snd_hda_codec_hdmi 94208 2
iwlmvm 868352 0
snd_pcm_dmaengine 16384 1 snd_soc_core
kvm_intel 487424 0
snd_hda_intel 61440 2
btusb 77824 0
snd_intel_dspcfg 36864 3 snd_hda_intel,snd_sof,snd_sof_intel_hda_common
drm_gpuvm 45056 1 xe
btrtl 32768 1 btusb
snd_intel_sdw_acpi 16384 2 snd_sof_intel_hda_common,snd_intel_dspcfg
amdgpu 17137664 1
mac80211 1744896 1 iwlmvm
kvm 1409024 1 kvm_intel
snd_usb_audio 512000 3
btintel 57344 1 btusb
snd_hda_codec 204800 4 snd_hda_codec_hdmi,snd_hda_intel,snd_soc_hdac_hda,snd_sof_intel_hda
btbcm 24576 1 btusb
btmtk 12288 1 btusb
zfs 6602752 9
snd_hda_core 139264 7 snd_hda_codec_hdmi,snd_hda_intel,snd_hda_ext_core,snd_hda_codec,snd_sof_intel_hda_common,snd_soc_hdac_hda,snd_sof_intel_hda
irqbypass 12288 1 kvm
snd_usbmidi_lib 53248 1 snd_usb_audio
libarc4 12288 1 mac80211
crct10dif_pclmul 12288 1
bluetooth 1032192 34 btrtl,btmtk,btintel,btbcm,bnep,btusb,rfcomm
snd_hwdep 20480 2 snd_usb_audio,snd_hda_codec
polyval_clmulni 12288 0
polyval_generic 12288 1 polyval_clmulni
snd_ump 45056 1 snd_usb_audio
ghash_clmulni_intel 16384 0
snd_seq_midi 24576 0
sha256_ssse3 32768 0
snd_seq_midi_event 16384 1 snd_seq_midi
sha1_ssse3 32768 0
aesni_intel 356352 3
snd_rawmidi 57344 3 snd_seq_midi,snd_usbmidi_lib,snd_ump
cmdlinepart 12288 0
amdxcp 12288 1 amdgpu
drm_exec 12288 3 drm_gpuvm,amdgpu,xe
crypto_simd 16384 1 aesni_intel
mc 81920 1 snd_usb_audio
gpu_sched 61440 2 amdgpu,xe
spi_nor 163840 0
iwlwifi 602112 1 iwlmvm
snd_seq 118784 9 snd_seq_midi,snd_seq_midi_event,snd_seq_dummy
ecdh_generic 16384 1 bluetooth
cryptd 24576 3 crypto_simd,ghash_clmulni_intel
snd_pcm 192512 12 snd_hda_codec_hdmi,snd_hda_intel,snd_usb_audio,snd_hda_codec,soundwire_intel,snd_sof,snd_sof_intel_hda_common,snd_compress,snd_soc_core,snd_sof_utils,snd_hda_core,snd_pcm_dmaengine
i915 4288512 3
mei_hdcp 28672 0
mei_pxp 16384 0
mtd 98304 3 spi_nor,cmdlinepart
ee1004 16384 0
ecc 45056 1 ecdh_generic
radeon 2174976 0
nls_iso8859_1 12288 1
snd_seq_device 16384 4 snd_seq,snd_seq_midi,snd_ump,snd_rawmidi
spl 180224 1 zfs
intel_cstate 24576 0
gigabyte_wmi 12288 0
intel_wmi_thunderbolt 16384 0
cfg80211 1355776 3 iwlmvm,iwlwifi,mac80211
snd_timer 49152 3 snd_seq,snd_hrtimer,snd_pcm
wmi_bmof 12288 0
mxm_wmi 12288 0
drm_suballoc_helper 16384 3 amdgpu,radeon,xe
drm_buddy 20480 3 amdgpu,xe,i915
drm_ttm_helper 12288 3 amdgpu,radeon,xe
mei_me 53248 2
i2c_i801 36864 0
spi_intel_pci 12288 0
snd 143360 27 snd_seq,snd_seq_device,snd_hda_codec_hdmi,snd_hwdep,snd_hda_intel,snd_usb_audio,snd_usbmidi_lib,snd_hda_codec,snd_sof,snd_timer,snd_compress,snd_soc_core,snd_ump,snd_pcm,snd_rawmidi
spi_intel 32768 1 spi_intel_pci
ttm 110592 5 amdgpu,radeon,drm_ttm_helper,xe,i915
i2c_smbus 16384 1 i2c_i801
mei 172032 5 mei_hdcp,mei_pxp,mei_me
drm_display_helper 237568 4 amdgpu,radeon,xe,i915
soundcore 16384 1 snd
intel_pmc_core 118784 0
cec 94208 3 drm_display_helper,xe,i915
intel_vsec 20480 1 intel_pmc_core
rc_core 73728 1 cec
i2c_algo_bit 16384 4 amdgpu,radeon,xe,i915
pmt_telemetry 16384 2 intel_pmc_core,socwatch2_16
acpi_tad 20480 0
acpi_pad 184320 0
pmt_class 12288 1 pmt_telemetry
mac_hid 12288 0
sch_fq_codel 24576 7
coretemp 24576 0
msr 12288 0
parport_pc 53248 0
ppdev 24576 0
lp 28672 0
parport 73728 3 parport_pc,lp,ppdev
efi_pstore 12288 0
nfnetlink 20480 3 nft_compat,nf_tables
dmi_sysfs 24576 0
ip_tables 32768 0
x_tables 65536 7 xt_conntrack,nft_compat,xt_tcpudp,xt_CHECKSUM,ipt_REJECT,ip_tables,xt_MASQUERADE
autofs4 57344 2
hid_generic 12288 0
usbhid 77824 0
hid 180224 2 usbhid,hid_generic
nvme 61440 2
crc32_pclmul 12288 0
igc 200704 0
nvme_core 212992 3 nvme
ahci 49152 2
xhci_pci 24576 0
libahci 53248 1 ahci
xhci_pci_renesas 20480 1 xhci_pci
nvme_auth 28672 1 nvme_core
video 77824 4 amdgpu,radeon,xe,i915
wmi 28672 5 video,intel_wmi_thunderbolt,gigabyte_wmi,wmi_bmof,mxm_wmi
pinctrl_tigerlake 28672 0
So I'm not sure what else it could be at this point, other than a change in the Intel packages. Debugging suggestions welcome.