-
-
Notifications
You must be signed in to change notification settings - Fork 792
Description
System Info
OS: WSL2, Ubuntu 22.04
bitsandbytes: 0.44.1
torch: 2.5.1+rocm6.2
GPU: RX 7900XT
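Reproduction
Launching a built-in LLM through Xinference (transformers backend, bitsandbytes 8-bit quantization) on the ROCm setup above fails while loading the model, with the following traceback: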
Traceback (most recent call last):
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/api/restful_api.py", line 992, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/pool.py", line 659, in send
result = await self._run_coro(message.message_id, coro)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/core/supervisor.py", line 1041, in launch_builtin_model
await _launch_model()
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/core/supervisor.py", line 1005, in _launch_model
await _launch_one_model(rep_model_uid)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/core/supervisor.py", line 984, in _launch_one_model
await worker_ref.launch_builtin_model(
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/pool.py", line 659, in send
result = await self._run_coro(message.message_id, coro)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/core/utils.py", line 90, in wrapped
ret = await func(*args, **kwargs)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/core/worker.py", line 897, in launch_builtin_model
await model_ref.load()
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/pool.py", line 659, in send
result = await self._run_coro(message.message_id, coro)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/core/model.py", line 399, in load
self._model.load()
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/model/llm/transformers/core.py", line 670, in load
super().load()
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/model/llm/transformers/core.py", line 321, in load
self._model, self._tokenizer = self._load_model(**kwargs)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/xinference/model/llm/transformers/core.py", line 196, in _load_model
model = AutoModelForCausalLM.from_pretrained(
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 564, in from_pretrained
return model_class.from_pretrained(
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4225, in from_pretrained
) = cls._load_pretrained_model(
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4728, in _load_pretrained_model
new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/transformers/modeling_utils.py", line 995, in _load_state_dict_into_meta_model
hf_quantizer.create_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/transformers/quantizers/quantizer_bnb_8bit.py", line 226, in create_quantized_param
new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(target_device)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/bitsandbytes/nn/modules.py", line 633, in to
return self.cuda(device)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/bitsandbytes/nn/modules.py", line 594, in cuda
CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/bitsandbytes/functional.py", line 2568, in double_quant
row_stats, col_stats, nnz_row_ptr = get_colrow_absmax(A, threshold=threshold)
File "/root/miniconda3/envs/xinf/lib/python3.10/site-packages/bitsandbytes/functional.py", line 2467, in get_colrow_absmax
lib.cget_col_row_stats(ptrA, ptrRowStats, ptrColStats, ptrNnzrows, ct.c_float(threshold), rows, cols)
AttributeError: [address=0.0.0.0:39287, pid=55548] 'NoneType' object has no attribute 'cget_col_row_stats'
Expected behavior
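The model should load successfully with 8-bit bitsandbytes quantization, instead of raising `AttributeError: 'NoneType' object has no attribute 'cget_col_row_stats'`.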