You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Traceback (most recent call last):
File "D:\rag\test_cpm.py", line 45, in <module>
answer, context, _ = model.chat(
File "C:\Users\mi\.cache\huggingface\modules\transformers_modules\MiniCPM-V\modeling_minicpmv.py", line 273, in chat
res, vision_hidden_states = self.generate(
File "C:\Users\mi\.cache\huggingface\modules\transformers_modules\MiniCPM-V\modeling_minicpmv.py", line 230, in generate
model_inputs['inputs_embeds'], vision_hidden_states = self.get_vllm_embedding(model_inputs)
File "C:\Users\mi\.cache\huggingface\modules\transformers_modules\MiniCPM-V\modeling_minicpmv.py", line 88, in get_vllm_embedding
vision_hidden_states.append(self.get_vision_embedding(pixel_values))
File "C:\Users\mi\.cache\huggingface\modules\transformers_modules\MiniCPM-V\modeling_minicpmv.py", line 79, in get_vision_embedding
res.append(self.resampler(vision_embedding))
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mi\.cache\huggingface\modules\transformers_modules\MiniCPM-V\resampler.py", line 152, in forward
out = self.attn(
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\activation.py", line 1241, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\functional.py", line 5413, in multi_head_attention_forward
attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
RuntimeError: self and mat2 must have the same dtype, but got Half and Byte
code:
import torch
from PIL import Image
from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer
import intel_extension_for_pytorch as ipex
import time
model = AutoModel.from_pretrained('./models/MiniCPM-V', trust_remote_code=True, load_in_low_bit="sym_int4", optimize_model=True, use_cache=True)
# model = AutoModel.from_pretrained('./models/MiniCPM-V', trust_remote_code=True, torch_dtype=torch.bfloat16)
model = model.eval()
model = model.half()
model = model.to("xpu")
tokenizer = AutoTokenizer.from_pretrained('./models/MiniCPM-V', trust_remote_code=True)
image = Image.open('AI.png').convert('RGB')
question = 'What is in the image?'
msgs = [{'role': 'user', 'content': question}]
torch.xpu.synchronize()
t0 = time.time()
answer, context, _ = model.chat(
image=image,
msgs=msgs,
context=None,
tokenizer=tokenizer,
sampling=True,
temperature=0.7
)
torch.xpu.synchronize()
t1 = time.time()
print("---cost time(s): ", t1 - t0)
print(answer)
Since Not Implemented Error: Could not run 'aten::_upsample_bicubic2d_aa.out' with arguments from the 'XPU' backend.”,
you need to make these changes: env\Lib\site-packages\timm\layers\pos_embed.py 大约46行, 改成posemb = F.interpolate(posemb.to("cpu"), size=new_size, mode=interpolation, antialias=antialias).to(posemb.device)
Could you please help to take a look?
Thanks.
Version:
I run miniCPM-v and get the following error:
code:
Since
Not Implemented Error: Could not run 'aten::_upsample_bicubic2d_aa.out' with arguments from the 'XPU' backend.”
,you need to make these changes:
env\Lib\site-packages\timm\layers\pos_embed.py 大约46行, 改成posemb = F.interpolate(posemb.to("cpu"), size=new_size, mode=interpolation, antialias=antialias).to(posemb.device)
Could you please help to take a look?
Thanks.
Version:
The text was updated successfully, but these errors were encountered: