Skip to content

Commit

Permalink
llm example run_accuracy.py: load model to meta device for quantization (#2195)
Browse files Browse the repository at this point in the history
…on (#2195)

* llm example run_accuracy.py: load model to meta device for quantization

* Add more print

* Print exception

* Fix typo ipex._IPEXOnDevice -> ipex.IPEXOnDevice

* ipex.IPEXOnDevice -> ipex.OnDevice

* Fix typo _from_config -> from_config

* Remove mem usage print
  • Loading branch information
Xia-Weiwen committed Oct 25, 2023
1 parent c3a77f3 commit 0cd2502
Showing 1 changed file with 21 additions and 7 deletions.
28 changes: 21 additions & 7 deletions examples/cpu/inference/python/llm/single_instance/run_accuracy.py
Expand Up @@ -124,13 +124,27 @@ def __init__(
config, torchscript=with_jit, trust_remote_code=True
)

self.model = model_class[0].from_pretrained(
model_id,
low_cpu_mem_usage=True,
config=self.config,
torch_dtype=load_dtype,
trust_remote_code=True,
)
# For int8 quantization, try to construct the model on the "meta" device so
# that full-precision weights are not materialized in host memory; real
# weights are supplied later by the quantization flow.  If meta-device
# construction is unsupported by the installed ipex/transformers versions
# (RuntimeError/AttributeError), warn and fall back to a regular load.
# NOTE(review): assumes self._dtype, model_id, load_dtype, model_class and
# self.config were set earlier in __init__ — confirm against the full file.
loaded_on_meta = False
if self._dtype == "int8":
    try:
        with ipex.OnDevice(dtype=torch.float, device="meta"):
            self.model = AutoModelForCausalLM.from_config(self.config)
        loaded_on_meta = True
    except (RuntimeError, AttributeError) as e:
        # Best-effort: report the failure and fall through to the
        # standard checkpoint-loading path below.
        print("Warning: Loading model to meta device failed:", e)
if not loaded_on_meta:
    # Standard path (non-int8 dtypes, or meta-device load failed):
    # materialize weights directly from the checkpoint.
    self.model = model_class[0].from_pretrained(
        model_id,
        low_cpu_mem_usage=True,
        config=self.config,
        torch_dtype=load_dtype,
        trust_remote_code=True,
    )

# Evaluation/inference mode only (disables dropout etc.).
self.model = self.model.eval()

Expand Down

0 comments on commit 0cd2502

Please sign in to comment.