Merge pull request #26 from sgwhat/model_half

Convert the model to half precision (`model.half()`) before moving it to the XPU, to reduce memory usage
intel-analytics · Mar 29, 2024 · af95b6c
2 parents 8a97681 + 9b23022
commit af95b6c
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/modules/models.py b/modules/models.py
@@ -352,7 +352,7 @@ def ipex_llm_loader(model_name):
 
     if shared.args.device == "GPU":
         import intel_extension_for_pytorch
-        model = model.to("xpu")
+        model = model.half().to("xpu")
 
     tokenizer = AutoTokenizer.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)