From cef2c97c03af5642a06c5a3eee80d339f45a8862 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 20 Aug 2024 15:06:20 +0200
Subject: [PATCH 1/3] lora : raise error if lm_head is ignored

---
 convert_lora_to_gguf.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index a88d0d4a978a9..435e9e2e1d68f 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -364,6 +364,10 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         dest = super().modify_tensors(data_torch, name, bid)
+        # for now, we cannot convert archs that use the same tensor for tok_embd and output
+        # see: https://github.com/ggerganov/llama.cpp/issues/9065
+        if name == "lm_head.weight" and len(dest) == 0:
+            raise ValueError(f"lm_head is present in adapter, but is ignored in base model")
         for dest_name, dest_data in dest:
             assert isinstance(dest_data, LoraTorchTensor)
             lora_a, lora_b = dest_data.get_lora_A_B()

From f3a30334154ba63e47f01ba8d3bb24897a7c737b Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 20 Aug 2024 15:33:02 +0200
Subject: [PATCH 2/3] fix style

---
 convert_lora_to_gguf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index 435e9e2e1d68f..3aefcabaa9cb1 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -363,11 +363,11 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
         yield (name, cast(torch.Tensor, LoraTorchTensor(tensor.A, tensor.B)))
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        dest = super().modify_tensors(data_torch, name, bid)
+        dest = list(super().modify_tensors(data_torch, name, bid))
         # for now, we cannot convert archs that use the same tensor for tok_embd and output
         # see: https://github.com/ggerganov/llama.cpp/issues/9065
         if name == "lm_head.weight" and len(dest) == 0:
-            raise ValueError(f"lm_head is present in adapter, but is ignored in base model")
+            raise ValueError("lm_head is present in adapter, but is ignored in base model")
         for dest_name, dest_data in dest:
             assert isinstance(dest_data, LoraTorchTensor)
             lora_a, lora_b = dest_data.get_lora_A_B()

From fa0c2bdc45382585b79d672fd43117e51694d56d Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Wed, 28 Aug 2024 11:24:09 +0200
Subject: [PATCH 3/3] clarify comment

---
 convert_lora_to_gguf.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index 3aefcabaa9cb1..3c5ab28ac9e4c 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -364,7 +364,9 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         dest = list(super().modify_tensors(data_torch, name, bid))
-        # for now, we cannot convert archs that use the same tensor for tok_embd and output
+        # some archs may have the same tensor for lm_head and output (tie word embeddings)
+        # in this case, adapters targeting lm_head will fail when using llama-export-lora
+        # therefore, we ignore them for now
        # see: https://github.com/ggerganov/llama.cpp/issues/9065
         if name == "lm_head.weight" and len(dest) == 0:
             raise ValueError("lm_head is present in adapter, but is ignored in base model")
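
The new ValueError fires when the base model ties its token embedding and output projection, so the converter has no separate output tensor to map lm_head onto. Below is a minimal pre-flight sketch, not part of the patch, assuming the base model is a Hugging Face checkpoint with transformers installed; the model id is a hypothetical example, and since not every architecture defines the tie_word_embeddings flag, an absent flag is treated as "not tied".

# Pre-flight check (sketch, not from the patch): detect whether a Hugging Face
# base model ties lm_head to the token embedding, i.e. the case in which
# convert_lora_to_gguf.py now raises "lm_head is present in adapter, but is
# ignored in base model".
from transformers import AutoConfig


def lm_head_is_tied(base_model_id: str) -> bool:
    config = AutoConfig.from_pretrained(base_model_id)
    # some architectures do not define this flag at all; default to "not tied"
    return bool(getattr(config, "tie_word_embeddings", False))


if __name__ == "__main__":
    model_id = "google/gemma-2b"  # hypothetical example id, for illustration only
    if lm_head_is_tied(model_id):
        print("tied embeddings: drop lm_head from the adapter before converting")

Running such a check before conversion lets users strip lm_head from the adapter (or pick an untied base model) instead of hitting the error partway through.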