Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gguf : general usability improvements #3409

Merged
merged 7 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 2 additions & 3 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@

NDArray: TypeAlias = 'np.ndarray[Any, Any]'

ARCH=gguf.MODEL_ARCH.LLAMA
NAMES=gguf.MODEL_TENSOR_NAMES[ARCH]
ARCH = gguf.MODEL_ARCH.LLAMA

DEFAULT_CONCURRENCY = 8
#
Expand Down Expand Up @@ -953,7 +952,7 @@ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyM
of.close()

def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
wq_type = model[NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type
wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type

if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
return GGMLFileType.AllF32
Expand Down
2 changes: 1 addition & 1 deletion examples/finetune/convert-finetune-checkpoint-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def save_gguf(self, gguf_writer):
gguf_writer.add_feed_forward_length(self.get_n_ff())

def tensor_name(key, bid=None, suffix=".weight"):
return gguf.MODEL_TENSOR_NAMES[gguf.MODEL_ARCH.LLAMA][key].format(bid=bid) + suffix
return gguf.TENSOR_NAMES[key].format(bid=bid) + suffix

class Layer:
def __init__(self, params, lora_params, bid):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def save_gguf(self, gguf_writer):
gguf_writer.add_feed_forward_length(self.get_n_ff())

def tensor_name(key, bid=None):
return gguf.MODEL_TENSOR_NAMES[gguf.MODEL_ARCH.LLAMA][key].format(bid=bid) + ".weight"
return gguf.TENSOR_NAMES[key].format(bid=bid) + ".weight"

class Layer:
def __init__(self, params, bid):
Expand Down
210 changes: 115 additions & 95 deletions gguf-py/gguf/gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,76 +118,97 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH.STARCODER: "starcoder",
}

MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
MODEL_ARCH.LLAMA: {
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
MODEL_TENSOR.OUTPUT: "output",
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
},
MODEL_ARCH.GPTNEOX: {
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
MODEL_TENSOR.OUTPUT: "output",
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
},
MODEL_ARCH.FALCON: {
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
MODEL_TENSOR.OUTPUT: "output",
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
},
MODEL_ARCH.BAICHUAN: {
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
MODEL_TENSOR.OUTPUT: "output",
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
},
MODEL_ARCH.STARCODER: {
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
MODEL_TENSOR.POS_EMBD: "position_embd",
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
MODEL_TENSOR.OUTPUT: "output",
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
},
MODEL_ARCH.GPT2: {
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
MODEL_TENSOR.POS_EMBD: "position_embd",
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
MODEL_TENSOR.OUTPUT: "output",
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",

MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
}

MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_ARCH.LLAMA: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ROPE_FREQS,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_ROT_EMBD,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.GPTNEOX: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.FALCON: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_NORM_2,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.BAICHUAN: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ROPE_FREQS,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_ROT_EMBD,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.STARCODER: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.POS_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.GPT2: [
# TODO
},
],
# TODO
}

Expand Down Expand Up @@ -338,28 +359,24 @@ class TensorNameMap:

mapping: dict[str, tuple[MODEL_TENSOR, str]]

tensor_names: dict[MODEL_TENSOR, str]

def __init__(self, arch: MODEL_ARCH, n_blocks: int):
mapping = self.mapping = {}
tensor_names = self.tensor_names = MODEL_TENSOR_NAMES[arch]
self.mapping = {}
for tensor, keys in self.mappings_cfg.items():
tensor_name = tensor_names.get(tensor)
if tensor_name is None:
if tensor not in MODEL_TENSORS[arch]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's kind of late to say anything, but I'm curious why you'd make these changes. There's no usability improvement from the user's perspective except it'll be twice as slow now since it requires the key to get hashed and the dictionary searched twice compared to using dict.get().

Copy link
Collaborator Author

@cebtenzzre cebtenzzre Oct 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the code makes more sense this way - the standard name of the tensor is never dependent on the model it is from, so we should represent that fact by using separate data structures. You shouldn't have to know what model architecture you are working with to find the name of a standard tensor, either. I don't think this part of the code is performance-critical, so I didn't bother optimizing it.
This has confused developers at least, which is what I mean by usability. See #3417 (comment)

continue
mapping[tensor_name] = (tensor, tensor_name)
tensor_name = TENSOR_NAMES[tensor]
self.mapping[tensor_name] = (tensor, tensor_name)
for key in keys:
mapping[key] = (tensor, tensor_name)
self.mapping[key] = (tensor, tensor_name)
for bid in range(n_blocks):
for tensor, keys in self.block_mappings_cfg.items():
tensor_name = tensor_names.get(tensor)
if tensor_name is None:
if tensor not in MODEL_TENSORS[arch]:
continue
tensor_name = tensor_name.format(bid = bid)
mapping[tensor_name] = (tensor, tensor_name)
tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
self.mapping[tensor_name] = (tensor, tensor_name)
for key in keys:
key = key.format(bid = bid)
mapping[key] = (tensor, tensor_name)
self.mapping[key] = (tensor, tensor_name)

def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
result = self.mapping.get(key)
Expand Down Expand Up @@ -800,22 +817,25 @@ class SpecialVocab:
special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
special_token_ids: dict[str, int] = {}

def __init__(self, path: Path, load_merges: bool = False, special_token_types: tuple[str, ...] | None = None):
def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False,
special_token_types: tuple[str, ...] | None = None,
):
self.special_token_ids = {}
self.load_merges = load_merges
if special_token_types is not None:
self.special_token_types = special_token_types
self.load(path)
self._load(Path(path))

def load(self, path: Path):
if not self.try_load_from_tokenizer_json(path):
self.try_load_from_config_json(path)
def _load(self, path: Path) -> None:
if not self._try_load_from_tokenizer_json(path):
self._try_load_from_config_json(path)

def try_load_from_tokenizer_json(self, path: Path) -> bool:
def _try_load_from_tokenizer_json(self, path: Path) -> bool:
tokenizer_file = path / 'tokenizer.json'
if not tokenizer_file.is_file():
return False
with open(tokenizer_file, 'r', encoding = 'utf-8') as f:
with open(tokenizer_file, encoding = 'utf-8') as f:
tokenizer = json.load(f)
if self.load_merges:
merges = tokenizer.get('model', {}).get('merges')
Expand All @@ -825,7 +845,7 @@ def try_load_from_tokenizer_json(self, path: Path) -> bool:
added_tokens = tokenizer.get('added_tokens')
if added_tokens is None or not tokenizer_config_file.is_file():
return True
with open(tokenizer_config_file, 'r', encoding = 'utf-8') as f:
with open(tokenizer_config_file, encoding = 'utf-8') as f:
tokenizer_config = json.load(f)
for typ in self.special_token_types:
entry = tokenizer_config.get(f'{typ}_token')
Expand All @@ -844,19 +864,19 @@ def try_load_from_tokenizer_json(self, path: Path) -> bool:
break
return True

def try_load_from_config_json(self, path: Path) -> bool:
def _try_load_from_config_json(self, path: Path) -> bool:
config_file = path / 'config.json'
if not config_file.is_file():
return False
with open(config_file, 'r', encoding = 'utf-8') as f:
with open(config_file, encoding = 'utf-8') as f:
config = json.load(f)
for typ in self.special_token_types:
maybe_token_id = config.get(f'{typ}_token_id')
if isinstance(maybe_token_id, int) and maybe_token_id >= 0:
self.special_token_ids[typ] = maybe_token_id
return True

def add_to_gguf(self, gw: GGUFWriter):
def add_to_gguf(self, gw: GGUFWriter) -> None:
if len(self.merges) > 0:
print(f'gguf: Adding {len(self.merges)} merge(s).')
gw.add_token_merges(self.merges)
Expand All @@ -868,8 +888,8 @@ def add_to_gguf(self, gw: GGUFWriter):
print(f'gguf: Setting special token type {typ} to {tokid}')
handler(tokid)

def __repr__(self):
return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids if self.special_token_ids else "unset"}>'
def __repr__(self) -> str:
return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'


# Example usage:
Expand Down
2 changes: 1 addition & 1 deletion gguf-py/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gguf"
version = "0.3.3"
version = "0.4.0"
description = "Write ML models in GGUF for GGML"
authors = ["GGML <ggml@ggml.ai>"]
packages = [
Expand Down