Skip to content

Commit

Permalink
gguf-py : add GGMLFileType
Browse files Browse the repository at this point in the history
* convert-hf : use GGMLFileType
  • Loading branch information
compilade committed May 8, 2024
1 parent bffdaf4 commit cad22e1
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 14 deletions.
32 changes: 18 additions & 14 deletions convert-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,7 @@ def write_tensors(self):
data: np.ndarray = data # type hint
n_dims = len(data.shape)
data_dtype = data.dtype

# if f32 desired, convert any float16 to float32
if self.ftype == 0 and data_dtype == np.float16:
data = data.astype(np.float32)
data_qtype: gguf.GGMLQuantizationType | None = None

# when both are True, f32 should win
extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
Expand All @@ -254,20 +251,27 @@ def write_tensors(self):
# if f16 desired, convert any float32 2-dim weight tensors to float16
extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)

# when both extra_f32 and extra_f16 are False, convert to float32 by default
if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
data = data.astype(np.float32)
if self.ftype != gguf.GGMLFileType.ALL_F32 and extra_f16 and not extra_f32:
if self.ftype == gguf.GGMLFileType.MOSTLY_F16:
if data_dtype != np.float16:
data = data.astype(np.float16)
data_qtype = gguf.GGMLQuantizationType.F16
# TODO: add more types (like BF16) here

else: # by default, convert to float32
if data_dtype != np.float32:
data = data.astype(np.float32)
data_qtype = gguf.GGMLQuantizationType.F32

if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
data = data.astype(np.float16)
assert data_qtype is not None

# reverse shape to make it similar to the internal ggml dimension order
shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"

# n_dims is implicit in the shape
logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data.dtype}, shape = {shape_str}")
logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")

self.gguf_writer.add_tensor(new_name, data)
self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)

def write(self):
self.write_tensors()
Expand Down Expand Up @@ -2472,9 +2476,9 @@ def main() -> None:
logger.error(f'Error: {args.model} is not a directory')
sys.exit(1)

ftype_map = {
"f32": gguf.GGMLQuantizationType.F32,
"f16": gguf.GGMLQuantizationType.F16,
ftype_map: dict[str, gguf.GGMLFileType] = {
"f32": gguf.GGMLFileType.ALL_F32,
"f16": gguf.GGMLFileType.MOSTLY_F16,
}

if args.outfile is not None:
Expand Down
36 changes: 36 additions & 0 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,42 @@ class GGMLQuantizationType(IntEnum):
BF16 = 30


class GGMLFileType(IntEnum):
ALL_F32 = 0
MOSTLY_F16 = 1 # except 1d tensors
MOSTLY_Q4_0 = 2 # except 1d tensors
MOSTLY_Q4_1 = 3 # except 1d tensors
MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
# MOSTLY_Q4_2 = 5 # support has been removed
# MOSTLY_Q4_3 = 6 # support has been removed
MOSTLY_Q8_0 = 7 # except 1d tensors
MOSTLY_Q5_0 = 8 # except 1d tensors
MOSTLY_Q5_1 = 9 # except 1d tensors
MOSTLY_Q2_K = 10 # except 1d tensors
MOSTLY_Q3_K_S = 11 # except 1d tensors
MOSTLY_Q3_K_M = 12 # except 1d tensors
MOSTLY_Q3_K_L = 13 # except 1d tensors
MOSTLY_Q4_K_S = 14 # except 1d tensors
MOSTLY_Q4_K_M = 15 # except 1d tensors
MOSTLY_Q5_K_S = 16 # except 1d tensors
MOSTLY_Q5_K_M = 17 # except 1d tensors
MOSTLY_Q6_K = 18 # except 1d tensors
MOSTLY_IQ2_XXS = 19 # except 1d tensors
MOSTLY_IQ2_XS = 20 # except 1d tensors
MOSTLY_Q2_K_S = 21 # except 1d tensors
MOSTLY_IQ3_XS = 22 # except 1d tensors
MOSTLY_IQ3_XXS = 23 # except 1d tensors
MOSTLY_IQ1_S = 24 # except 1d tensors
MOSTLY_IQ4_NL = 25 # except 1d tensors
MOSTLY_IQ3_S = 26 # except 1d tensors
MOSTLY_IQ3_M = 27 # except 1d tensors
MOSTLY_IQ2_S = 28 # except 1d tensors
MOSTLY_IQ2_M = 29 # except 1d tensors
MOSTLY_IQ4_XS = 30 # except 1d tensors
MOSTLY_IQ1_M = 31 # except 1d tensors
MOSTLY_BF16 = 32 # except 1d tensors


class GGUFEndian(IntEnum):
LITTLE = 0
BIG = 1
Expand Down

0 comments on commit cad22e1

Please sign in to comment.