81 changes: 81 additions & 0 deletions convert_hf_to_gguf.py
@@ -1054,6 +1054,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "53e325976a6e142379c19b09afcae354f2f496f147afa8f9e189a33fe4e3024e":
# ref: https://huggingface.co/ibm-granite/granite-docling-258M
res = "granite-docling"
if chkhsh == "f4f37b6c8eb9ea29b3eac6bb8c8487c5ab7885f8d8022e67edc1c68ce8403e95":
# ref: https://huggingface.co/MiniMaxAI/MiniMax-M2
res = "minimax-m2"

if res is None:
logger.warning("\n")
@@ -6909,6 +6912,84 @@ def prepare_tensors(self):
raise ValueError(f"Unprocessed experts: {experts}")


@ModelBase.register("MiniMaxM2ForCausalLM")
class MiniMaxM2Model(TextModel):
model_arch = gguf.MODEL_ARCH.MINIMAXM2
_experts_cache: dict[int, dict[str, Tensor]] = {}

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.hparams["num_experts"] = self.hparams["num_local_experts"]

def set_gguf_parameters(self):
if self.hparams["scoring_func"] == "sigmoid":
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
elif self.hparams["scoring_func"] == "softmax":
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
else:
raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")

block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
n_embd = self.find_hparam(["hidden_size", "n_embd"])
n_head = self.find_hparam(["num_attention_heads", "n_head"])
n_head_kv = self.find_hparam(["num_key_value_heads", "n_head_kv"])
rms_eps = self.find_hparam(["rms_norm_eps"])
max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
head_dim = self.find_hparam(["head_dim"])

self.gguf_writer.add_context_length(max_pos_embds)
self.gguf_writer.add_embedding_length(n_embd)
self.gguf_writer.add_feed_forward_length(self.find_hparam(["intermediate_size"]))
self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
self.gguf_writer.add_expert_count(self.find_hparam(["num_local_experts"]))
self.gguf_writer.add_expert_used_count(self.find_hparam(["num_experts_per_tok"]))
self.gguf_writer.add_block_count(block_count)
self.gguf_writer.add_head_count(n_head)
self.gguf_writer.add_head_count_kv(n_head_kv)
self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
self.gguf_writer.add_layer_norm_eps(rms_eps)
self.gguf_writer.add_key_length(head_dim)
self.gguf_writer.add_value_length(head_dim)
self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))
self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
if name.endswith("e_score_correction_bias"):
name = name.replace("e_score_correction_bias", "e_score_correction.bias")

# merge expert weights
if 'experts' in name:
n_experts = self.hparams["num_experts"]
assert bid is not None

expert_cache = self._experts_cache.setdefault(bid, {})
expert_cache[name] = data_torch
expert_weights = ["w1", "w2", "w3"]

# not enough expert weights to merge
if len(expert_cache) < n_experts * len(expert_weights):
return []

tensors: list[tuple[str, Tensor]] = []
for w_name in expert_weights:
datas: list[Tensor] = []

for xid in range(n_experts):
ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{w_name}.weight"
datas.append(expert_cache[ename])
del expert_cache[ename]

data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.block_sparse_moe.experts.{w_name}.weight"
new_name = self.map_tensor_name(merged_name)
tensors.append((new_name, data_torch))

del self._experts_cache[bid]
return tensors

return super().modify_tensors(data_torch, name, bid)


@ModelBase.register("Dots1ForCausalLM")
class Dots1Model(Qwen2MoeModel):
model_arch = gguf.MODEL_ARCH.DOTS1
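The expert-merge logic in MiniMaxM2Model.modify_tensors above caches per-expert w1/w2/w3 matrices until a layer is complete, then stacks them along a new leading dimension. A minimal standalone sketch of that step; the shapes here are hypothetical (the real sizes come from the checkpoint hparams), chosen only to make the stacking visible:

import torch

n_experts = 4         # hypothetical; the converter reads num_local_experts
n_embd, n_ff = 8, 16  # hypothetical hidden/FFN sizes, for illustration only

# Simulate the cache that modify_tensors fills one tensor at a time.
cache = {
    f"model.layers.0.block_sparse_moe.experts.{xid}.w1.weight": torch.randn(n_ff, n_embd)
    for xid in range(n_experts)
}

# Once all experts for the layer are present, stack them on dim 0,
# mirroring the torch.stack(datas, dim=0) call in the diff.
datas = [cache.pop(f"model.layers.0.block_sparse_moe.experts.{xid}.w1.weight")
         for xid in range(n_experts)]
merged = torch.stack(datas, dim=0)
assert merged.shape == (n_experts, n_ff, n_embd)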
4 changes: 4 additions & 0 deletions convert_hf_to_gguf_update.py
@@ -141,6 +141,7 @@ class TOKENIZER_TYPE(IntEnum):
{"name": "mellum", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/JetBrains/Mellum-4b-base", },
{"name": "bailingmoe2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-mini-base-2.0", },
{"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
{"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },
]

# some models are known to be broken upstream, so we will skip them as exceptions
@@ -438,6 +439,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
except OSError as e:
logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
continue # Skip this model and continue with the next one in the loop
except TypeError as e:
logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
continue # Skip this model and continue with the next one in the loop

if not os.path.exists(f"models/ggml-vocab-{name}.gguf"):
logger.info(f"Skip vocab files for model {name}, no GGUF file found")
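For context, the chkhsh values matched in get_vocab_base_pre are derived by encoding a fixed probe string with the Hugging Face tokenizer and hashing the resulting token ids. A hedged sketch of that derivation; the exact probe text lives in convert_hf_to_gguf.py and is deliberately not reproduced here, so treat the call shape as illustrative:

from hashlib import sha256
from transformers import AutoTokenizer  # assumes transformers is installed

tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-M2")
chktxt = "..."  # the fixed probe string from convert_hf_to_gguf.py (elided here)
chktok = tokenizer.encode(chktxt)
chkhsh = sha256(str(chktok).encode()).hexdigest()
# With the real probe string this should print
# f4f37b6c8eb9ea29b3eac6bb8c8487c5ab7885f8d8022e67edc1c68ce8403e95
print(chkhsh)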
21 changes: 21 additions & 0 deletions gguf-py/gguf/constants.py
@@ -420,6 +420,7 @@ class MODEL_ARCH(IntEnum):
SEED_OSS = auto()
GROVEMOE = auto()
APERTUS = auto()
MINIMAXM2 = auto()


class VISION_PROJECTOR_TYPE(IntEnum):
@@ -766,6 +767,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH.SEED_OSS: "seed_oss",
MODEL_ARCH.GROVEMOE: "grovemoe",
MODEL_ARCH.APERTUS: "apertus",
MODEL_ARCH.MINIMAXM2: "minimax-m2",
}

VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -2837,6 +2839,25 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.FFN_DOWN_CHEXP,
MODEL_TENSOR.FFN_UP_CHEXP,
],
MODEL_ARCH.MINIMAXM2: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_Q_NORM,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_K_NORM,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE_INP,
MODEL_TENSOR.FFN_GATE_EXP,
MODEL_TENSOR.FFN_DOWN_EXP,
MODEL_TENSOR.FFN_UP_EXP,
MODEL_TENSOR.FFN_EXP_PROBS_B,
],

# TODO
}

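The three additions above (the MINIMAXM2 enum value, its "minimax-m2" name, and its tensor list) are what the converter's model_arch = gguf.MODEL_ARCH.MINIMAXM2 keys into. A quick sanity check, assuming the gguf-py symbols MODEL_ARCH_NAMES and MODEL_TENSORS keep the names they have in this tree:

import gguf  # the gguf-py package from this repository

arch = gguf.MODEL_ARCH.MINIMAXM2
assert gguf.MODEL_ARCH_NAMES[arch] == "minimax-m2"
# The expert routing bias must be registered so the converter can emit it.
assert gguf.MODEL_TENSOR.FFN_EXP_PROBS_B in gguf.MODEL_TENSORS[arch]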
1 change: 1 addition & 0 deletions gguf-py/gguf/tensor_mapping.py
@@ -377,6 +377,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.moe_statics.e_score_correction", # ernie4.5-moe
"model.layers.{bid}.mlp.gate.expert_bias", # bailingmoe2
"model.layers.{bid}.feed_forward.expert_bias", # lfm2moe
"model.layers.{bid}.block_sparse_moe.e_score_correction", # minimax-m2
),

# Feed-forward up
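With this entry, map_tensor_name can rewrite the renamed bias tensor to the exp_probs_b slot; note that the converter diff first renames e_score_correction_bias to e_score_correction.bias, so the .bias suffix is handled generically. A simplified sketch of the {bid} template substitution (a two-line stand-in for the real TensorNameMap class, which builds a reverse index over all templates):

HF_TEMPLATE   = "model.layers.{bid}.block_sparse_moe.e_score_correction"  # minimax-m2
GGUF_TEMPLATE = "blk.{bid}.exp_probs_b"                                   # FFN_EXP_PROBS_B

def resolve(hf_name: str, bid: int) -> str | None:
    # Simplified: match one known template and substitute the block index.
    if hf_name == HF_TEMPLATE.format(bid=bid):
        return GGUF_TEMPLATE.format(bid=bid)
    return None

assert resolve("model.layers.3.block_sparse_moe.e_score_correction", 3) == "blk.3.exp_probs_b"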
Binary file added models/ggml-vocab-minimax-m2.gguf
112 changes: 112 additions & 0 deletions models/ggml-vocab-minimax-m2.gguf.inp
@@ -0,0 +1,112 @@
ied 4 ½ months
__ggml_vocab_test__
Äpfel
__ggml_vocab_test__

__ggml_vocab_test__

__ggml_vocab_test__

__ggml_vocab_test__

__ggml_vocab_test__

__ggml_vocab_test__


__ggml_vocab_test__



__ggml_vocab_test__




__ggml_vocab_test__


__ggml_vocab_test__
Hello world
__ggml_vocab_test__
Hello world
__ggml_vocab_test__
Hello World
__ggml_vocab_test__
Hello World
__ggml_vocab_test__
Hello World!
__ggml_vocab_test__
Hello, world!
__ggml_vocab_test__
Hello, world!
__ggml_vocab_test__
this is 🦙.cpp
__ggml_vocab_test__
w048 7tuijk dsdfhu
__ggml_vocab_test__
нещо на Български
__ggml_vocab_test__
កាន់តែពិសេសអាចខលចេញ
__ggml_vocab_test__
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
Hello
__ggml_vocab_test__
(
__ggml_vocab_test__

=
__ggml_vocab_test__
' era
__ggml_vocab_test__
Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
__ggml_vocab_test__
!!!!!!
__ggml_vocab_test__
3
__ggml_vocab_test__
33
__ggml_vocab_test__
333
__ggml_vocab_test__
3333
__ggml_vocab_test__
33333
__ggml_vocab_test__
333333
__ggml_vocab_test__
3333333
__ggml_vocab_test__
33333333
__ggml_vocab_test__
333333333
__ggml_vocab_test__
Cửa Việt
__ggml_vocab_test__
discards
__ggml_vocab_test__











🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
__ggml_vocab_test__
46 changes: 46 additions & 0 deletions models/ggml-vocab-minimax-m2.gguf.out
@@ -0,0 +1,46 @@
1233 32 52 32 23901 4632
69967 30230 295

32
256
326
9
10
367
4368
10380
19739 2035
53398 2035
19739 5476
53398 5476
53398 5476 33
19739 44 2035 33
53398 44 2035 33
546 355 9753 166 153 46 52243
119 48218 32 55 116 2157 60350 40081 6107 15931
8827 40614 3642 11575 185034 8623
76300 128 76300 182 76300 147 157246 139 76300 143 157246 130 76300 150 76300 183 76300 159 225 35097 76300 159 76300 162 76300 182 76300 133 76300 129 76300 155 76300 133 225 35097 76300 137
150333 359 14291 41 19918 182 61587 79213 171 21243 359 79401 158243 176756 41 181343 359 10141 113958 389 760 1072 1813 11248 41
19739
53398
32 53398
256 53398
326 53398
326 53398 10 326 53398
359
10 409
39 5784
19739 44 330 53147 33 2329 457 390 184404 3479 32020 594 44450 2489 17246 35341 49 1419 5516
34485 6255
51
2893
18397
18397 51
18397 2893
18397 18397
18397 18397 51
18397 18397 2893
18397 18397 18397
67 191937 97 31042 84408 116
2300 2958
137106 35066 24361 56254 151540 4315 10877 7671 41564 150333 359 14291 41 19918 182 61587 79213 171 21243 359 79401 158243 176756 41 181343 9753 166 153 186278 153 32 51 32 2893 32 18397 32 18397 51 32 18397 2893 32 18397 18397 32 18397 18397 51 32 18397 18397 2893 32 51 46 51 32 51 645 51 32 51 1662 51 29559 158 128 76300 182 76300 147 157246 139 76300 143 157246 130 76300 150 76300 183 76300 159 225 35097 76300 159 76300 162 76300 182 76300 133 21557 129 3479 32020 594 44450 2489 17246 35341 49 1419 5516 109618 1246 9435 6833 40614 3642 11575 185034 8623 8462 3443 64346 2765 111832 22815 34485 6255 61018 13074 8244 1040 722 116 1186 13396 986 44 722 2380 390 3123 63 722 77 516 3123 13098 1454 412 44 722 68 390 1079 1001 17251 63 1559 39 34121 258 99132 76
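The .inp/.out pair above is consumed by the vocab tests: each input case in the .inp file is terminated by a __ggml_vocab_test__ line, and the .out file carries one line of space-separated token ids per case, in the same order. A small sketch of how the two files line up, assuming well-formed fixtures:

from pathlib import Path

inp = Path("models/ggml-vocab-minimax-m2.gguf.inp").read_text(encoding="utf-8")
out = Path("models/ggml-vocab-minimax-m2.gguf.out").read_text(encoding="utf-8")

# One chunk per case (chunks keep the newlines around the marker lines).
cases = inp.split("__ggml_vocab_test__")[:-1]
expected = [[int(t) for t in line.split()] for line in out.splitlines()]

# One token-id line per input case; a blank .out line means no tokens.
assert len(cases) == len(expected)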
22 changes: 22 additions & 0 deletions src/llama-arch.cpp
@@ -103,6 +103,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_SEED_OSS, "seed_oss" },
{ LLM_ARCH_GROVEMOE, "grovemoe" },
{ LLM_ARCH_APERTUS, "apertus" },
{ LLM_ARCH_MINIMAX_M2, "minimax-m2" },
{ LLM_ARCH_UNKNOWN, "(unknown)" },
};

@@ -2312,6 +2313,27 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
{ LLM_TENSOR_FFN_UP_CHEXPS, "blk.%d.ffn_up_chexps" },
},
},
{
LLM_ARCH_MINIMAX_M2,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
},
},
{
LLM_ARCH_UNKNOWN,
{
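These per-tensor name patterns use printf-style %d placeholders that llama.cpp expands with the block index at load time, so they must agree with the blk.{bid}.* names the Python side emits. A Python mirror of a few entries, purely for illustration (the real lookup is the C++ table above):

MINIMAX_M2_NAMES = {
    "ATTN_Q":          "blk.%d.attn_q",
    "FFN_GATE_EXPS":   "blk.%d.ffn_gate_exps",
    "FFN_EXP_PROBS_B": "blk.%d.exp_probs_b",
}

bid = 5
assert MINIMAX_M2_NAMES["FFN_GATE_EXPS"] % bid == "blk.5.ffn_gate_exps"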
1 change: 1 addition & 0 deletions src/llama-arch.h
@@ -107,6 +107,7 @@ enum llm_arch {
LLM_ARCH_SEED_OSS,
LLM_ARCH_GROVEMOE,
LLM_ARCH_APERTUS,
LLM_ARCH_MINIMAX_M2,
LLM_ARCH_UNKNOWN,
};
