
Commit 2309fbb: fix qwen load error (#164)
Parent: 8e17b67
13 files changed (+29, -22 lines)
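
Background for the fix: Qwen checkpoints ship their tokenizer and modeling code inside the model repository, and transformers will not execute that code unless the caller passes trust_remote_code=True, so converters that loaded the model or tokenizer without it failed before conversion started. A minimal sketch of the load path this commit repairs, assuming "Qwen/Qwen-7B" as a stand-in for the converter's dir_model argument:

    # Sketch only: "Qwen/Qwen-7B" is a placeholder for the converter's dir_model.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    dir_model = "Qwen/Qwen-7B"
    # Without trust_remote_code=True, transformers refuses to run the custom
    # modeling/tokenizer code bundled with the checkpoint and raises an error.
    tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(dir_model, trust_remote_code=True)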

neural_speed/convert/convert-hf-to-gguf.py
Lines changed: 2 additions & 1 deletion

@@ -64,6 +64,7 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None = None
             fname_tokenizer,
             cache_dir=fname_tokenizer,
             local_files_only=True,
+            trust_remote_code=True
         )
 
         # Initialize lists and dictionaries for added tokens
@@ -402,7 +403,7 @@ def _set_vocab_gpt2(self):
         toktypes: list[int] = []
 
         from transformers import AutoTokenizer
-        tokenizer = AutoTokenizer.from_pretrained(dir_model)
+        tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
         vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
         assert max(tokenizer.vocab.values()) < vocab_size

neural_speed/convert/convert_bloom.py
Lines changed: 1 addition & 1 deletion

@@ -77,7 +77,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
                                                  torch_dtype=torch.float16 if ftype == 1 else torch.float32,
                                                  low_cpu_mem_usage=True,
                                                  trust_remote_code=True)
-    tokenizer = AutoTokenizer.from_pretrained(dir_model)
+    tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
     hparams = config.to_dict()
     print("Loading model: ", dir_model)

neural_speed/convert/convert_dolly.py
Lines changed: 3 additions & 2 deletions

@@ -79,8 +79,9 @@ def main(args_in: Optional[List[str]] = None) -> None:
         from modelscope import AutoModelForCausalLM, AutoTokenizer
     else:
         from transformers import AutoModelForCausalLM, AutoTokenizer
-    model = AutoModelForCausalLM.from_pretrained(dir_model, torch_dtype=torch.float16 if ftype == 1 else torch.float32)
-    tokenizer = AutoTokenizer.from_pretrained(dir_model)
+    model = AutoModelForCausalLM.from_pretrained(dir_model, torch_dtype=torch.float16 if ftype == 1 else torch.float32,
+                                                 trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
     print("Loading model: ", dir_model)
     model.eval()
     for p in model.parameters():

neural_speed/convert/convert_gptj.py
Lines changed: 1 addition & 1 deletion

@@ -74,7 +74,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
     else:
         from transformers import AutoModelForCausalLM, AutoTokenizer
     print("Loading model: ", dir_model)
-    model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True)
+    model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True, trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
     hparams = model.config.to_dict()
     list_vars = model.state_dict()

neural_speed/convert/convert_gptneox.py
Lines changed: 3 additions & 2 deletions

@@ -81,8 +81,9 @@ def main(args_in: Optional[List[str]] = None) -> None:
     else:
         from transformers import AutoModelForCausalLM, AutoTokenizer
     print("Loading model: ", dir_model)
-    model = AutoModelForCausalLM.from_pretrained(dir_model, torch_dtype=torch.float16 if ftype == 1 else torch.float32)
-    tokenizer = AutoTokenizer.from_pretrained(dir_model)
+    model = AutoModelForCausalLM.from_pretrained(dir_model, torch_dtype=torch.float16 if ftype == 1 else torch.float32,
+                                                 trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
     model.eval()
     for p in model.parameters():
         p.requires_grad = False

neural_speed/convert/convert_opt.py
Lines changed: 3 additions & 2 deletions

@@ -79,8 +79,9 @@ def main(args_in: Optional[List[str]] = None) -> None:
     else:
         from transformers import AutoModelForCausalLM, AutoTokenizer
     print("Loading model: ", dir_model)
-    model = AutoModelForCausalLM.from_pretrained(dir_model, torch_dtype=torch.float16 if ftype == 1 else torch.float32)
-    tokenizer = AutoTokenizer.from_pretrained(dir_model)
+    model = AutoModelForCausalLM.from_pretrained(dir_model, torch_dtype=torch.float16 if ftype == 1 else torch.float32,
+                                                 trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
     model.eval()
     hparams = model.config.to_dict()

neural_speed/convert/convert_phi.py
Lines changed: 1 addition & 1 deletion

@@ -86,7 +86,7 @@ def write_vocab_gguf(dir_model, hparams, gguf_writer):
     toktypes: list[int] = []
 
     from transformers import AutoTokenizer  # type: ignore[attr-defined]
-    tokenizer = AutoTokenizer.from_pretrained(dir_model)
+    tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
     vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
     assert max(tokenizer.vocab.values()) < vocab_size

neural_speed/convert/convert_quantized_bloom.py
Lines changed: 1 addition & 1 deletion

@@ -121,7 +121,7 @@ def bytes_to_unicode():
 
 model_name = "/mnt/disk1/data2/zhenweil/models/bloom/bloom-7b1"
 prompt = "Once upon a time, a little girl"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
 inputs = tokenizer(prompt, return_tensors="pt").input_ids
 streamer = TextStreamer(tokenizer)

neural_speed/convert/convert_quantized_qwen.py
Lines changed: 5 additions & 2 deletions

@@ -159,15 +159,18 @@ def main(args_in: Optional[List[str]] = None) -> None:
             "i", hparams["kv_channels"] if "kv_channels" in hparams else int(hparams["hidden_size"] /
                                                                              hparams["num_attention_heads"])))
     f.write(struct.pack("i", ftype))
-    f.write(struct.pack("i", hparams["seq_length"] if "seq_length" in hparams else hparams["max_position_embeddings"]))
+    f.write(struct.pack("i", hparams["max_position_embeddings"]))
     f.write(struct.pack("f", 0.0))
     f.write(struct.pack("f", 0.0))
     f.write(struct.pack("i", 0))
     f.write(struct.pack("i", 0))  # word_embed_proj_dim (for opt)
     f.write(struct.pack("i", 0))  # do_layer_norm_before (for opt)
 
     f.write(struct.pack("i", 0))
-    f.write(struct.pack("i", hparams["intermediate_size"]))
+    if hparams['model_type'] == 'qwen2':
+        fout.write(struct.pack("i", hparams["intermediate_size"]))
+    else:
+        fout.write(struct.pack("i", int(hparams["intermediate_size"] / 2)))
     f.write(struct.pack("i", 0))
     f.write(struct.pack("i", 0))  # n_experts
     f.write(struct.pack("i", 0))  # n_expert_used
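
The model_type branch exists because Qwen1-style configs report intermediate_size as twice the width of each half of the split FFN projection, whereas qwen2 configs report the usable width directly; halving keeps the written header consistent across both. A small sketch of the computation, assuming 22016 as in the published Qwen-7B config (the qwen2 value is illustrative):

    # Sketch: mirrors the branch above; config values are assumptions.
    def ffn_width(hparams: dict) -> int:
        if hparams["model_type"] == "qwen2":
            return hparams["intermediate_size"]       # already the real FFN width
        return int(hparams["intermediate_size"] / 2)  # qwen1 configs store 2x

    assert ffn_width({"model_type": "qwen", "intermediate_size": 22016}) == 11008
    assert ffn_width({"model_type": "qwen2", "intermediate_size": 11008}) == 11008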

neural_speed/convert/convert_qwen.py
Lines changed: 7 additions & 4 deletions

@@ -81,8 +81,8 @@ def main(args_in: Optional[List[str]] = None) -> None:
     else:
         from transformers import AutoModelForCausalLM, AutoTokenizer
     print("Loading model: ", dir_model)
-    model = AutoModelForCausalLM.from_pretrained(dir_model)
-    tokenizer = AutoTokenizer.from_pretrained(dir_model)
+    model = AutoModelForCausalLM.from_pretrained(dir_model, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
     model.eval()
     for p in model.parameters():
         p.requires_grad = False
@@ -111,15 +111,18 @@ def main(args_in: Optional[List[str]] = None) -> None:
                                                                              hparams["num_attention_heads"])))
     fout.write(struct.pack("i", ftype))
     fout.write(
-        struct.pack("i", hparams["seq_length"] if "seq_length" in hparams else hparams["max_position_embeddings"]))
+        struct.pack("i", hparams["max_position_embeddings"]))
     fout.write(struct.pack("f", 0.0))
     fout.write(struct.pack("f", 0.0))
     fout.write(struct.pack("i", 0))
     fout.write(struct.pack("i", 0))  # word_embed_proj_dim (for opt)
     fout.write(struct.pack("i", 0))  # do_layer_norm_before (for opt)
 
     fout.write(struct.pack("i", 0))
-    fout.write(struct.pack("i", hparams["intermediate_size"]))
+    if hparams['model_type'] == 'qwen2':
+        fout.write(struct.pack("i", hparams["intermediate_size"]))
+    else:
+        fout.write(struct.pack("i", int(hparams["intermediate_size"] / 2)))
     fout.write(struct.pack("i", 0))
     fout.write(struct.pack("i", 0))  # n_experts
     fout.write(struct.pack("i", 0))  # n_expert_used
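
The same two header changes appear here as in convert_quantized_qwen.py; the seq_length edit only matters when a config carries both keys with different values, in which case the packed context length now comes from max_position_embeddings. An illustrative sketch with assumed values (neither number is taken from a real checkpoint):

    # Illustrative only: both values are assumptions.
    hparams = {"seq_length": 2048, "max_position_embeddings": 8192}

    old = hparams["seq_length"] if "seq_length" in hparams else hparams["max_position_embeddings"]
    new = hparams["max_position_embeddings"]
    print(old, new)  # 2048 8192 -> the header field changes when the keys disagree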
