@@ -50,6 +50,7 @@ def bytes_to_unicode():
50
50
51
51
52
52
class SentencePieceVocab :
53
+
53
54
def __init__ (self , fname_tokenizer : Path , fname_added_tokens : Optional [Path ]) -> None :
54
55
self .sentencepiece_tokenizer = SentencePieceProcessor (str (fname_tokenizer ))
55
56
added_tokens : Dict [str , int ]
@@ -149,11 +150,11 @@ def chatglm2_convert_gguf(model, tokenizer, dir_model, fname_out, ftype, hparams
149
150
print ("ChatGLM-2.gguf converting: " )
150
151
list_vars = model .state_dict ()
151
152
for name in list_vars .keys ():
152
- print (name , list_vars [name ].shape , list_vars [name ].dtype )
153
+ print ("%-80s" % name , list_vars [name ].shape , list_vars [name ].dtype )
153
154
154
155
print (hparams )
155
156
156
- gguf_file = fname_out + '.gguf'
157
+ gguf_file = fname_out
157
158
gguf_writer = gguf .GGUFWriter (gguf_file , "chatglm2" )
158
159
159
160
arch = "chatglm2."
@@ -285,35 +286,68 @@ def write_vocab_gguf(dir_model):
285
286
print ("gguf: get tensor metadata" )
286
287
for name in list_vars .keys ():
287
288
data = list_vars [name ].squeeze ().numpy ()
288
-
289
- print ("Processing variable: " + name + " with shape: " , data .shape )
290
289
if 'inv_freq' in name :
290
+ print ("Converting: %-75s" % name , " shape: %-15s" % str (data .shape ))
291
291
continue
292
292
293
+ print ("Converting: %-75s" % name , " shape: %-15s" % str (data .shape ), end = " " )
293
294
n_dims = len (data .shape )
294
295
295
296
# ftype == 0 -> float32, ftype == 1 -> float16
296
297
ftype_cur = 0
297
298
if ftype != 0 :
298
299
if name [- 7 :] == ".weight" and n_dims == 2 :
299
- print (" Converting to float16" )
300
+ print (" to float16" . rjust ( 15 ) )
300
301
data = data .astype (np .float16 )
301
302
ftype_cur = 1
302
303
else :
303
- print (" Converting to float32" )
304
+ print (" to float32" . rjust ( 15 ) )
304
305
data = data .astype (np .float32 )
305
306
ftype_cur = 0
306
307
else :
307
308
if data .dtype != np .float32 :
308
- print (" Converting to float32" )
309
+ print (" to float32" . rjust ( 15 ) )
309
310
data = data .astype (np .float32 )
310
311
ftype_cur = 0
311
312
312
- # print(f"[{i+1:{padi}d}/{len(model)}]
313
- # Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4}")
314
-
315
313
gguf_writer .add_tensor (name , data )
316
314
315
+ if "mlp.dense_h_to_4h" in name :
316
+ name_0 = name .replace ("dense_h_to_4h" , "dense_h_to_4h_0" )
317
+ name_1 = name .replace ("dense_h_to_4h" , "dense_h_to_4h_1" )
318
+ shape_0 = data .shape [0 ]
319
+ half_shape_0 = int (shape_0 / 2 )
320
+ data_0 = data [0 :half_shape_0 , :]
321
+ data_1 = data [half_shape_0 :shape_0 , :]
322
+
323
+ print ("Converting: %-75s" % name_0 , " shape: %-15s" % str (data_0 .shape ))
324
+ print ("Converting: %-75s" % name_1 , " shape: %-15s" % str (data_1 .shape ))
325
+
326
+ n_dims = len (data_0 .shape )
327
+ assert (len (data_0 .shape ) == len (data_1 .shape ))
328
+ # ftype == 0 -> float32, ftype == 1 -> float16
329
+ ftype_cur = 0
330
+ if ftype != 0 :
331
+ if name_0 [- 7 :] == ".weight" and n_dims == 2 :
332
+ print (" to float16" .rjust (15 ))
333
+ data_0 = data_0 .astype (np .float16 )
334
+ data_1 = data_1 .astype (np .float32 )
335
+ ftype_cur = 1
336
+ else :
337
+ print (" to float32" .rjust (15 ))
338
+ data_0 = data_0 .astype (np .float32 )
339
+ data_1 = data_1 .astype (np .float32 )
340
+ ftype_cur = 0
341
+ else :
342
+ if data_0 .dtype != np .float32 :
343
+ print (" to float32" .rjust (15 ))
344
+ data_0 = data_0 .astype (np .float32 )
345
+ data_1 = data_1 .astype (np .float32 )
346
+ ftype_cur = 0
347
+
348
+ gguf_writer .add_tensor (name_0 , data_0 )
349
+ gguf_writer .add_tensor (name_1 , data_1 )
350
+
317
351
print ("gguf: write header" )
318
352
gguf_writer .write_header_to_file ()
319
353
print ("gguf: write metadata" )
@@ -363,9 +397,9 @@ def chatglm2_convert(model, tokenizer, dir_model, fname_out, ftype, hparams):
363
397
fout .write (struct .pack ("f" , 10000.0 )) # freq_base
364
398
fout .write (struct .pack ("f" , 1.0 )) # rope_factor
365
399
366
- fout .write (struct .pack ("f" , 0.0 )) # config.json "rope_scaling.factor", not enabled
367
- fout .write (struct .pack ("i" , 0 )) # rope_scaling.original_max_position_embeddings
368
- fout .write (struct .pack ("i" , 0 )) # params["rope_scaling"]["type"] =="yarn" else 0))
400
+ fout .write (struct .pack ("f" , 0.0 )) # config.json "rope_scaling.factor", not enabled
401
+ fout .write (struct .pack ("i" , 0 )) # rope_scaling.original_max_position_embeddings
402
+ fout .write (struct .pack ("i" , 0 )) # params["rope_scaling"]["type"] =="yarn" else 0))
369
403
370
404
fout .write (struct .pack ("i" , tokenizer .bos_token_id if tokenizer .bos_token_id is not None else 1 ))
371
405
fout .write (struct .pack ("i" , tokenizer .eos_token_id if tokenizer .eos_token_id is not None else 2 ))
@@ -419,10 +453,56 @@ def chatglm2_convert(model, tokenizer, dir_model, fname_out, ftype, hparams):
419
453
for i in range (n_dims ):
420
454
fout .write (struct .pack ("i" , data .shape [n_dims - 1 - i ]))
421
455
fout .write (str )
422
-
423
456
# data
424
457
data .tofile (fout )
425
458
459
+ if "mlp.dense_h_to_4h" in name :
460
+ name_0 = name .replace ("dense_h_to_4h" , "dense_h_to_4h_0" )
461
+ name_1 = name .replace ("dense_h_to_4h" , "dense_h_to_4h_1" )
462
+ shape_0 = data .shape [0 ]
463
+ half_shape_0 = int (shape_0 / 2 )
464
+ data_0 = data [0 :half_shape_0 , :]
465
+ data_1 = data [half_shape_0 :shape_0 , :]
466
+
467
+ print ("Converting: %-75s" % name_0 , " shape: " , data_0 .shape )
468
+ print ("Converting: %-75s" % name_1 , " shape: " , data_1 .shape )
469
+
470
+ n_dims = len (data_0 .shape )
471
+ assert (len (data_0 .shape ) == len (data_1 .shape ))
472
+ # ftype == 0 -> float32, ftype == 1 -> float16
473
+ ftype_cur = 0
474
+ if ftype != 0 :
475
+ if name_0 [- 7 :] == ".weight" and n_dims == 2 :
476
+ print (" to float16" .rjust (15 ))
477
+ data_0 = data_0 .astype (np .float16 )
478
+ data_1 = data_1 .astype (np .float32 )
479
+ ftype_cur = 1
480
+ else :
481
+ print (" to float32" .rjust (15 ))
482
+ data_0 = data_0 .astype (np .float32 )
483
+ data_1 = data_1 .astype (np .float32 )
484
+ ftype_cur = 0
485
+ else :
486
+ if data_0 .dtype != np .float32 :
487
+ print (" to float32" .rjust (15 ))
488
+ data_0 = data_0 .astype (np .float32 )
489
+ data_1 = data_1 .astype (np .float32 )
490
+ ftype_cur = 0
491
+
492
+ str_0 = name_0 .encode ("utf-8" )
493
+ fout .write (struct .pack ("iii" , n_dims , len (str_0 ), ftype_cur ))
494
+ for i in range (n_dims ):
495
+ fout .write (struct .pack ("i" , data_0 .shape [n_dims - 1 - i ]))
496
+ fout .write (str_0 )
497
+ data_0 .tofile (fout )
498
+
499
+ str_1 = name_1 .encode ("utf-8" )
500
+ fout .write (struct .pack ("iii" , n_dims , len (str_1 ), ftype_cur ))
501
+ for i in range (n_dims ):
502
+ fout .write (struct .pack ("i" , data_1 .shape [n_dims - 1 - i ]))
503
+ fout .write (str_1 )
504
+ data_1 .tofile (fout )
505
+
426
506
fout .close ()
427
507
428
508
print ("Done. Output file: " + fname_out )
0 commit comments