@@ -365,47 +365,6 @@ function gg_run_open_llama_3b_v2 {
365365
366366 cat $OUT /${ci} -imatrix.log | grep " Final" >> $OUT /${ci} -imatrix-sum.log
367367
368- # lora
# Compare the perplexity of a base run against the same run with a LoRA
# adapter applied, and print one markdown bullet with the verdict.
#
# Arguments:
#   $1 - label for the report line (e.g. "f16 shakespeare")
#   $2 - perplexity output of the base model
#   $3 - perplexity output of the model with the LoRA adapter
# Outputs:
#   A single line on stdout. The last decimal number found in $2 and in $3
#   is taken as the respective perplexity.
# Returns:
#   0  when the LoRA perplexity does not exceed the base perplexity
#   20 when it does, or when either value could not be parsed
function compare_ppl {
    local qnt="$1"
    local ppl1 ppl2

    # "|| true": a non-matching grep must not abort the caller under
    # `set -e` / `pipefail`; the empty result is handled explicitly below.
    ppl1=$(printf '%s\n' "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || true
    ppl2=$(printf '%s\n' "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || true

    # Without this guard an unparsable value would fall through to "OK".
    if [ -z "$ppl1" ] || [ -z "$ppl2" ]; then
        printf ' - %s @ (FAIL: could not parse perplexity: "%s" vs "%s")\n' "$qnt" "$ppl1" "$ppl2"
        return 20
    fi

    # Floating point comparison; "+ 0" forces a numeric (not string) compare.
    if awk -v a="$ppl1" -v b="$ppl2" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        # Base is lower than LoRA: report the offending LoRA value and the
        # inequality that actually holds (LoRA > base).
        printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl2" "$ppl2" "$ppl1"
        return 20
    fi

    printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2"
    return 0
}
382-
383- path_lora=" ../models-mnt/open-llama/3B-v2/lora"
384- path_shakespeare=" ../models-mnt/shakespeare"
385-
386- shakespeare=" ${path_shakespeare} /shakespeare.txt"
387- lora_shakespeare=" ${path_lora} /ggml-adapter-model.bin"
388-
389- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json
390- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin
391- gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt
392-
393- python3 ../convert-lora-to-ggml.py ${path_lora}
394-
395- # f16
396- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-f16.log
397- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-f16.log
398- compare_ppl " f16 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
399-
400- # q8_0
401- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-q8_0.log
402- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-q8_0.log
403- compare_ppl " q8_0 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
404-
405- # q8_0 + f16 lora-base
406- (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log
407- compare_ppl " q8_0 / f16 base shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
408-
409368 set +e
410369}
411370
@@ -416,7 +375,6 @@ function gg_sum_open_llama_3b_v2 {
416375 gg_printf ' - status: %s\n' " $( cat $OUT /${ci} .exit) "
417376 gg_printf ' - perplexity:\n%s\n' " $( cat $OUT /${ci} -ppl.log) "
418377 gg_printf ' - imatrix:\n```\n%s\n```\n' " $( cat $OUT /${ci} -imatrix-sum.log) "
419- gg_printf ' - lora:\n%s\n' " $( cat $OUT /${ci} -lora-ppl.log) "
420378 gg_printf ' - f16: \n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-f16.log) "
421379 gg_printf ' - q8_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q8_0.log) "
422380 gg_printf ' - q4_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_0.log) "
@@ -429,11 +387,6 @@ function gg_sum_open_llama_3b_v2 {
429387 gg_printf ' - q5_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_k.log) "
430388 gg_printf ' - q6_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q6_k.log) "
431389 gg_printf ' - save-load-state: \n```\n%s\n```\n' " $( cat $OUT /${ci} -save-load-state.log) "
432- gg_printf ' - shakespeare (f16):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-f16.log) "
433- gg_printf ' - shakespeare (f16 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log) "
434- gg_printf ' - shakespeare (q8_0):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-q8_0.log) "
435- gg_printf ' - shakespeare (q8_0 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0.log) "
436- gg_printf ' - shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-q8_0-f16.log) "
437390}
438391
439392# open_llama_7b_v2
@@ -549,48 +502,6 @@ function gg_run_open_llama_7b_v2 {
549502
550503 cat $OUT /${ci} -imatrix.log | grep " Final" >> $OUT /${ci} -imatrix-sum.log
551504
552- # lora
# Compare the perplexity of a base run against the same run with a LoRA
# adapter applied, and print one markdown bullet with the verdict.
#
# Arguments:
#   $1 - label for the report line (e.g. "f16 shakespeare")
#   $2 - perplexity output of the base model
#   $3 - perplexity output of the model with the LoRA adapter
# Outputs:
#   A single line on stdout. The last decimal number found in $2 and in $3
#   is taken as the respective perplexity.
# Returns:
#   0  when the LoRA perplexity does not exceed the base perplexity
#   20 when it does, or when either value could not be parsed
function compare_ppl {
    local qnt="$1"
    local ppl1 ppl2

    # "|| true": a non-matching grep must not abort the caller under
    # `set -e` / `pipefail`; the empty result is handled explicitly below.
    ppl1=$(printf '%s\n' "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || true
    ppl2=$(printf '%s\n' "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || true

    # Without this guard an unparsable value would fall through to "OK".
    if [ -z "$ppl1" ] || [ -z "$ppl2" ]; then
        printf ' - %s @ (FAIL: could not parse perplexity: "%s" vs "%s")\n' "$qnt" "$ppl1" "$ppl2"
        return 20
    fi

    # Floating point comparison; "+ 0" forces a numeric (not string) compare.
    if awk -v a="$ppl1" -v b="$ppl2" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        # Base is lower than LoRA: report the offending LoRA value and the
        # inequality that actually holds (LoRA > base).
        printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl2" "$ppl2" "$ppl1"
        return 20
    fi

    printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2"
    return 0
}
566-
567- path_lora=" ../models-mnt/open-llama/7B-v2/lora"
568- path_shakespeare=" ../models-mnt/shakespeare"
569-
570- shakespeare=" ${path_shakespeare} /shakespeare.txt"
571- lora_shakespeare=" ${path_lora} /ggml-adapter-model.bin"
572-
573- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_config.json
574- gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_model.bin
575- gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/shakespeare.txt
576-
577- python3 ../convert-lora-to-ggml.py ${path_lora}
578-
579- # f16
580- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-f16.log
581- (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -ppl-shakespeare-lora-f16.log
582- compare_ppl " f16 shakespeare" " $( cat $OUT /${ci} -ppl-shakespeare-f16.log | grep " ^\[1\]" ) " " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -lora-ppl.log
583-
584- # currently not supported by the CUDA backend
585- # q8_0
586- # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log
587- # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log
588- # compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
589-
590- # q8_0 + f16 lora-base
591- # (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log
592- # compare_ppl "q8_0 / f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
593-
594505 set +e
595506}
596507
@@ -601,7 +512,6 @@ function gg_sum_open_llama_7b_v2 {
601512 gg_printf ' - status: %s\n' " $( cat $OUT /${ci} .exit) "
602513 gg_printf ' - perplexity:\n%s\n' " $( cat $OUT /${ci} -ppl.log) "
603514 gg_printf ' - imatrix:\n```\n%s\n```\n' " $( cat $OUT /${ci} -imatrix-sum.log) "
604- gg_printf ' - lora:\n%s\n' " $( cat $OUT /${ci} -lora-ppl.log) "
605515 gg_printf ' - f16: \n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-f16.log) "
606516 gg_printf ' - q8_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q8_0.log) "
607517 gg_printf ' - q4_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_0.log) "
@@ -614,11 +524,6 @@ function gg_sum_open_llama_7b_v2 {
614524 gg_printf ' - q5_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_k.log) "
615525 gg_printf ' - q6_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q6_k.log) "
616526 gg_printf ' - save-load-state: \n```\n%s\n```\n' " $( cat $OUT /${ci} -save-load-state.log) "
617- gg_printf ' - shakespeare (f16):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-f16.log) "
618- gg_printf ' - shakespeare (f16 lora):\n```\n%s\n```\n' " $( cat $OUT /${ci} -ppl-shakespeare-lora-f16.log) "
619- # gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log)"
620- # gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log)"
621- # gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
622527}
623528
624529# bge-small
0 commit comments