From 9f75377cebf11205eafc8f7f0d686ce4d07a8846 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 22 Sep 2025 08:50:39 +0300 Subject: [PATCH 1/2] ci : switch from gemma to qwen3 0.6b --- tools/gguf-split/tests.sh | 4 ++-- tools/quantize/tests.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/gguf-split/tests.sh b/tools/gguf-split/tests.sh index c9ad85da0f1f3..7643dee8fcb15 100755 --- a/tools/gguf-split/tests.sh +++ b/tools/gguf-split/tests.sh @@ -31,12 +31,12 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf # 1. Get a model ( cd $WORK_PATH -"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf +"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf ) echo PASS # 2. Split with max tensors strategy -$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split +$SPLIT --split-max-tensors 28 $WORK_PATH/Qwen3-0.6B-Q8_0.gguf $WORK_PATH/ggml-model-split echo PASS echo diff --git a/tools/quantize/tests.sh b/tools/quantize/tests.sh index ba96161484232..df90900933396 100644 --- a/tools/quantize/tests.sh +++ b/tools/quantize/tests.sh @@ -32,12 +32,12 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf # 1. Get a model ( cd $WORK_PATH -"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf +"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf ) echo PASS # 2. 
Split model -$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split +$SPLIT --split-max-tensors 28 $WORK_PATH/Qwen3-0.6B-Q8_0.gguf $WORK_PATH/ggml-model-split echo PASS echo From 0d6a7dfe8e12ae04edcacee8d8007edd50bf6d71 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 22 Sep 2025 09:08:58 +0300 Subject: [PATCH 2/2] ci : use smaller model for some tests --- ci/run.sh | 35 +++++------------------------------ tools/gguf-split/tests.sh | 14 +++++++------- tools/quantize/tests.sh | 8 ++++---- 3 files changed, 16 insertions(+), 41 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index 09417ef619025..cb90f7a7850fc 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -210,33 +210,9 @@ function gg_sum_ctest_release { gg_printf '```\n' } -# test_scripts_debug +# test_scripts -function gg_run_test_scripts_debug { - cd ${SRC} - - set -e - - (cd ./tools/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log - (cd ./tools/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log - - set +e -} - -function gg_sum_test_scripts_debug { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Runs test scripts in debug mode\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '```\n' - gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" - gg_printf '```\n' - gg_printf '\n' -} - -# test_scripts_release - -function gg_run_test_scripts_release { +function gg_run_test_scripts { cd ${SRC} set -e @@ -247,10 +223,10 @@ function gg_run_test_scripts_release { set +e } -function gg_sum_test_scripts_release { +function gg_sum_test_scripts { gg_printf '### %s\n\n' "${ci}" - gg_printf 'Runs test scripts in release mode\n' + gg_printf 'Runs test scripts\n' gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" gg_printf '```\n' gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" @@ -627,8 +603,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then test $ret -eq 0 && 
gg_run rerank_tiny if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then - test $ret -eq 0 && gg_run test_scripts_debug - test $ret -eq 0 && gg_run test_scripts_release + test $ret -eq 0 && gg_run test_scripts fi test $ret -eq 0 && gg_run qwen3_0_6b diff --git a/tools/gguf-split/tests.sh b/tools/gguf-split/tests.sh index 7643dee8fcb15..e8677018f55f2 100755 --- a/tools/gguf-split/tests.sh +++ b/tools/gguf-split/tests.sh @@ -41,17 +41,17 @@ echo PASS echo # 2b. Test the sharded model is loading properly -$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32 +$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00012.gguf -p "I believe the meaning of life is" --n-predict 32 echo PASS echo # 3. Merge -$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf +$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-merge.gguf echo PASS echo # 3b. Test the merged model is loading properly -$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32 +$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf -p "I believe the meaning of life is" --n-predict 32 echo PASS echo @@ -61,12 +61,12 @@ echo PASS echo # 4b. Test the sharded model is loading properly -$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32 +$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00011.gguf -p "I believe the meaning of life is" --n-predict 32 echo PASS echo # 5. Merge -#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf +#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00011.gguf $WORK_PATH/ggml-model-merge-2.gguf #echo PASS #echo @@ -76,12 +76,12 @@ echo #echo # 6. 
Split with size strategy -$SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G +$SPLIT --split-max-size 500M $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-500M echo PASS echo # 6b. Test the sharded model is loading properly -$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32 +$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-500M-00001-of-00002.gguf -p "I believe the meaning of life is" --n-predict 32 echo PASS echo diff --git a/tools/quantize/tests.sh b/tools/quantize/tests.sh index df90900933396..acc54fd9b1594 100644 --- a/tools/quantize/tests.sh +++ b/tools/quantize/tests.sh @@ -42,22 +42,22 @@ echo PASS echo # 3. Requant model with '--keep-split' -$QUANTIZE --allow-requantize --keep-split $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K +$QUANTIZE --allow-requantize --keep-split $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K echo PASS echo # 3a. Test the requanted model is loading properly -$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --n-predict 32 +$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-00001-of-00012.gguf -p "I believe the meaning of life is" --n-predict 32 echo PASS echo # 4. Requant mode without '--keep-split' -$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K +$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K echo PASS echo # 4b. Test the requanted model is loading properly -$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-merge.gguf --n-predict 32 +$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-merge.gguf -p "I believe the meaning of life is" --n-predict 32 echo PASS echo