From 9f75377cebf11205eafc8f7f0d686ce4d07a8846 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 22 Sep 2025 08:50:39 +0300
Subject: [PATCH 1/2] ci : switch from gemma to qwen3 0.6b

---
 tools/gguf-split/tests.sh | 4 ++--
 tools/quantize/tests.sh   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/gguf-split/tests.sh b/tools/gguf-split/tests.sh
index c9ad85da0f1f3..7643dee8fcb15 100755
--- a/tools/gguf-split/tests.sh
+++ b/tools/gguf-split/tests.sh
@@ -31,12 +31,12 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf
 # 1. Get a model
 (
 cd $WORK_PATH
-"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
+"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf
 )
 echo PASS
 
 # 2. Split with max tensors strategy
-$SPLIT --split-max-tensors 28  $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
+$SPLIT --split-max-tensors 28  $WORK_PATH/Qwen3-0.6B-Q8_0.gguf $WORK_PATH/ggml-model-split
 echo PASS
 echo
 
diff --git a/tools/quantize/tests.sh b/tools/quantize/tests.sh
index ba96161484232..df90900933396 100644
--- a/tools/quantize/tests.sh
+++ b/tools/quantize/tests.sh
@@ -32,12 +32,12 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
 # 1. Get a model
 (
 cd $WORK_PATH
-"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
+"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf
 )
 echo PASS
 
 # 2. Split model
-$SPLIT --split-max-tensors 28  $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
+$SPLIT --split-max-tensors 28  $WORK_PATH/Qwen3-0.6B-Q8_0.gguf $WORK_PATH/ggml-model-split
 echo PASS
 echo
 

From 0d6a7dfe8e12ae04edcacee8d8007edd50bf6d71 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 22 Sep 2025 09:08:58 +0300
Subject: [PATCH 2/2] ci : use smaller model for some tests

---
 ci/run.sh                 | 35 +++++------------------------------
 tools/gguf-split/tests.sh | 14 +++++++-------
 tools/quantize/tests.sh   |  8 ++++----
 3 files changed, 16 insertions(+), 41 deletions(-)

diff --git a/ci/run.sh b/ci/run.sh
index 09417ef619025..cb90f7a7850fc 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -210,33 +210,9 @@ function gg_sum_ctest_release {
     gg_printf '```\n'
 }
 
-# test_scripts_debug
+# test_scripts
 
-function gg_run_test_scripts_debug {
-    cd ${SRC}
-
-    set -e
-
-    (cd ./tools/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
-    (cd ./tools/quantize   && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
-
-    set +e
-}
-
-function gg_sum_test_scripts_debug {
-    gg_printf '### %s\n\n' "${ci}"
-
-    gg_printf 'Runs test scripts in debug mode\n'
-    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
-    gg_printf '```\n'
-    gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
-    gg_printf '```\n'
-    gg_printf '\n'
-}
-
-# test_scripts_release
-
-function gg_run_test_scripts_release {
+function gg_run_test_scripts {
     cd ${SRC}
 
     set -e
@@ -247,10 +223,10 @@ function gg_run_test_scripts_release {
     set +e
 }
 
-function gg_sum_test_scripts_release {
+function gg_sum_test_scripts {
     gg_printf '### %s\n\n' "${ci}"
 
-    gg_printf 'Runs test scripts in release mode\n'
+    gg_printf 'Runs test scripts\n'
     gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
     gg_printf '```\n'
     gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
@@ -627,8 +603,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
     test $ret -eq 0 && gg_run rerank_tiny
 
     if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
-        test $ret -eq 0 && gg_run test_scripts_debug
-        test $ret -eq 0 && gg_run test_scripts_release
+        test $ret -eq 0 && gg_run test_scripts
     fi
 
     test $ret -eq 0 && gg_run qwen3_0_6b
diff --git a/tools/gguf-split/tests.sh b/tools/gguf-split/tests.sh
index 7643dee8fcb15..e8677018f55f2 100755
--- a/tools/gguf-split/tests.sh
+++ b/tools/gguf-split/tests.sh
@@ -41,17 +41,17 @@ echo PASS
 echo
 
 # 2b. Test the sharded model is loading properly
-$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00012.gguf -p "I believe the meaning of life is" --n-predict 32
 echo PASS
 echo
 
 # 3. Merge
-$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf
+$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-merge.gguf
 echo PASS
 echo
 
 # 3b. Test the merged model is loading properly
-$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf -p "I believe the meaning of life is" --n-predict 32
 echo PASS
 echo
 
@@ -61,12 +61,12 @@ echo PASS
 echo
 
 # 4b. Test the sharded model is loading properly
-$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00011.gguf -p "I believe the meaning of life is" --n-predict 32
 echo PASS
 echo
 
 # 5. Merge
-#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf
+#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00011.gguf $WORK_PATH/ggml-model-merge-2.gguf
 #echo PASS
 #echo
 
@@ -76,12 +76,12 @@ echo
 #echo
 
 # 6. Split with size strategy
-$SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G
+$SPLIT --split-max-size 500M $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-500M
 echo PASS
 echo
 
 # 6b. Test the sharded model is loading properly
-$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-500M-00001-of-00002.gguf -p "I believe the meaning of life is" --n-predict 32
 echo PASS
 echo
 
diff --git a/tools/quantize/tests.sh b/tools/quantize/tests.sh
index df90900933396..acc54fd9b1594 100644
--- a/tools/quantize/tests.sh
+++ b/tools/quantize/tests.sh
@@ -42,22 +42,22 @@ echo PASS
 echo
 
 # 3. Requant model with '--keep-split'
-$QUANTIZE --allow-requantize --keep-split $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K
+$QUANTIZE --allow-requantize --keep-split $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K
 echo PASS
 echo
 
 # 3a. Test the requanted model is loading properly
-$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-00001-of-00012.gguf -p "I believe the meaning of life is" --n-predict 32
 echo PASS
 echo
 
 # 4. Requant mode without '--keep-split'
-$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K
+$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K
 echo PASS
 echo
 
 # 4b. Test the requanted model is loading properly
-$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-merge.gguf --n-predict 32
+$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-merge.gguf -p "I believe the meaning of life is" --n-predict 32
 echo PASS
 echo