From daedf7ce97caacbd927044e96ba4ec889522624b Mon Sep 17 00:00:00 2001
From: Pavol Rusnak
Date: Mon, 13 Mar 2023 13:49:46 +0100
Subject: [PATCH 1/5] Add quantize script for batch quantization

---
 quantize.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100755 quantize.sh

diff --git a/quantize.sh b/quantize.sh
new file mode 100755
index 0000000000000..591cb9890249c
--- /dev/null
+++ b/quantize.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
+  echo
+  echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
+  echo
+  exit 1
+fi
+
+for i in "models/$1/ggml-model-f16.bin*"; do
+  ./quantize "$i" "${i/f16/q4_0}" 2
+  if [[ "$2" == "--remove-f16" ]]; then
+    rm "$i"
+  fi
+done

From d328973aa1b8ecddc6410086cfa1d64d1d455721 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 13 Mar 2023 18:05:18 +0200
Subject: [PATCH 2/5] Indentation

---
 quantize.sh | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/quantize.sh b/quantize.sh
index 591cb9890249c..f2aea1659597b 100755
--- a/quantize.sh
+++ b/quantize.sh
@@ -1,15 +1,15 @@
 #!/usr/bin/env bash
 
 if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
-  echo
-  echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
-  echo
-  exit 1
+    echo
+    echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
+    echo
+    exit 1
 fi
 
 for i in "models/$1/ggml-model-f16.bin*"; do
-  ./quantize "$i" "${i/f16/q4_0}" 2
-  if [[ "$2" == "--remove-f16" ]]; then
-    rm "$i"
-  fi
+    ./quantize "$i" "${i/f16/q4_0}" 2
+    if [[ "$2" == "--remove-f16" ]]; then
+        rm "$i"
+    fi
 done

From 5f5332b4ef860152e720e9ef859f445eaad542cd Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 13 Mar 2023 18:08:14 +0200
Subject: [PATCH 3/5] README for new quantize.sh

---
 README.md | 34 +++-------------------------------
 1 file changed, 3 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index 3a6d757d636e0..478f9985b8a5e 100644
--- a/README.md
+++ b/README.md
@@ -145,44 +145,16 @@ python3 -m pip install torch numpy sentencepiece
 python3 convert-pth-to-ggml.py models/7B/ 1
 
 # quantize the model to 4-bits
-./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin 2
+./quantize 7B
 
 # run the inference
 ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128
 ```
 
-For the bigger models, there are a few extra quantization steps. For example, for LLaMA-13B, converting to FP16 format
-will create 2 ggml files, instead of one:
-
-```bash
-ggml-model-f16.bin
-ggml-model-f16.bin.1
-```
-
-You need to quantize each of them separately like this:
-
-```bash
-./quantize ./models/13B/ggml-model-f16.bin ./models/13B/ggml-model-q4_0.bin 2
-./quantize ./models/13B/ggml-model-f16.bin.1 ./models/13B/ggml-model-q4_0.bin.1 2
-```
-
-Everything else is the same. Simply run:
-
-```bash
-./main -m ./models/13B/ggml-model-q4_0.bin -t 8 -n 128
-```
-
-The number of files generated for each model is as follows:
-
-```
-7B -> 1 file
-13B -> 2 files
-30B -> 4 files
-65B -> 8 files
-```
-
 When running the larger models, make sure you have enough disk space to store all the intermediate files.
 
+TODO: add model disk/mem requirements
+
 ### Interactive mode
 
 If you want a more ChatGPT-like experience, you can run in interactive mode by passing `-i` as a parameter.

From 23d334b660525cc3f55b7facf4fe2afa679b5b71 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 13 Mar 2023 18:09:18 +0200
Subject: [PATCH 4/5] Fix script name

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 478f9985b8a5e..65be1a687dbd6 100644
--- a/README.md
+++ b/README.md
@@ -145,7 +145,7 @@ python3 -m pip install torch numpy sentencepiece
 python3 convert-pth-to-ggml.py models/7B/ 1
 
 # quantize the model to 4-bits
-./quantize 7B
+./quantize.sh 7B
 
 # run the inference
 ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128

From acf35ec45a5f535ccba44945c457fe5e149db5fc Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 13 Mar 2023 18:14:22 +0200
Subject: [PATCH 5/5] Fix file list on Mac OS

---
Review note: the loop now iterates over a shell glob instead of the output of
`ls`, so matched paths are never word-split or re-globbed. The `-e` guard skips
the literal pattern that bash leaves in place when nothing matches, so the loop
body still does not run in that case. The blob hash on the "index" line below
predates this revision of the hunk.

 quantize.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/quantize.sh b/quantize.sh
index f2aea1659597b..6194649b3f529 100755
--- a/quantize.sh
+++ b/quantize.sh
@@ -7,7 +7,8 @@ if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
     exit 1
 fi
 
-for i in "models/$1/ggml-model-f16.bin*"; do
+for i in "models/$1"/ggml-model-f16.bin*; do
+    [[ -e "$i" ]] || continue
     ./quantize "$i" "${i/f16/q4_0}" 2
     if [[ "$2" == "--remove-f16" ]]; then
         rm "$i"