From daedf7ce97caacbd927044e96ba4ec889522624b Mon Sep 17 00:00:00 2001
From: Pavol Rusnak <pavol@rusnak.io>
Date: Mon, 13 Mar 2023 13:49:46 +0100
Subject: [PATCH 1/5] Add quantize script for batch quantization

---
 quantize.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100755 quantize.sh

diff --git a/quantize.sh b/quantize.sh
new file mode 100755
index 0000000000000..591cb9890249c
--- /dev/null
+++ b/quantize.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
+  echo
+  echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
+  echo
+  exit 1
+fi
+
+for i in "models/$1/ggml-model-f16.bin*"; do
+  ./quantize "$i" "${i/f16/q4_0}" 2
+  if [[ "$2" == "--remove-f16" ]]; then
+    rm "$i"
+  fi
+done

From d328973aa1b8ecddc6410086cfa1d64d1d455721 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 13 Mar 2023 18:05:18 +0200
Subject: [PATCH 2/5] Indentation

---
 quantize.sh | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/quantize.sh b/quantize.sh
index 591cb9890249c..f2aea1659597b 100755
--- a/quantize.sh
+++ b/quantize.sh
@@ -1,15 +1,15 @@
 #!/usr/bin/env bash
 
 if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
-  echo
-  echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
-  echo
-  exit 1
+    echo
+    echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
+    echo
+    exit 1
 fi
 
 for i in "models/$1/ggml-model-f16.bin*"; do
-  ./quantize "$i" "${i/f16/q4_0}" 2
-  if [[ "$2" == "--remove-f16" ]]; then
-    rm "$i"
-  fi
+    ./quantize "$i" "${i/f16/q4_0}" 2
+    if [[ "$2" == "--remove-f16" ]]; then
+        rm "$i"
+    fi
 done

From 5f5332b4ef860152e720e9ef859f445eaad542cd Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 13 Mar 2023 18:08:14 +0200
Subject: [PATCH 3/5] README for new quantize.sh

---
 README.md | 34 +++-------------------------------
 1 file changed, 3 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index 3a6d757d636e0..478f9985b8a5e 100644
--- a/README.md
+++ b/README.md
@@ -145,44 +145,16 @@ python3 -m pip install torch numpy sentencepiece
 python3 convert-pth-to-ggml.py models/7B/ 1
 
 # quantize the model to 4-bits
-./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin 2
+./quantize 7B
 
 # run the inference
 ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128
 ```
 
-For the bigger models, there are a few extra quantization steps. For example, for LLaMA-13B, converting to FP16 format
-will create 2 ggml files, instead of one:
-
-```bash
-ggml-model-f16.bin
-ggml-model-f16.bin.1
-```
-
-You need to quantize each of them separately like this:
-
-```bash
-./quantize ./models/13B/ggml-model-f16.bin   ./models/13B/ggml-model-q4_0.bin 2
-./quantize ./models/13B/ggml-model-f16.bin.1 ./models/13B/ggml-model-q4_0.bin.1 2
-```
-
-Everything else is the same. Simply run:
-
-```bash
-./main -m ./models/13B/ggml-model-q4_0.bin -t 8 -n 128
-```
-
-The number of files generated for each model is as follows:
-
-```
-7B  -> 1 file
-13B -> 2 files
-30B -> 4 files
-65B -> 8 files
-```
-
 When running the larger models, make sure you have enough disk space to store all the intermediate files.
 
+TODO: add disk and memory requirements for each model
+
 ### Interactive mode
 
 If you want a more ChatGPT-like experience, you can run in interactive mode by passing `-i` as a parameter.

From 23d334b660525cc3f55b7facf4fe2afa679b5b71 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 13 Mar 2023 18:09:18 +0200
Subject: [PATCH 4/5] Fix script name

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 478f9985b8a5e..65be1a687dbd6 100644
--- a/README.md
+++ b/README.md
@@ -145,7 +145,7 @@ python3 -m pip install torch numpy sentencepiece
 python3 convert-pth-to-ggml.py models/7B/ 1
 
 # quantize the model to 4-bits
-./quantize 7B
+./quantize.sh 7B
 
 # run the inference
 ./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128

From acf35ec45a5f535ccba44945c457fe5e149db5fc Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 13 Mar 2023 18:14:22 +0200
Subject: [PATCH 5/5] Fix file list on Mac OS

---
 quantize.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quantize.sh b/quantize.sh
index f2aea1659597b..6194649b3f529 100755
--- a/quantize.sh
+++ b/quantize.sh
@@ -7,7 +7,7 @@ if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
     exit 1
 fi
 
-for i in "models/$1/ggml-model-f16.bin*"; do
+for i in models/"$1"/ggml-model-f16.bin*; do # glob directly: parsing `ls` output word-splits names
     ./quantize "$i" "${i/f16/q4_0}" 2
     if [[ "$2" == "--remove-f16" ]]; then
         rm "$i"