Initial version
countzero committed Nov 28, 2023
1 parent 27f09fd commit 5a612e0
Showing 10 changed files with 178 additions and 33 deletions.
4 changes: 4 additions & 0 deletions .gitattributes
@@ -0,0 +1,4 @@
# This overrides the core.autocrlf setting - http://git-scm.com/docs/gitattributes
# Set default behaviour, in case users don't have core.autocrlf set.
# We default to Unix line endings (LF) because the exceptions from this are rare.
* text=auto eol=lf
7 changes: 7 additions & 0 deletions .gitignore
@@ -0,0 +1,7 @@
#
# .gitignore
#

*.sublime-workspace

.env
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,13 @@
# Changelog
All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.0.0] - 2023-11-28

### Added
- Add .env configuration
- Add Documentation
- Add download script
- Add quantization script
109 changes: 107 additions & 2 deletions README.md
@@ -1,2 +1,107 @@
# windows_manage_llms
PowerShell automation to download large language models (LLMs) from Git repositories and quantize them with llama.cpp into the GGUF format.
# Windows Manage Large Language Models

PowerShell automation to download large language models (LLMs) via Git and quantize them with llama.cpp to the `GGUF` format.

Think of it as batch quantization the way https://huggingface.co/TheBloke does it, but on your local machine :wink:

## Features

- Easy configuration via a `.env` file
- Automates the synchronization of Git repositories containing large files (LFS)
- Only fetches one LFS object at a time
- Displays a progress indicator while downloading LFS objects
- Automates the quantization of the source models
- Handles the intermediate files during quantization to reduce disk usage
- Improves quantization speed by separating read from write loads
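
The one-object-at-a-time LFS download from the feature list above can be sketched as follows. This is an illustrative sketch, not the actual `download_model_sources.ps1` implementation; the repository path is an example:

```PowerShell
# Illustrative sketch: fetch LFS objects one at a time so that
# progress is visible per file and a failed download only affects
# a single object. The repository path is an example.
$repositoryPath = "./source/Orca-2-7b"

# List all files that are tracked via Git LFS.
$lfsFiles = @(git -C $repositoryPath lfs ls-files --name-only)

$index = 0

ForEach ($file in $lfsFiles) {

    $index++

    Write-Host "Fetching LFS object ${index} of $($lfsFiles.Count): ${file}"

    # Fetch exactly one LFS object into the local LFS store.
    git -C $repositoryPath lfs fetch --include $file
}

# Materialize the fetched objects in the working tree.
git -C $repositoryPath lfs checkout
```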

## Installation

### Prerequisites

Use https://github.com/countzero/windows_llama.cpp to compile a specific version of the [llama.cpp](https://github.com/ggerganov/llama.cpp) project on your machine.


### Clone the repository from GitHub

Clone the repository to a nice place on your machine via:

```PowerShell
git clone git@github.com:countzero/windows_manage_large_language_models.git
```

### Create a .env file

Create the following `.env` file in the project directory. Make sure to change the `LLAMA_CPP_DIRECTORY` value.

```Env
# Path to the llama.cpp project that contains the
# convert.py script and the quantize.exe binary.
LLAMA_CPP_DIRECTORY=C:\windows_llama.cpp\vendor\llama.cpp
# Path to the Git repositories containing the models.
SOURCE_DIRECTORY=.\source
# Path to the quantized models in GGUF format.
TARGET_DIRECTORY=.\gguf
# Path to the cache directory for intermediate files.
#
# Hint: Ideally this should be located on a different
# physical drive to improve the quantization speed.
CACHE_DIRECTORY=.\cache
#
# Comma separated list of quantization types.
#
# Possible llama.cpp quantization types:
#
# Q2_K : 2.63G, +0.6717 ppl @ LLaMA-v1-7B
# Q3_K_S : 2.75G, +0.5551 ppl @ LLaMA-v1-7B
# Q3_K_M : 3.07G, +0.2496 ppl @ LLaMA-v1-7B
# Q3_K_L : 3.35G, +0.1764 ppl @ LLaMA-v1-7B
# Q4_0 : 3.56G, +0.2166 ppl @ LLaMA-v1-7B
# Q4_1 : 3.90G, +0.1585 ppl @ LLaMA-v1-7B
# Q4_K_S : 3.59G, +0.0992 ppl @ LLaMA-v1-7B
# Q4_K_M : 3.80G, +0.0532 ppl @ LLaMA-v1-7B
# Q5_0 : 4.33G, +0.0683 ppl @ LLaMA-v1-7B
# Q5_1 : 4.70G, +0.0349 ppl @ LLaMA-v1-7B
# Q5_K_S : 4.33G, +0.0400 ppl @ LLaMA-v1-7B
# Q5_K_M : 4.45G, +0.0122 ppl @ LLaMA-v1-7B
# Q6_K : 5.15G, -0.0008 ppl @ LLaMA-v1-7B
# Q8_0 : 6.70G, +0.0004 ppl @ LLaMA-v1-7B
# F16 : 13.00G @ 7B
# F32 : 26.00G @ 7B
# COPY : only copy tensors, no quantizing
#
# Hint: The sweet spot is Q4_K_M.
#
QUANTIZATION_TYPES=q4_K_M,q2_K
```

## Usage

### Clone a model

Clone a Git repository containing an LLM into the `SOURCE_DIRECTORY` without checking out any files or downloading any large files (LFS).

```PowerShell
git -C "./source" clone --no-checkout https://huggingface.co/microsoft/Orca-2-7b
```

### Download model sources

Download all files across all Git repositories that are inside the `SOURCE_DIRECTORY`.

```PowerShell
./download_model_sources.ps1
```

**Hint:** This can also be used to update existing sources from their remote repositories.

### Quantize models

Quantize all model weights inside the `SOURCE_DIRECTORY` into the `TARGET_DIRECTORY`, creating one `GGUF` file per entry in `QUANTIZATION_TYPES`.

```PowerShell
./quantize_weights_for_llama.cpp.ps1
```
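
Under the hood, the quantization is a two-step flow per model: first convert the source weights into an unquantized `GGUF` file in the cache directory, then quantize that intermediate file once per type and remove it. A simplified sketch (the model name and paths are examples; the real logic lives in `quantize_weights_for_llama.cpp.ps1`):

```PowerShell
# Simplified sketch of the two-step quantization flow; the model
# name and paths are examples.
$unquantizedModelPath = Join-Path $env:CACHE_DIRECTORY "Orca-2-7b.model-unquantized.gguf"

# Step 1: Convert the source weights into an intermediate,
# unquantized GGUF file inside the cache directory.
python "$($env:LLAMA_CPP_DIRECTORY)\convert.py" `
    --outfile $unquantizedModelPath `
    ".\source\Orca-2-7b"

# Step 2: Quantize the intermediate file once per requested type.
ForEach ($type in ($env:QUANTIZATION_TYPES -split ',')) {

    & "$($env:LLAMA_CPP_DIRECTORY)\quantize.exe" `
        $unquantizedModelPath `
        ".\gguf\Orca-2-7b\model-quantized-${type}.gguf" `
        $type
}

# Remove the intermediate file to reduce disk usage.
Remove-Item $unquantizedModelPath
```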
3 changes: 3 additions & 0 deletions cache/.gitignore
@@ -0,0 +1,3 @@
# Ignore everything in this directory except this file.
*
!.gitignore
13 changes: 12 additions & 1 deletion download_model_sources.ps1
@@ -1,6 +1,17 @@
$stopwatch = [System.Diagnostics.Stopwatch]::startNew()

$sourceDirectory = "R:\AI\LLM\source"
Get-Content "./.env" | ForEach {

    $name, $value = $_.split('=', 2)

    if ([string]::IsNullOrWhiteSpace($name) -or $name.Contains('#')) {
        return
    }

    Set-Content env:\$name $value
}

$sourceDirectory = $env:SOURCE_DIRECTORY

$naturalSort = { [regex]::Replace($_, '\d+', { $args[0].Value.PadLeft(20) }) }

3 changes: 3 additions & 0 deletions gguf/.gitignore
@@ -0,0 +1,3 @@
# Ignore everything in this directory except this file.
*
!.gitignore
48 changes: 18 additions & 30 deletions quantize_weights_for_llama.cpp.ps1
@@ -1,35 +1,25 @@
$stopwatch = [System.Diagnostics.Stopwatch]::startNew()

$llamaCppDirectory = "D:\Privat\GitHub\windows_llama.cpp\vendor\llama.cpp"
$sourceDirectory = "R:\AI\LLM\source"
$targetDirectory = "R:\AI\LLM\gguf"
$cacheDirectory = "E:\cache"

$exclude = @()

$types = @(
    # "q2_K"
    # "q3_K"
    # "q3_K_L"
    # "q3_K_M"
    # "q3_K_S"
    # "q4_0"
    # "q4_1"
    # "q4_K"
    "q4_K_M"
    # "q4_K_S"
    # "q5_0"
    # "q5_1"
    # "q5_K"
    # "q5_K_M"
    # "q5_K_S"
    # "q6_K"
    # "q8_0"
)
Get-Content "./.env" | ForEach {

    $name, $value = $_.split('=', 2)

    if ([string]::IsNullOrWhiteSpace($name) -or $name.Contains('#')) {
        return
    }

    Set-Content env:\$name $value
}

$llamaCppDirectory = $env:LLAMA_CPP_DIRECTORY
$sourceDirectory = $env:SOURCE_DIRECTORY
$targetDirectory = $env:TARGET_DIRECTORY
$cacheDirectory = $env:CACHE_DIRECTORY
$quantizationTypes = $env:QUANTIZATION_TYPES -split ','

$naturalSort = { [regex]::Replace($_, '\d+', { $args[0].Value.PadLeft(20) }) }

$repositoryDirectories = Get-ChildItem -Directory $sourceDirectory -Exclude $exclude -Name | Sort-Object $naturalSort
$repositoryDirectories = Get-ChildItem -Directory $sourceDirectory -Name | Sort-Object $naturalSort

Write-Host "Quantizing $($repositoryDirectories.Length) large language models." -ForegroundColor "Yellow"

@@ -46,11 +36,9 @@ ForEach ($repositoryName in $repositoryDirectories) {

    Write-Host "Working on ${repositoryName}..." -ForegroundColor "DarkYellow"

    # We are creating the intermediate unquantized model in a dedicated cache
    # directory so that it can be located on another drive to improve the
    # quantization speed.
    $unquantizedModelPath = Join-Path -Path $cacheDirectory -ChildPath "${repositoryName}.model-unquantized.gguf"

    ForEach ($type in $types) {
    ForEach ($type in $quantizationTypes) {

        $quantizedModelPath = Join-Path -Path $targetDirectoryPath -ChildPath "model-quantized-${type}.gguf"

3 changes: 3 additions & 0 deletions source/.gitignore
@@ -0,0 +1,3 @@
# Ignore everything in this directory except this file.
*
!.gitignore
8 changes: 8 additions & 0 deletions windows_manage_large_language_models.sublime-project
@@ -0,0 +1,8 @@
{
    "folders":
    [
        {
            "path": "."
        }
    ]
}
