Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/windows-build-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
cp -Force C:\workspace\cuDNN\bin\cudnn64_8.dll .\build_nitro
ls .\build_nitro
dotnet tool install --global AzureSignTool
%USERPROFILE%\.dotnet\tools\azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build_nitro\nitro.exe"
C:\Users\ContainerAdministrator\.dotnet\tools\azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build_nitro\nitro.exe"
tar -czvf nitro.tar.gz .\build_nitro

- uses: actions/upload-release-asset@v1.0.1
Expand Down
63 changes: 63 additions & 0 deletions BUILD_ENGINE_MODEL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Build engine for NVIDIA TensorRT-LLM on Windows

## Automated script

- Open PowerShell as Administrator
- Run command:

```ps1
.\TensorRT-LLM\windows\setup_env.ps1
```

- CUDA 12.2
- Python 3.10
- Microsoft MPI
- TensorRT 9.2
- CuDNN 8.9

## Manual step by step

- Step 1: Prepare environment with command

```ps1
.\TensorRT-LLM\windows\setup_env.ps1
```

- Step 2: Verify that TensorRT-LLM and TensorRT can run on your machine by running these two commands

```ps1
python -c "import tensorrt as trt; print(trt.__version__)"
python -c "import tensorrt_llm; print(tensorrt_llm._utils.trt_version())"
```

- Step 3: Download the Hugging Face model

```ps1
pip install -U "huggingface_hub[cli]" hf_transfer
# Create folder to store model
mkdir model
mkdir checkpoint
mkdir engine

# Download model to the folder `model\` with a Hugging Face model handle (e.g. jan-hq/stealth-v1.2)
$env:HF_HUB_ENABLE_HF_TRANSFER = 1; huggingface-cli download --repo-type model --local-dir .\model <model handle>

cd examples\llama
# At this step, you can choose whether to run the model at INT4 or FP16; choose either next step

# For FP16 option
python convert_checkpoint.py --model_dir ..\..\..\model --output_dir ..\..\..\checkpoint --dtype float16

# For INT4 option (currently it has a problem with `nvidia-ammo`, have to check)
python ../quantization/quantize.py --model_dir ..\..\..\model --output_dir ..\..\..\checkpoint --dtype float16 --awq_block_size 128 --kv_cache_dtype int8 --calib_size 32

# Build the engine for TensorRT-LLM to use from either FP16 or INT4 options
trtllm-build --checkpoint_dir ..\..\..\checkpoint --output_dir ..\..\..\engine --gemm_plugin float16

# Prepare the tokenizer-related files
cp ..\..\..\model\tokenizer.json ..\..\..\engine
cp ..\..\..\model\tokenizer.model ..\..\..\engine
cp ..\..\..\model\tokenizer_config.json ..\..\..\engine
```

- Step 4: The `engine` folder is ready to be used with the TensorRT-LLM engine
51 changes: 49 additions & 2 deletions windows/setup_env.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
param (
[switch]$skipCUDA,
[switch]$skipPython,
[switch]$skipMPI
[switch]$skipMPI,
[switch]$skipCuDNN,
[switch]$skipTensorRT
)

# Set the error action preference to 'Stop' for the entire script.
Expand Down Expand Up @@ -58,4 +60,49 @@ if (-not ($skipMPI)) {
Write-Output "Skipping MPI installation"
}

# TODO Automate cuDNN installation
# Appends a directory to the machine-level PATH environment variable.
# Arguments: $newPath - directory to append.
# Skips the update when the resulting value would exceed 1024 characters.
# NOTE(review): Windows itself tolerates longer machine PATH values; 1024 is a
# conservative cap inherited from this script — confirm before raising it.
Function Add-ToSystemPath([string]$newPath) {
    $currentPath = [System.Environment]::GetEnvironmentVariable('Path', [System.EnvironmentVariableTarget]::Machine)

    # Idempotency fix: re-running the setup script previously appended the same
    # directory again on every run, bloating PATH toward the length limit.
    if (($currentPath -split ';') -contains $newPath) {
        Write-Output "$newPath is already present in system PATH."
        return
    }

    $newPathValue = "$currentPath;$newPath"

    if ($newPathValue.Length -le 1024) {
        [System.Environment]::SetEnvironmentVariable('Path', $newPathValue, [System.EnvironmentVariableTarget]::Machine)
        Write-Output "Added $newPath to system PATH."
    } else {
        Write-Output "Cannot add $newPath to system PATH because it would exceed the 1024-character limit."
    }
}

# Install CuDNN 8.9
if (-not ($skipCuDNN)) {
    Write-Output "Downloading NVIDIA CuDNN for Windows"
    # Bug fix: this section previously downloaded the TensorRT archive (the two
    # download URLs were swapped between the CuDNN and TensorRT sections).
    # NOTE(review): this redist URL is cuDNN 9.0.0 while the extract path says
    # v8.9 — confirm the exact 8.9 archive URL on NVIDIA's cuDNN download page.
    Invoke-WebRequest -Uri 'https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.0.0.312_cuda12-archive.zip' -OutFile 'cudnn.zip'
    Write-Output "Extracting NVIDIA CuDNN"
    $cuDNNExtractPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CuDNN\v8.9'
    Expand-Archive -Path 'cudnn.zip' -DestinationPath $cuDNNExtractPath
    Write-Output "Removing CuDNN installer"
    Remove-Item -Path 'cudnn.zip' -Force
    # Add both bin and lib directories to the system PATH so the DLLs resolve at runtime
    Add-ToSystemPath "$cuDNNExtractPath\bin"
    Add-ToSystemPath "$cuDNNExtractPath\lib"
    Write-Output "Done CuDNN installation"
} else {
    Write-Output "Skipping CuDNN installation"
}

# Install TensorRT 9.2
if (-not ($skipTensorRT)) {
    Write-Output "Downloading NVIDIA TensorRT for Windows"
    # Bug fix: this section previously downloaded the cuDNN archive (the two
    # download URLs were swapped between the CuDNN and TensorRT sections).
    # The tensorrt-9.2.0.5 archive matches the v9.2 extract path below.
    Invoke-WebRequest -Uri 'https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/9.2.0/tensorrt-9.2.0.5.windows10.x86_64.cuda-12.2.llm.beta.zip' -OutFile 'tensorrt.zip'
    Write-Output "Extracting NVIDIA TensorRT"
    $tensorRTExtractPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT\v9.2'
    Expand-Archive -Path 'tensorrt.zip' -DestinationPath $tensorRTExtractPath
    Write-Output "Removing TensorRT installer"
    Remove-Item -Path 'tensorrt.zip' -Force
    # Add both lib and bin directories to the system PATH so the DLLs resolve at runtime
    Add-ToSystemPath "$tensorRTExtractPath\lib"
    Add-ToSystemPath "$tensorRTExtractPath\bin"
    Write-Output "Done TensorRT installation"
} else {
    Write-Output "Skipping TensorRT installation"
}