Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/windows-build-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
cp -Force C:\workspace\cuDNN\bin\cudnn64_8.dll .\build_nitro
ls .\build_nitro
dotnet tool install --global AzureSignTool
%USERPROFILE%\.dotnet\tools\azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build_nitro\nitro.exe"
C:\Users\ContainerAdministrator\.dotnet\tools\azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build_nitro\nitro.exe"
tar -czvf nitro.tar.gz .\build_nitro

- uses: actions/upload-release-asset@v1.0.1
Expand Down
63 changes: 63 additions & 0 deletions BUILD_ENGINE_MODEL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Build engine for NVIDIA TensorRT-LLM on Windows

## Automated script

- Open PowerShell as Administrator
- Run command:

```ps1
.\TensorRT-LLM\windows\setup_env.ps1
```

- CUDA 12.2
- Python 3.10
- Microsoft MPI
- TensorRT 9.2
- CuDNN 8.9

## Manual step by step

- Step 1: Prepare environment with command

```ps1
.\TensorRT-LLM\windows\setup_env.ps1
```

- Step 2: Verify that TensorRT-LLM and TensorRT can run on your machine by running these two commands

```ps1
python -c "import tensorrt as trt; print(trt.__version__)"
python -c "import tensorrt_llm; print(tensorrt_llm._utils.trt_version())"
```

- Step 3: Download the Hugging Face model

```ps1
pip install -U "huggingface_hub[cli]" hf_transfer
# Create folder to store model
mkdir model
mkdir checkpoint
mkdir engine

# Download model to the folder `model\` with a Hugging Face model handle (e.g. jan-hq/stealth-v1.2)
$env:HF_HUB_ENABLE_HF_TRANSFER = 1; huggingface-cli download --repo-type model --local-dir .\model <model handle>

cd examples\llama
# At this step, you can choose whether to run the model at INT4 or FP16; choose either next step

# For FP16 option
python convert_checkpoint.py --model_dir ..\..\..\model --output_dir ..\..\..\checkpoint --dtype float16

# For INT4 option (currently it has a problem with `nvidia-ammo`, have to check)
python ../quantization/quantize.py --model_dir ..\..\..\model --output_dir ..\..\..\checkpoint --dtype float16 --awq_block_size 128 --kv_cache_dtype int8 --calib_size 32

# Build the engine for TensorRT-LLM to use from either FP16 or INT4 options
trtllm-build --checkpoint_dir ..\..\..\checkpoint --output_dir ..\..\..\engine --gemm_plugin float16

# Prepare the tokenizer-related files
cp ..\..\..\model\tokenizer.json ..\..\..\engine
cp ..\..\..\model\tokenizer.model ..\..\..\engine
cp ..\..\..\model\tokenizer_config.json ..\..\..\engine
```

- Step 4: The `engine` folder is ready to be used with the TensorRT-LLM engine
51 changes: 49 additions & 2 deletions windows/setup_env.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
param (
[switch]$skipCUDA,
[switch]$skipPython,
[switch]$skipMPI
[switch]$skipMPI,
[switch]$skipCuDNN,
[switch]$skipTensorRT
)

# Set the error action preference to 'Stop' for the entire script.
Expand Down Expand Up @@ -58,4 +60,49 @@ if (-not ($skipMPI)) {
Write-Output "Skipping MPI installation"
}

# TODO Automate cuDNN installation
# Appends a directory to the machine-level PATH environment variable.
# Arguments: $newPath - directory to append.
# Skips the update when the resulting value would exceed 1024 characters.
# NOTE(review): Windows itself tolerates longer machine PATH values; 1024 is a
# conservative cap inherited from this script — confirm before raising it.
Function Add-ToSystemPath([string]$newPath) {
    $currentPath = [System.Environment]::GetEnvironmentVariable('Path', [System.EnvironmentVariableTarget]::Machine)

    # Idempotency fix: re-running the setup script previously appended the same
    # directory again on every run, bloating PATH toward the length limit.
    if (($currentPath -split ';') -contains $newPath) {
        Write-Output "$newPath is already present in system PATH."
        return
    }

    $newPathValue = "$currentPath;$newPath"

    if ($newPathValue.Length -le 1024) {
        [System.Environment]::SetEnvironmentVariable('Path', $newPathValue, [System.EnvironmentVariableTarget]::Machine)
        Write-Output "Added $newPath to system PATH."
    } else {
        Write-Output "Cannot add $newPath to system PATH because it would exceed the 1024-character limit."
    }
}

# Install CuDNN 8.9
if (-not ($skipCuDNN)) {
    Write-Output "Downloading NVIDIA CuDNN for Windows"
    # Bug fix: this section previously downloaded the TensorRT archive (the two
    # download URLs were swapped between the CuDNN and TensorRT sections).
    # NOTE(review): this redist URL is cuDNN 9.0.0 while the extract path says
    # v8.9 — confirm the exact 8.9 archive URL on NVIDIA's cuDNN download page.
    Invoke-WebRequest -Uri 'https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.0.0.312_cuda12-archive.zip' -OutFile 'cudnn.zip'
    Write-Output "Extracting NVIDIA CuDNN"
    $cuDNNExtractPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CuDNN\v8.9'
    Expand-Archive -Path 'cudnn.zip' -DestinationPath $cuDNNExtractPath
    Write-Output "Removing CuDNN installer"
    Remove-Item -Path 'cudnn.zip' -Force
    # Add both bin and lib directories to the system PATH so the DLLs resolve at runtime
    Add-ToSystemPath "$cuDNNExtractPath\bin"
    Add-ToSystemPath "$cuDNNExtractPath\lib"
    Write-Output "Done CuDNN installation"
} else {
    Write-Output "Skipping CuDNN installation"
}

# Install TensorRT 9.2
if (-not ($skipTensorRT)) {
    Write-Output "Downloading NVIDIA TensorRT for Windows"
    # Bug fix: this section previously downloaded the cuDNN archive (the two
    # download URLs were swapped between the CuDNN and TensorRT sections).
    # The tensorrt-9.2.0.5 archive matches the v9.2 extract path below.
    Invoke-WebRequest -Uri 'https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/9.2.0/tensorrt-9.2.0.5.windows10.x86_64.cuda-12.2.llm.beta.zip' -OutFile 'tensorrt.zip'
    Write-Output "Extracting NVIDIA TensorRT"
    $tensorRTExtractPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT\v9.2'
    Expand-Archive -Path 'tensorrt.zip' -DestinationPath $tensorRTExtractPath
    Write-Output "Removing TensorRT installer"
    Remove-Item -Path 'tensorrt.zip' -Force
    # Add both lib and bin directories to the system PATH so the DLLs resolve at runtime
    Add-ToSystemPath "$tensorRTExtractPath\lib"
    Add-ToSystemPath "$tensorRTExtractPath\bin"
    Write-Output "Done TensorRT installation"
} else {
    Write-Output "Skipping TensorRT installation"
}