diff --git a/.github/runners/Dockerfile.window.runner-ada b/.github/runners/Dockerfile.window.runner-ada new file mode 100644 index 00000000000..4ed2145599d --- /dev/null +++ b/.github/runners/Dockerfile.window.runner-ada @@ -0,0 +1,291 @@ +# Use the Windows Server Core 2019 image. +# https://learn.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2022 + +# Use the Windows Server Core 2019 image. +FROM mcr.microsoft.com/windows/servercore:ltsc2019 + +# Restore the default Windows shell for correct batch processing. +# (Used for VS Build Tools installation) +SHELL ["cmd", "/S", "/C"] + +# ----------------------------------------------------------------------------- + +# Install CUDA 12.2 + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cuda_12.2.2_537.13_windows.exe \ + --output "cuda_installer.exe"; \ + Start-Process cuda_installer.exe -Wait -ArgumentList '-s'; \ + Remove-Item cuda_installer.exe -Force + +# ----------------------------------------------------------------------------- + +# Install Python 3.10.11 + +# Download and install Python +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/python-3.10.11-amd64.exe --output python-3.10.11.exe ; \ + Start-Process python-3.10.11.exe -Wait -ArgumentList '/quiet InstallAllUsers=1 PrependPath=1' ; \ + Remove-Item python-3.10.11.exe -Force + +# Add python3 command +RUN powershell -Command \ + cp "\"C:\\\\Program Files\\\\Python310\\\\python.exe\" \"C:\\\\Program Files\\\\Python310\\\\python3.exe\"" + +# ----------------------------------------------------------------------------- + +# Install Microsoft MPI + +# The latest version is 10.1.3, but it requires you to get a temporary download +# link. +# https://learn.microsoft.com/en-us/message-passing-interface/microsoft-mpi-release-notes +# We use 10.1.1 which has a release on the GitHub page +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/msmpisetup.exe \ + --output "msmpisetup.exe"; \ + Start-Process .\msmpisetup.exe -Wait ; \ + Remove-Item msmpisetup.exe -Force + +# Add MPI binaries to Path +RUN setx Path "%Path%;C:\Program Files\Microsoft MPI\Bin" + +# Download the MSMPI SDK +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/msmpisdk.msi \ + --output "msmpisdk.msi"; \ + Start-Process msiexec.exe -Wait -ArgumentList '/I msmpisdk.msi /quiet'; \ + Remove-Item msmpisdk.msi -Force + +# ----------------------------------------------------------------------------- + +# Install CMake + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cmake-3.27.7-windows-x86_64.msi \ + --output "cmake.msi"; \ + Start-Process msiexec.exe -Wait -ArgumentList '/I cmake.msi /quiet'; \ + Remove-Item cmake.msi -Force + +# Add CMake binaries to Path +RUN setx Path "%Path%;C:\Program Files\CMake\bin" + +# ----------------------------------------------------------------------------- + +# Install VS Build Tools + +RUN \ + # Download the Build Tools bootstrapper. 
+ curl -SL --output vs_buildtools.exe https://aka.ms/vs/17/release/vs_buildtools.exe \ + \ + # Install Build Tools with the Microsoft.VisualStudio.Workload.AzureBuildTools workload, excluding workloads and components with known issues. + && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache \ + --installPath "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools" \ + --includeRecommended \ + --add Microsoft.VisualStudio.Workload.MSBuildTools \ + --add Microsoft.VisualStudio.Workload.VCTools \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 \ + --remove Microsoft.VisualStudio.Component.Windows81SDK \ + || IF "%ERRORLEVEL%"=="3010" EXIT 0) \ + \ + # Cleanup + && del /q vs_buildtools.exe + +# ----------------------------------------------------------------------------- + +# Install Vim (can delete this but it's nice to have) + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/gvim90.exe \ + --output "install_vim.exe"; \ + Start-Process install_vim.exe -Wait -ArgumentList '/S'; \ + Remove-Item install_vim.exe -Force + +# Add Vim binaries to Path +RUN setx Path "%Path%;C:\Program Files (x86)\Vim\vim90" + +# ----------------------------------------------------------------------------- + +# Install Chocolatey +# Chocolatey is a package manager for Windows +# I probably could've used it to install some of the above, but I didn't... + +# If you try to install Chocolatey 2.0.0, it fails on .NET Framework 4.8 installation +# https://stackoverflow.com/a/76470753 +ENV chocolateyVersion=1.4.0 + +# https://docs.chocolatey.org/en-us/choco/setup#install-with-cmd.exe +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + powershell.exe -NoProfile -InputFormat None -ExecutionPolicy Bypass \ + -Command "[System.Net.ServicePointManager]::SecurityProtocol = 3072; \ + iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))" && \ + SET "PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin" + +# ----------------------------------------------------------------------------- + +# Install Git via Chocolatey +RUN powershell -Command \ + choco install git -y + +# ----------------------------------------------------------------------------- + +# Install CUDA 11.8 NVTX + +#RUN powershell -Command \ +# $ErrorActionPreference = 'Stop'; \ +# curl.exe https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe \ +# --output "cuda_11_installer.exe"; \ +# Start-Process cuda_11_installer.exe -Wait -ArgumentList '-s nvtx_11.8'; \ +# Remove-Item cuda_11_installer.exe -Force + +# The above command-line installation method installs NVTX headers at +# C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include\nvtx3\ +# CMake can't find this location for some reason. +# Instead, we just copy the older NvToolsExt version to where CMake expects. 
+# This assumes NvToolsExt was installed on the host machine using the
+# CUDA 11.8 GUI installer and copied to the build context
+
+# COPY ["NvToolsExt", "C:\\\\Program Files\\\\NVIDIA Corporation\\\\NvToolsExt"]
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/NvToolsExt.zip \
+    --output NvToolsExt.zip; \
+    Expand-Archive .\NvToolsExt.zip -DestinationPath 'C:\Program Files\NVIDIA Corporation\'; \
+    Remove-Item NvToolsExt.zip -Force
+
+# -----------------------------------------------------------------------------
+
+# Create a working directory
+WORKDIR "C:\\\\workspace"
+
+# -----------------------------------------------------------------------------
+
+# Download and unzip TensorRT 9.2.0.5 for TensorRT-LLM
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/TensorRT-9.2.0.5.Windows10.x86_64.cuda-12.2.llm.beta.zip \
+    --output TensorRT-9.2.0.5.zip; \
+    Expand-Archive .\TensorRT-9.2.0.5.zip -DestinationPath .; \
+    Remove-Item TensorRT-9.2.0.5.zip -Force
+
+# Add TensorRT libs to Path
+RUN setx Path "%Path%;C:\workspace\TensorRT-9.2.0.5\lib"
+
+# Install TensorRT Python wheel
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    pip install TensorRT-9.2.0.5\python\tensorrt-9.2.0.post12.dev5-cp310-none-win_amd64.whl
+
+# -----------------------------------------------------------------------------
+
+# Copy cuDNN into the working directory
+# This assumes cuDNN exists on the host machine in the build context
+# COPY ["cuDNN", "cuDNN"]
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cuDNN.zip \
+    --output cuDNN.zip; \
+    Expand-Archive .\cuDNN.zip -DestinationPath .; \
+    Remove-Item cuDNN.zip -Force
+
+# Add cuDNN libs and bin to Path.
+RUN setx Path "%Path%;C:\workspace\cuDNN\lib;C:\workspace\cuDNN\bin;"
+
+# -----------------------------------------------------------------------------
+
+# Define the entry point for the docker container.
+# This entry point launches the 64-bit PowerShell developer shell.
+# We need to launch with amd64 arch otherwise Powershell defaults to x86 32-bit build commands which don't jive with CUDA
+ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"]
+
+# -----------------------------------------------------------------------------
+
+# Additional dependencies to build Nitro
+
+# The command below lets MSVC recognize the CUDA compiler
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    Copy-Item -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\visual_studio_integration\MSBuildExtensions\*' -Destination 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v170\BuildCustomizations'
+
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    Copy-Item -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\visual_studio_integration\MSBuildExtensions\*' -Destination 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v160\BuildCustomizations'
+
+
+# Set git safe directory for nitro clone dependencies
+RUN powershell -Command \
+    git config --global --add safe.directory '*'
+
+# Packages for nitro compile
+RUN powershell -Command \
+    choco install pkgconfiglite --allow-empty-checksums -y
+
+RUN powershell -Command \
+    choco install Ninja -y
+
+RUN choco install 7zip -y; \
+    7z --help
+
+# Requirements to build tensorrt-llm on windows
+# COPY ./requirements-windows.txt ./tensorrt-llm-nitro/requirements-windows.txt
+# COPY ./requirements-dev-windows.txt ./tensorrt-llm-nitro/requirements-dev-windows.txt
+# RUN powershell -Command \
+#     cd tensorrt-llm-nitro; \
+#     pip install --no-cache-dir -r .\requirements-dev-windows.txt
+
+# COPY ./.git ./tensorrt-llm-nitro/.git
+
+# COPY ./3rdparty ./tensorrt-llm-nitro/3rdparty
+
+# COPY ./cpp ./tensorrt-llm-nitro/cpp
+
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    git clone https://github.com/janhq/nitro-tensorrt-llm.git; \
+    cd nitro-tensorrt-llm; \
+    git checkout tensorrt-llm-nitro-rel; \
+    git submodule update --init --recursive; \
+    pip install --no-cache-dir -r .\requirements-dev-windows.txt; \
+    cd cpp/tensorrt_llm/nitro; \
+    cmake -S ./nitro_deps -B ./build_deps/nitro_deps; \
+    cmake --build ./build_deps/nitro_deps --config Release
+
+RUN setx Path "%Path%;C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools"
+
+RUN VsDevCmd.bat -arch=amd64 && \
+    powershell.exe -NoLogo -ExecutionPolicy Bypass "cd nitro-tensorrt-llm; python .\scripts\build_wheel.py -a '89-real' --trt_root 'C:\workspace\TensorRT-9.2.0.5\'"
+
+# # -----------------------------------------------------------------------------
+
+# GitHub Actions runner version
+ARG RUNNER_VERSION=2.314.1
+
+# Define the entry point for the docker container.
+# This entry point launches the 64-bit PowerShell developer shell.
+# We need to launch with amd64 arch otherwise Powershell defaults to x86 32-bit build commands which don't jive with CUDA +# ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"] + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + Invoke-WebRequest \ + -Uri https://github.com/actions/runner/releases/download/v$env:RUNNER_VERSION/actions-runner-win-x64-$env:RUNNER_VERSION.zip \ + -OutFile runner.zip; \ + Expand-Archive -Path ./runner.zip -DestinationPath ./actions-runner; \ + Remove-Item -Path .\runner.zip; \ + setx /M PATH $(${Env:PATH} + \";${Env:ProgramFiles}\Git\bin\") + +ADD runner.ps1 ./runner.ps1 + +RUN powershell -Command New-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 -PropertyType DWORD -Force + +RUN powershell -Command icacls 'C:\workspace\nitro-tensorrt-llm' /grant 'Everyone:F' /T + +CMD ["powershell.exe", "-ExecutionPolicy", "Unrestricted", "-File", ".\\runner.ps1"] \ No newline at end of file diff --git a/.github/runners/Dockerfile.window.runner-ampere b/.github/runners/Dockerfile.window.runner-ampere new file mode 100644 index 00000000000..c41eb6205e9 --- /dev/null +++ b/.github/runners/Dockerfile.window.runner-ampere @@ -0,0 +1,291 @@ +# Use the Windows Server Core 2019 image. +# https://learn.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2022 + +# Use the Windows Server Core 2019 image. +FROM mcr.microsoft.com/windows/servercore:ltsc2019 + +# Restore the default Windows shell for correct batch processing. +# (Used for VS Build Tools installation) +SHELL ["cmd", "/S", "/C"] + +# ----------------------------------------------------------------------------- + +# Install CUDA 12.2 + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cuda_12.2.2_537.13_windows.exe \ + --output "cuda_installer.exe"; \ + Start-Process cuda_installer.exe -Wait -ArgumentList '-s'; \ + Remove-Item cuda_installer.exe -Force + +# ----------------------------------------------------------------------------- + +# Install Python 3.10.11 + +# Download and install Python +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/python-3.10.11-amd64.exe --output python-3.10.11.exe ; \ + Start-Process python-3.10.11.exe -Wait -ArgumentList '/quiet InstallAllUsers=1 PrependPath=1' ; \ + Remove-Item python-3.10.11.exe -Force + +# Add python3 command +RUN powershell -Command \ + cp "\"C:\\\\Program Files\\\\Python310\\\\python.exe\" \"C:\\\\Program Files\\\\Python310\\\\python3.exe\"" + +# ----------------------------------------------------------------------------- + +# Install Microsoft MPI + +# The latest version is 10.1.3, but it requires you to get a temporary download +# link. 
+# https://learn.microsoft.com/en-us/message-passing-interface/microsoft-mpi-release-notes +# We use 10.1.1 which has a release on the GitHub page +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/msmpisetup.exe \ + --output "msmpisetup.exe"; \ + Start-Process .\msmpisetup.exe -Wait ; \ + Remove-Item msmpisetup.exe -Force + +# Add MPI binaries to Path +RUN setx Path "%Path%;C:\Program Files\Microsoft MPI\Bin" + +# Download the MSMPI SDK +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/msmpisdk.msi \ + --output "msmpisdk.msi"; \ + Start-Process msiexec.exe -Wait -ArgumentList '/I msmpisdk.msi /quiet'; \ + Remove-Item msmpisdk.msi -Force + +# ----------------------------------------------------------------------------- + +# Install CMake + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cmake-3.27.7-windows-x86_64.msi \ + --output "cmake.msi"; \ + Start-Process msiexec.exe -Wait -ArgumentList '/I cmake.msi /quiet'; \ + Remove-Item cmake.msi -Force + +# Add CMake binaries to Path +RUN setx Path "%Path%;C:\Program Files\CMake\bin" + +# ----------------------------------------------------------------------------- + +# Install VS Build Tools + +RUN \ + # Download the Build Tools bootstrapper. + curl -SL --output vs_buildtools.exe https://aka.ms/vs/17/release/vs_buildtools.exe \ + \ + # Install Build Tools with the Microsoft.VisualStudio.Workload.AzureBuildTools workload, excluding workloads and components with known issues. + && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache \ + --installPath "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools" \ + --includeRecommended \ + --add Microsoft.VisualStudio.Workload.MSBuildTools \ + --add Microsoft.VisualStudio.Workload.VCTools \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 \ + --remove Microsoft.VisualStudio.Component.Windows81SDK \ + || IF "%ERRORLEVEL%"=="3010" EXIT 0) \ + \ + # Cleanup + && del /q vs_buildtools.exe + +# ----------------------------------------------------------------------------- + +# Install Vim (can delete this but it's nice to have) + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/gvim90.exe \ + --output "install_vim.exe"; \ + Start-Process install_vim.exe -Wait -ArgumentList '/S'; \ + Remove-Item install_vim.exe -Force + +# Add Vim binaries to Path +RUN setx Path "%Path%;C:\Program Files (x86)\Vim\vim90" + +# ----------------------------------------------------------------------------- + +# Install Chocolatey +# Chocolatey is a package manager for Windows +# I probably could've used it to install some of the above, but I didn't... 
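+# (Hypothetical illustration, not used in this image: several of the MSI/EXE
+# steps above could likely be replaced with one-liners such as
+# `choco install cmake --version=3.27.7 -y`, but the manual installers are
+# what was actually tested here.)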
+
+# If you try to install Chocolatey 2.0.0, it fails on .NET Framework 4.8 installation
+# https://stackoverflow.com/a/76470753
+ENV chocolateyVersion=1.4.0
+
+# https://docs.chocolatey.org/en-us/choco/setup#install-with-cmd.exe
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    powershell.exe -NoProfile -InputFormat None -ExecutionPolicy Bypass \
+    -Command "[System.Net.ServicePointManager]::SecurityProtocol = 3072; \
+    iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))" && \
+    SET "PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
+
+# -----------------------------------------------------------------------------
+
+# Install Git via Chocolatey
+RUN powershell -Command \
+    choco install git -y
+
+# -----------------------------------------------------------------------------
+
+# Install CUDA 11.8 NVTX
+
+#RUN powershell -Command \
+#    $ErrorActionPreference = 'Stop'; \
+#    curl.exe https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe \
+#    --output "cuda_11_installer.exe"; \
+#    Start-Process cuda_11_installer.exe -Wait -ArgumentList '-s nvtx_11.8'; \
+#    Remove-Item cuda_11_installer.exe -Force
+
+# The above command-line installation method installs NVTX headers at
+# C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include\nvtx3\
+# CMake can't find this location for some reason.
+# Instead, we just copy the older NvToolsExt version to where CMake expects.
+# This assumes NvToolsExt was installed on the host machine using the
+# CUDA 11.8 GUI installer and copied to the build context
+
+# COPY ["NvToolsExt", "C:\\\\Program Files\\\\NVIDIA Corporation\\\\NvToolsExt"]
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/NvToolsExt.zip \
+    --output NvToolsExt.zip; \
+    Expand-Archive .\NvToolsExt.zip -DestinationPath 'C:\Program Files\NVIDIA Corporation\'; \
+    Remove-Item NvToolsExt.zip -Force
+
+# -----------------------------------------------------------------------------
+
+# Create a working directory
+WORKDIR "C:\\\\workspace"
+
+# -----------------------------------------------------------------------------
+
+# Download and unzip TensorRT 9.2.0.5 for TensorRT-LLM
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/TensorRT-9.2.0.5.Windows10.x86_64.cuda-12.2.llm.beta.zip \
+    --output TensorRT-9.2.0.5.zip; \
+    Expand-Archive .\TensorRT-9.2.0.5.zip -DestinationPath .; \
+    Remove-Item TensorRT-9.2.0.5.zip -Force
+
+# Add TensorRT libs to Path
+RUN setx Path "%Path%;C:\workspace\TensorRT-9.2.0.5\lib"
+
+# Install TensorRT Python wheel
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    pip install TensorRT-9.2.0.5\python\tensorrt-9.2.0.post12.dev5-cp310-none-win_amd64.whl
+
+# -----------------------------------------------------------------------------
+
+# Copy cuDNN into the working directory
+# This assumes cuDNN exists on the host machine in the build context
+# COPY ["cuDNN", "cuDNN"]
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cuDNN.zip \
+    --output cuDNN.zip; \
+    Expand-Archive .\cuDNN.zip -DestinationPath .; \
+    Remove-Item cuDNN.zip -Force
+
+# Add cuDNN libs and bin to Path.
+RUN setx Path "%Path%;C:\workspace\cuDNN\lib;C:\workspace\cuDNN\bin;"
+
+# -----------------------------------------------------------------------------
+
+# Define the entry point for the docker container.
+# This entry point launches the 64-bit PowerShell developer shell.
+# We need to launch with amd64 arch otherwise Powershell defaults to x86 32-bit build commands which don't jive with CUDA
+ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"]
+
+# -----------------------------------------------------------------------------
+
+# Additional dependencies to build Nitro
+
+# The command below lets MSVC recognize the CUDA compiler
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    Copy-Item -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\visual_studio_integration\MSBuildExtensions\*' -Destination 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v170\BuildCustomizations'
+
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    Copy-Item -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\visual_studio_integration\MSBuildExtensions\*' -Destination 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v160\BuildCustomizations'
+
+
+# Set git safe directory for nitro clone dependencies
+RUN powershell -Command \
+    git config --global --add safe.directory '*'
+
+# Packages for nitro compile
+RUN powershell -Command \
+    choco install pkgconfiglite --allow-empty-checksums -y
+
+RUN powershell -Command \
+    choco install Ninja -y
+
+RUN choco install 7zip -y; \
+    7z --help
+
+# Requirements to build tensorrt-llm on windows
+# COPY ./requirements-windows.txt ./tensorrt-llm-nitro/requirements-windows.txt
+# COPY ./requirements-dev-windows.txt ./tensorrt-llm-nitro/requirements-dev-windows.txt
+# RUN powershell -Command \
+#     cd tensorrt-llm-nitro; \
+#     pip install --no-cache-dir -r .\requirements-dev-windows.txt
+
+# COPY ./.git ./tensorrt-llm-nitro/.git
+
+# COPY ./3rdparty ./tensorrt-llm-nitro/3rdparty
+
+# COPY ./cpp ./tensorrt-llm-nitro/cpp
+
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    git clone https://github.com/janhq/nitro-tensorrt-llm.git; \
+    cd nitro-tensorrt-llm; \
+    git checkout tensorrt-llm-nitro-rel; \
+    git submodule update --init --recursive; \
+    pip install --no-cache-dir -r .\requirements-dev-windows.txt; \
+    cd cpp/tensorrt_llm/nitro; \
+    cmake -S ./nitro_deps -B ./build_deps/nitro_deps; \
+    cmake --build ./build_deps/nitro_deps --config Release
+
+RUN setx Path "%Path%;C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools"
+
+RUN VsDevCmd.bat -arch=amd64 && \
+    powershell.exe -NoLogo -ExecutionPolicy Bypass "cd nitro-tensorrt-llm; python .\scripts\build_wheel.py -a '80-real;86-real' --trt_root 'C:\workspace\TensorRT-9.2.0.5\'"
+
+# # -----------------------------------------------------------------------------
+
+# GitHub Actions runner version
+ARG RUNNER_VERSION=2.314.1
+
+# Define the entry point for the docker container.
+# This entry point launches the 64-bit PowerShell developer shell.
+# We need to launch with amd64 arch otherwise Powershell defaults to x86 32-bit build commands which don't jive with CUDA +# ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"] + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + Invoke-WebRequest \ + -Uri https://github.com/actions/runner/releases/download/v$env:RUNNER_VERSION/actions-runner-win-x64-$env:RUNNER_VERSION.zip \ + -OutFile runner.zip; \ + Expand-Archive -Path ./runner.zip -DestinationPath ./actions-runner; \ + Remove-Item -Path .\runner.zip; \ + setx /M PATH $(${Env:PATH} + \";${Env:ProgramFiles}\Git\bin\") + +ADD runner.ps1 ./runner.ps1 + +RUN powershell -Command New-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 -PropertyType DWORD -Force + +RUN powershell -Command icacls 'C:\workspace\nitro-tensorrt-llm' /grant 'Everyone:F' /T + +CMD ["powershell.exe", "-ExecutionPolicy", "Unrestricted", "-File", ".\\runner.ps1"] \ No newline at end of file diff --git a/.github/runners/Dockerfile.window.runner-turing b/.github/runners/Dockerfile.window.runner-turing new file mode 100644 index 00000000000..ee35f0428c1 --- /dev/null +++ b/.github/runners/Dockerfile.window.runner-turing @@ -0,0 +1,291 @@ +# Use the Windows Server Core 2019 image. +# https://learn.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2022 + +# Use the Windows Server Core 2019 image. +FROM mcr.microsoft.com/windows/servercore:ltsc2019 + +# Restore the default Windows shell for correct batch processing. +# (Used for VS Build Tools installation) +SHELL ["cmd", "/S", "/C"] + +# ----------------------------------------------------------------------------- + +# Install CUDA 12.2 + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cuda_12.2.2_537.13_windows.exe \ + --output "cuda_installer.exe"; \ + Start-Process cuda_installer.exe -Wait -ArgumentList '-s'; \ + Remove-Item cuda_installer.exe -Force + +# ----------------------------------------------------------------------------- + +# Install Python 3.10.11 + +# Download and install Python +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/python-3.10.11-amd64.exe --output python-3.10.11.exe ; \ + Start-Process python-3.10.11.exe -Wait -ArgumentList '/quiet InstallAllUsers=1 PrependPath=1' ; \ + Remove-Item python-3.10.11.exe -Force + +# Add python3 command +RUN powershell -Command \ + cp "\"C:\\\\Program Files\\\\Python310\\\\python.exe\" \"C:\\\\Program Files\\\\Python310\\\\python3.exe\"" + +# ----------------------------------------------------------------------------- + +# Install Microsoft MPI + +# The latest version is 10.1.3, but it requires you to get a temporary download +# link. 
+# https://learn.microsoft.com/en-us/message-passing-interface/microsoft-mpi-release-notes +# We use 10.1.1 which has a release on the GitHub page +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/msmpisetup.exe \ + --output "msmpisetup.exe"; \ + Start-Process .\msmpisetup.exe -Wait ; \ + Remove-Item msmpisetup.exe -Force + +# Add MPI binaries to Path +RUN setx Path "%Path%;C:\Program Files\Microsoft MPI\Bin" + +# Download the MSMPI SDK +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/msmpisdk.msi \ + --output "msmpisdk.msi"; \ + Start-Process msiexec.exe -Wait -ArgumentList '/I msmpisdk.msi /quiet'; \ + Remove-Item msmpisdk.msi -Force + +# ----------------------------------------------------------------------------- + +# Install CMake + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cmake-3.27.7-windows-x86_64.msi \ + --output "cmake.msi"; \ + Start-Process msiexec.exe -Wait -ArgumentList '/I cmake.msi /quiet'; \ + Remove-Item cmake.msi -Force + +# Add CMake binaries to Path +RUN setx Path "%Path%;C:\Program Files\CMake\bin" + +# ----------------------------------------------------------------------------- + +# Install VS Build Tools + +RUN \ + # Download the Build Tools bootstrapper. + curl -SL --output vs_buildtools.exe https://aka.ms/vs/17/release/vs_buildtools.exe \ + \ + # Install Build Tools with the Microsoft.VisualStudio.Workload.AzureBuildTools workload, excluding workloads and components with known issues. + && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache \ + --installPath "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools" \ + --includeRecommended \ + --add Microsoft.VisualStudio.Workload.MSBuildTools \ + --add Microsoft.VisualStudio.Workload.VCTools \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 \ + --remove Microsoft.VisualStudio.Component.Windows81SDK \ + || IF "%ERRORLEVEL%"=="3010" EXIT 0) \ + \ + # Cleanup + && del /q vs_buildtools.exe + +# ----------------------------------------------------------------------------- + +# Install Vim (can delete this but it's nice to have) + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/gvim90.exe \ + --output "install_vim.exe"; \ + Start-Process install_vim.exe -Wait -ArgumentList '/S'; \ + Remove-Item install_vim.exe -Force + +# Add Vim binaries to Path +RUN setx Path "%Path%;C:\Program Files (x86)\Vim\vim90" + +# ----------------------------------------------------------------------------- + +# Install Chocolatey +# Chocolatey is a package manager for Windows +# I probably could've used it to install some of the above, but I didn't... 
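+# (Hypothetical illustration, not used in this image: several of the MSI/EXE
+# steps above could likely be replaced with one-liners such as
+# `choco install cmake --version=3.27.7 -y`, but the manual installers are
+# what was actually tested here.)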
+
+# If you try to install Chocolatey 2.0.0, it fails on .NET Framework 4.8 installation
+# https://stackoverflow.com/a/76470753
+ENV chocolateyVersion=1.4.0
+
+# https://docs.chocolatey.org/en-us/choco/setup#install-with-cmd.exe
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    powershell.exe -NoProfile -InputFormat None -ExecutionPolicy Bypass \
+    -Command "[System.Net.ServicePointManager]::SecurityProtocol = 3072; \
+    iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))" && \
+    SET "PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
+
+# -----------------------------------------------------------------------------
+
+# Install Git via Chocolatey
+RUN powershell -Command \
+    choco install git -y
+
+# -----------------------------------------------------------------------------
+
+# Install CUDA 11.8 NVTX
+
+#RUN powershell -Command \
+#    $ErrorActionPreference = 'Stop'; \
+#    curl.exe https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe \
+#    --output "cuda_11_installer.exe"; \
+#    Start-Process cuda_11_installer.exe -Wait -ArgumentList '-s nvtx_11.8'; \
+#    Remove-Item cuda_11_installer.exe -Force
+
+# The above command-line installation method installs NVTX headers at
+# C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include\nvtx3\
+# CMake can't find this location for some reason.
+# Instead, we just copy the older NvToolsExt version to where CMake expects.
+# This assumes NvToolsExt was installed on the host machine using the
+# CUDA 11.8 GUI installer and copied to the build context
+
+# COPY ["NvToolsExt", "C:\\\\Program Files\\\\NVIDIA Corporation\\\\NvToolsExt"]
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/NvToolsExt.zip \
+    --output NvToolsExt.zip; \
+    Expand-Archive .\NvToolsExt.zip -DestinationPath 'C:\Program Files\NVIDIA Corporation\'; \
+    Remove-Item NvToolsExt.zip -Force
+
+# -----------------------------------------------------------------------------
+
+# Create a working directory
+WORKDIR "C:\\\\workspace"
+
+# -----------------------------------------------------------------------------
+
+# Download and unzip TensorRT 9.2.0.5 for TensorRT-LLM
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/TensorRT-9.2.0.5.Windows10.x86_64.cuda-12.2.llm.beta.zip \
+    --output TensorRT-9.2.0.5.zip; \
+    Expand-Archive .\TensorRT-9.2.0.5.zip -DestinationPath .; \
+    Remove-Item TensorRT-9.2.0.5.zip -Force
+
+# Add TensorRT libs to Path
+RUN setx Path "%Path%;C:\workspace\TensorRT-9.2.0.5\lib"
+
+# Install TensorRT Python wheel
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    pip install TensorRT-9.2.0.5\python\tensorrt-9.2.0.post12.dev5-cp310-none-win_amd64.whl
+
+# -----------------------------------------------------------------------------
+
+# Copy cuDNN into the working directory
+# This assumes cuDNN exists on the host machine in the build context
+# COPY ["cuDNN", "cuDNN"]
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cuDNN.zip \
+    --output cuDNN.zip; \
+    Expand-Archive .\cuDNN.zip -DestinationPath .; \
+    Remove-Item cuDNN.zip -Force
+
+# Add cuDNN libs and bin to Path.
+RUN setx Path "%Path%;C:\workspace\cuDNN\lib;C:\workspace\cuDNN\bin;"
+
+# -----------------------------------------------------------------------------
+
+# Define the entry point for the docker container.
+# This entry point launches the 64-bit PowerShell developer shell.
+# We need to launch with amd64 arch otherwise Powershell defaults to x86 32-bit build commands which don't jive with CUDA
+ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"]
+
+# -----------------------------------------------------------------------------
+
+# Additional dependencies to build Nitro
+
+# The command below lets MSVC recognize the CUDA compiler
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    Copy-Item -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\visual_studio_integration\MSBuildExtensions\*' -Destination 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v170\BuildCustomizations'
+
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    Copy-Item -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\visual_studio_integration\MSBuildExtensions\*' -Destination 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v160\BuildCustomizations'
+
+
+# Set git safe directory for nitro clone dependencies
+RUN powershell -Command \
+    git config --global --add safe.directory '*'
+
+# Packages for nitro compile
+RUN powershell -Command \
+    choco install pkgconfiglite --allow-empty-checksums -y
+
+RUN powershell -Command \
+    choco install Ninja -y
+
+RUN choco install 7zip -y; \
+    7z --help
+
+# Requirements to build tensorrt-llm on windows
+# COPY ./requirements-windows.txt ./tensorrt-llm-nitro/requirements-windows.txt
+# COPY ./requirements-dev-windows.txt ./tensorrt-llm-nitro/requirements-dev-windows.txt
+# RUN powershell -Command \
+#     cd tensorrt-llm-nitro; \
+#     pip install --no-cache-dir -r .\requirements-dev-windows.txt
+
+# COPY ./.git ./tensorrt-llm-nitro/.git
+
+# COPY ./3rdparty ./tensorrt-llm-nitro/3rdparty
+
+# COPY ./cpp ./tensorrt-llm-nitro/cpp
+
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    git clone https://github.com/janhq/nitro-tensorrt-llm.git; \
+    cd nitro-tensorrt-llm; \
+    git checkout tensorrt-llm-nitro-rel; \
+    git submodule update --init --recursive; \
+    pip install --no-cache-dir -r .\requirements-dev-windows.txt; \
+    cd cpp/tensorrt_llm/nitro; \
+    cmake -S ./nitro_deps -B ./build_deps/nitro_deps; \
+    cmake --build ./build_deps/nitro_deps --config Release
+
+RUN setx Path "%Path%;C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools"
+
+RUN VsDevCmd.bat -arch=amd64 && \
+    powershell.exe -NoLogo -ExecutionPolicy Bypass "cd nitro-tensorrt-llm; python .\scripts\build_wheel.py -a '75-real' --trt_root 'C:\workspace\TensorRT-9.2.0.5\'"
+
+# # -----------------------------------------------------------------------------
+
+# GitHub Actions runner version
+ARG RUNNER_VERSION=2.314.1
+
+# Define the entry point for the docker container.
+# This entry point launches the 64-bit PowerShell developer shell.
+# We need to launch with amd64 arch otherwise Powershell defaults to x86 32-bit build commands which don't jive with CUDA +# ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"] + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + Invoke-WebRequest \ + -Uri https://github.com/actions/runner/releases/download/v$env:RUNNER_VERSION/actions-runner-win-x64-$env:RUNNER_VERSION.zip \ + -OutFile runner.zip; \ + Expand-Archive -Path ./runner.zip -DestinationPath ./actions-runner; \ + Remove-Item -Path .\runner.zip; \ + setx /M PATH $(${Env:PATH} + \";${Env:ProgramFiles}\Git\bin\") + +ADD runner.ps1 ./runner.ps1 + +RUN powershell -Command New-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 -PropertyType DWORD -Force + +RUN powershell -Command icacls 'C:\workspace\nitro-tensorrt-llm' /grant 'Everyone:F' /T + +CMD ["powershell.exe", "-ExecutionPolicy", "Unrestricted", "-File", ".\\runner.ps1"] \ No newline at end of file diff --git a/.github/runners/runner.ps1 b/.github/runners/runner.ps1 new file mode 100644 index 00000000000..a08f3725bf1 --- /dev/null +++ b/.github/runners/runner.ps1 @@ -0,0 +1,2 @@ +.\actions-runner\config.cmd --unattended --replace --url https://github.com/${env:RUNNER_REPO} --pat $env:RUNNER_PAT --runnergroup $env:RUNNER_GROUP --labels $env:RUNNER_LABELS --name $env:RUNNER_NAME --work $env:RUNNER_WORKDIR; +.\actions-runner\run.cmd; \ No newline at end of file diff --git a/.github/workflows/python-windows-build-release.yml b/.github/workflows/python-windows-build-release.yml new file mode 100644 index 00000000000..fbfe5e76ba6 --- /dev/null +++ b/.github/workflows/python-windows-build-release.yml @@ -0,0 +1,87 @@ +name: Release for python Windows +on: + push: + tags: ["python-windows-*"] + +jobs: + create-draft-release: + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + outputs: + upload_url: ${{ steps.create_release.outputs.upload_url }} + version: ${{ steps.get_version.outputs.version }} + permissions: + contents: write + steps: + - name: Extract tag name prefix + id: get_version + run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/}" + env: + GITHUB_REF: ${{ github.ref }} + - name: Create Draft Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref_name }} + release_name: "${{ env.VERSION }}" + draft: true + prerelease: false + windows-build: + needs: create-draft-release + runs-on: windows-nitro-tensorrt-llm-${{ matrix.cuda_arch_name }} + strategy: + matrix: + include: + - cuda_arch: '80-real;86-real' + cuda_arch_name: 'ampere' + - cuda_arch: '89-real' + cuda_arch_name: 'ada' + - cuda_arch: '75-real' + cuda_arch_name: 'turing' + permissions: + contents: write + steps: + - uses: actions/setup-dotnet@v3 + with: + dotnet-version: "6.0.x" + - name: Clone + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + lfs: true + + - run: cp -r -Force C:\w\nitro-tensorrt-llm\nitro-tensorrt-llm\cpp\tensorrt_llm\nitro C:\workspace\nitro-tensorrt-llm\cpp\tensorrt_llm + + - uses: nick-fields/retry@v3 + with: + timeout_minutes: 45 + max_attempts: 3 + shell: powershell + command: | + cd C:\workspace\nitro-tensorrt-llm\cpp\tensorrt_llm\nitro; powershell 
-Command cmake -S ./nitro_deps -B ./build_deps/nitro_deps; powershell -Command cmake --build ./build_deps/nitro_deps --config Release
+
+      - name: Build Python
+        shell: powershell
+        run: |
+          cd C:\workspace\nitro-tensorrt-llm; powershell -Command "python .\scripts\build_wheel.py -a '${{ matrix.cuda_arch }}' --trt_root 'C:\workspace\TensorRT-9.2.0.5\'"
+
+      - name: Build nitro
+        shell: powershell
+        run: |
+          cd C:\workspace\nitro-tensorrt-llm\cpp\build
+          powershell -Command "cmake .. -DCMAKE_CUDA_ARCHITECTURES='${{ matrix.cuda_arch }}' -DTRT_LIB_DIR='C:/workspace/TensorRT-9.2.0.5/lib' -DTRT_INCLUDE_DIR='C:/workspace/TensorRT-9.2.0.5/include' -DBUILD_NITRO=ON -DCMAKE_CUDA_COMPILER='C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/bin/nvcc.exe' -DENABLE_MULTI_DEVICE=0 -G Ninja"
+          powershell -Command "cmake --build . --parallel 2 --config Release"
+          tar -czvf python.tar.gz .\build\*.whl
+
+      - uses: actions/upload-release-asset@v1.0.1
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+          asset_path: ./python.tar.gz
+          asset_name: ${{ needs.create-draft-release.outputs.version }}-tensorrt-llm-${{ matrix.cuda_arch_name }}.tar.gz
+          asset_content_type: application/gzip
diff --git a/.github/workflows/windows-build-manual.yml b/.github/workflows/windows-build-manual.yml
new file mode 100644
index 00000000000..b3e324ae6ed
--- /dev/null
+++ b/.github/workflows/windows-build-manual.yml
@@ -0,0 +1,73 @@
+name: Manual Build for Windows
+on:
+  workflow_dispatch:
+
+jobs:
+  windows-build:
+    runs-on: windows-nitro-tensorrt-llm-${{ matrix.cuda_arch_name }}
+    strategy:
+      matrix:
+        include:
+          - cuda_arch: '80-real;86-real'
+            cuda_arch_name: 'ampere'
+          - cuda_arch: '89-real'
+            cuda_arch_name: 'ada'
+          - cuda_arch: '75-real'
+            cuda_arch_name: 'turing'
+    permissions:
+      contents: write
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+          lfs: true
+
+      - run: cp -r -Force C:\w\nitro-tensorrt-llm\nitro-tensorrt-llm\cpp\tensorrt_llm\nitro C:\workspace\nitro-tensorrt-llm\cpp\tensorrt_llm
+
+      - uses: nick-fields/retry@v3
+        with:
+          timeout_minutes: 45
+          max_attempts: 3
+          shell: powershell
+          command: |
+            cd C:\workspace\nitro-tensorrt-llm\cpp\tensorrt_llm\nitro; powershell -Command cmake -S ./nitro_deps -B ./build_deps/nitro_deps; powershell -Command cmake --build ./build_deps/nitro_deps --config Release
+
+      - name: Build Python
+        shell: powershell
+        run: |
+          cd C:\workspace\nitro-tensorrt-llm; powershell -Command "python .\scripts\build_wheel.py -a '${{ matrix.cuda_arch }}' --trt_root 'C:\workspace\TensorRT-9.2.0.5\'"
+
+      - name: Build nitro
+        shell: powershell
+        run: |
+          cd C:\workspace\nitro-tensorrt-llm\cpp\build
+          powershell -Command "cmake .. -DCMAKE_CUDA_ARCHITECTURES='${{ matrix.cuda_arch }}' -DTRT_LIB_DIR='C:/workspace/TensorRT-9.2.0.5/lib' -DTRT_INCLUDE_DIR='C:/workspace/TensorRT-9.2.0.5/include' -DBUILD_NITRO=ON -DCMAKE_CUDA_COMPILER='C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/bin/nvcc.exe' -DENABLE_MULTI_DEVICE=0 -G Ninja"
+          powershell -Command "cmake --build . --parallel 2 --config Release"
+
+      - name: create nitro artifact with dll file
+        shell: powershell
+        run: |
+          mkdir build_nitro
+          cp -Force C:\workspace\nitro-tensorrt-llm\cpp\build\tensorrt_llm\nitro\nitro.exe .\build_nitro
+          cp -Force C:\workspace\nitro-tensorrt-llm\cpp\build\tensorrt_llm\tensorrt_llm.dll .\build_nitro
+          cp -Force C:\workspace\nitro-tensorrt-llm\cpp\build\tensorrt_llm\plugins\nvinfer_plugin_tensorrt_llm.dll .\build_nitro
+          cp -Force C:\workspace\nitro-tensorrt-llm\cpp\tensorrt_llm\nitro\build_deps\_install\bin\zlib.dll .\build_nitro
+          cp -Force C:\workspace\cuDNN\bin\cudnn_ops_infer64_8.dll .\build_nitro
+          cp -Force C:\workspace\TensorRT-9.2.0.5\lib\nvinfer.dll .\build_nitro
+          cp -Force C:\Windows\SysWOW64\msmpi.dll .\build_nitro
+          cp -Force C:\workspace\cuDNN\bin\cudnn64_8.dll .\build_nitro
+          ls .\build_nitro
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: nitro-tensorrt-llm-windows-${{ matrix.cuda_arch_name }}
+          path: ./build_nitro
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: python-tensorrt-llm-${{ matrix.cuda_arch }}-wheel
+          path: C:/workspace/nitro-tensorrt-llm/build
diff --git a/.github/workflows/windows-build-release.yml b/.github/workflows/windows-build-release.yml
new file mode 100644
index 00000000000..d4922a537a1
--- /dev/null
+++ b/.github/workflows/windows-build-release.yml
@@ -0,0 +1,103 @@
+name: Release for Windows
+on:
+  push:
+    tags: ["windows-v[0-9]+.[0-9]+.[0-9]+"]
+
+jobs:
+  create-draft-release:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+    outputs:
+      upload_url: ${{ steps.create_release.outputs.upload_url }}
+      version: ${{ steps.get_version.outputs.version }}
+    permissions:
+      contents: write
+    steps:
+      - name: Extract tag name without v prefix
+        id: get_version
+        run: echo "VERSION=${GITHUB_REF#refs/tags/windows-v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/windows-v}"
+        env:
+          GITHUB_REF: ${{ github.ref }}
+      - name: Create Draft Release
+        id: create_release
+        uses: actions/create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ github.ref_name }}
+          release_name: "${{ env.VERSION }}"
+          draft: true
+          prerelease: false
+  windows-build:
+    needs: create-draft-release
+    runs-on: windows-nitro-tensorrt-llm-${{ matrix.cuda_arch_name }}
+    strategy:
+      matrix:
+        include:
+          - cuda_arch: '80-real;86-real'
+            cuda_arch_name: 'ampere'
+          - cuda_arch: '89-real'
+            cuda_arch_name: 'ada'
+          - cuda_arch: '75-real'
+            cuda_arch_name: 'turing'
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/setup-dotnet@v3
+        with:
+          dotnet-version: "6.0.x"
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+          lfs: true
+
+      - run: cp -r -Force C:\w\nitro-tensorrt-llm\nitro-tensorrt-llm\cpp\tensorrt_llm\nitro C:\workspace\nitro-tensorrt-llm\cpp\tensorrt_llm
+
+      - uses: nick-fields/retry@v3
+        with:
+          timeout_minutes: 45
+          max_attempts: 3
+          shell: powershell
+          command: |
+            cd C:\workspace\nitro-tensorrt-llm\cpp\tensorrt_llm\nitro; powershell -Command cmake -S ./nitro_deps -B ./build_deps/nitro_deps; powershell -Command cmake --build ./build_deps/nitro_deps --config Release
+
+      - name: Build Python
+        shell: powershell
+        run: |
+          cd C:\workspace\nitro-tensorrt-llm; powershell -Command "python .\scripts\build_wheel.py -a '${{ matrix.cuda_arch }}' --trt_root 'C:\workspace\TensorRT-9.2.0.5\'"
+
+      - name: Build nitro
+        shell: powershell
+        run: |
+          cd C:\workspace\nitro-tensorrt-llm\cpp\build
+          powershell -Command "cmake .. -DCMAKE_CUDA_ARCHITECTURES='${{ matrix.cuda_arch }}' -DTRT_LIB_DIR='C:/workspace/TensorRT-9.2.0.5/lib' -DTRT_INCLUDE_DIR='C:/workspace/TensorRT-9.2.0.5/include' -DBUILD_NITRO=ON -DCMAKE_CUDA_COMPILER='C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/bin/nvcc.exe' -DENABLE_MULTI_DEVICE=0 -G Ninja"
+          powershell -Command "cmake --build . --parallel 2 --config Release"
+
+      - name: create nitro artifact with dll file
+        shell: powershell
+        run: |
+          mkdir build_nitro
+          cp -Force C:\workspace\nitro-tensorrt-llm\cpp\build\tensorrt_llm\nitro\nitro.exe .\build_nitro
+          cp -Force C:\workspace\nitro-tensorrt-llm\cpp\build\tensorrt_llm\tensorrt_llm.dll .\build_nitro
+          cp -Force C:\workspace\nitro-tensorrt-llm\cpp\build\tensorrt_llm\plugins\nvinfer_plugin_tensorrt_llm.dll .\build_nitro
+          cp -Force C:\workspace\nitro-tensorrt-llm\cpp\tensorrt_llm\nitro\build_deps\_install\bin\zlib.dll .\build_nitro
+          cp -Force C:\workspace\cuDNN\bin\cudnn_ops_infer64_8.dll .\build_nitro
+          cp -Force C:\workspace\TensorRT-9.2.0.5\lib\nvinfer.dll .\build_nitro
+          cp -Force C:\Windows\SysWOW64\msmpi.dll .\build_nitro
+          cp -Force C:\workspace\cuDNN\bin\cudnn64_8.dll .\build_nitro
+          ls .\build_nitro
+          dotnet tool install --global AzureSignTool
+          & "$env:USERPROFILE\.dotnet\tools\azuresigntool.exe" sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build_nitro\nitro.exe"
+          tar -czvf nitro.tar.gz .\build_nitro
+
+      - uses: actions/upload-release-asset@v1.0.1
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+          asset_path: ./nitro.tar.gz
+          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-tensorrt-llm-${{ matrix.cuda_arch_name }}.tar.gz
+          asset_content_type: application/gzip
diff --git a/3rdparty/cutlass b/3rdparty/cutlass
index 39c6a83f231..a8f2c80db05 160000
--- a/3rdparty/cutlass
+++ b/3rdparty/cutlass
@@ -1 +1 @@
-Subproject commit 39c6a83f231d6db2bc6b9c251e7add77d68cbfb4
+Subproject commit a8f2c80db0564c74f4efccac71993b971dfc448b
diff --git a/BUILD_ENGINE_MODEL.md b/BUILD_ENGINE_MODEL.md
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/BUILD_NITRO.md b/BUILD_NITRO.md
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/Dockerfile.nitro.windows b/Dockerfile.nitro.windows
new file mode 100644
index 00000000000..5dcbcde66ae
--- /dev/null
+++ b/Dockerfile.nitro.windows
@@ -0,0 +1,266 @@
+# Use the Windows Server Core 2019 image.
+# https://learn.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2022
+
+# Use the Windows Server Core 2019 image.
+FROM mcr.microsoft.com/windows/servercore:ltsc2019
+
+# Restore the default Windows shell for correct batch processing.
+# (Used for VS Build Tools installation) +SHELL ["cmd", "/S", "/C"] + +# ----------------------------------------------------------------------------- + +# Install CUDA 12.2 + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cuda_12.2.2_537.13_windows.exe \ + --output "cuda_installer.exe"; \ + Start-Process cuda_installer.exe -Wait -ArgumentList '-s'; \ + Remove-Item cuda_installer.exe -Force + +# ----------------------------------------------------------------------------- + +# Install Python 3.10.11 + +# Download and install Python +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/python-3.10.11-amd64.exe --output python-3.10.11.exe ; \ + Start-Process python-3.10.11.exe -Wait -ArgumentList '/quiet InstallAllUsers=1 PrependPath=1' ; \ + Remove-Item python-3.10.11.exe -Force + +# Add python3 command +RUN powershell -Command \ + cp "\"C:\\\\Program Files\\\\Python310\\\\python.exe\" \"C:\\\\Program Files\\\\Python310\\\\python3.exe\"" + +# ----------------------------------------------------------------------------- + +# Install Microsoft MPI + +# The latest version is 10.1.3, but it requires you to get a temporary download +# link. +# https://learn.microsoft.com/en-us/message-passing-interface/microsoft-mpi-release-notes +# We use 10.1.1 which has a release on the GitHub page +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/msmpisetup.exe \ + --output "msmpisetup.exe"; \ + Start-Process .\msmpisetup.exe -Wait ; \ + Remove-Item msmpisetup.exe -Force + +# Add MPI binaries to Path +RUN setx Path "%Path%;C:\Program Files\Microsoft MPI\Bin" + +# Download the MSMPI SDK +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/msmpisdk.msi \ + --output "msmpisdk.msi"; \ + Start-Process msiexec.exe -Wait -ArgumentList '/I msmpisdk.msi /quiet'; \ + Remove-Item msmpisdk.msi -Force + +# ----------------------------------------------------------------------------- + +# Install CMake + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cmake-3.27.7-windows-x86_64.msi \ + --output "cmake.msi"; \ + Start-Process msiexec.exe -Wait -ArgumentList '/I cmake.msi /quiet'; \ + Remove-Item cmake.msi -Force + +# Add CMake binaries to Path +RUN setx Path "%Path%;C:\Program Files\CMake\bin" + +# ----------------------------------------------------------------------------- + +# Install VS Build Tools + +RUN \ + # Download the Build Tools bootstrapper. + curl -SL --output vs_buildtools.exe https://aka.ms/vs/17/release/vs_buildtools.exe \ + \ + # Install Build Tools with the Microsoft.VisualStudio.Workload.AzureBuildTools workload, excluding workloads and components with known issues. 
+ && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache \ + --installPath "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools" \ + --includeRecommended \ + --add Microsoft.VisualStudio.Workload.MSBuildTools \ + --add Microsoft.VisualStudio.Workload.VCTools \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 \ + --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 \ + --remove Microsoft.VisualStudio.Component.Windows81SDK \ + || IF "%ERRORLEVEL%"=="3010" EXIT 0) \ + \ + # Cleanup + && del /q vs_buildtools.exe + +# ----------------------------------------------------------------------------- + +# Install Vim (can delete this but it's nice to have) + +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/gvim90.exe \ + --output "install_vim.exe"; \ + Start-Process install_vim.exe -Wait -ArgumentList '/S'; \ + Remove-Item install_vim.exe -Force + +# Add Vim binaries to Path +RUN setx Path "%Path%;C:\Program Files (x86)\Vim\vim90" + +# ----------------------------------------------------------------------------- + +# Install Chocolatey +# Chocolatey is a package manager for Windows +# I probably could've used it to install some of the above, but I didn't... + +# If you try to install Chocolatey 2.0.0, it fails on .NET Framework 4.8 installation +# https://stackoverflow.com/a/76470753 +ENV chocolateyVersion=1.4.0 + +# https://docs.chocolatey.org/en-us/choco/setup#install-with-cmd.exe +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + powershell.exe -NoProfile -InputFormat None -ExecutionPolicy Bypass \ + -Command "[System.Net.ServicePointManager]::SecurityProtocol = 3072; \ + iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))" && \ + SET "PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin" + +# ----------------------------------------------------------------------------- + +# Install Git via Chocolatey +RUN powershell -Command \ + choco install git -y + +# ----------------------------------------------------------------------------- + +# Install CUDA 11.8 NVTX + +#RUN powershell -Command \ +# $ErrorActionPreference = 'Stop'; \ +# curl.exe https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe \ +# --output "cuda_11_installer.exe"; \ +# Start-Process cuda_11_installer.exe -Wait -ArgumentList '-s nvtx_11.8'; \ +# Remove-Item cuda_11_installer.exe -Force + +# The above command-line installation method installs NVTX headers at +# C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include\nvtx3\ +# CMake can't find this location for some reason. +# Instead, we just copy the older NvToolsExt version to where CMake expects. 
+# This assumes NvToolsExt was installed on the host machine using the +# CUDA 11.8 GUI installer and copied to the build context + +# COPY ["NvToolsExt", "C:\\\\Program Files\\\\NVIDIA Corporation\\\\NvToolsExt"] +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/NvToolsExt.zip \ + --output NvToolsExt.zip; \ + Expand-Archive .\NvToolsExt.zip -DestinationPath 'C:\Program Files\NVIDIA Corporation\'; \ + Remove-Item NvToolsExt.zip -Force + +# ----------------------------------------------------------------------------- + +# Create a working directory +WORKDIR "C:\\\\workspace" + +# ----------------------------------------------------------------------------- + +# Download and unzip TensorrRT 9.2.0.5 for TensorRT-LLM +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/TensorRT-9.2.0.5.Windows10.x86_64.cuda-12.2.llm.beta.zip \ + --output TensorRT-9.2.0.5.zip; \ + Expand-Archive .\TensorRT-9.2.0.5.zip -DestinationPath .; \ + Remove-Item TensorRT-9.2.0.5.zip -Force + +# Add TensorRT libs to Path +RUN setx Path "%Path%;C:\workspace\TensorRT-9.2.0.5\lib" + +# Install TensorRT Python wheel +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + pip install TensorRT-9.2.0.5\python\tensorrt-9.2.0.post12.dev5-cp310-none-win_amd64.whl + +# ----------------------------------------------------------------------------- + +# Copy cuDNN into the working directory +# This assumes cuDNN exists on the host machine in the build context +# COPY ["cuDNN", "cuDNN"] +RUN powershell -Command \ + $ErrorActionPreference = 'Stop'; \ + curl.exe https://delta.jan.ai/dist/windows-container-dependencies/1/cuDNN.zip \ + --output cuDNN.zip; \ + Expand-Archive .\cuDNN.zip -DestinationPath .; \ + Remove-Item cuDNN.zip -Force + +# Add cuDNN libs and bin to Path. +RUN setx Path "%Path%;C:\workspace\cuDNN\lib;C:\workspace\cuDNN\bin;" + +# ----------------------------------------------------------------------------- + +# Define the entry point for the docker container. +# This entry point launches the 64-bit PowerShell developer shell. 
+# We need to launch with the amd64 arch; otherwise PowerShell defaults to the
+# x86 32-bit build tools, which do not work with CUDA.
+ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"]
+
+# -----------------------------------------------------------------------------
+
+# Additional dependencies to build Nitro
+
+# The command below lets MSVC recognize the CUDA compiler (v170 / VS 2022 toolset)
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    Copy-Item -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\visual_studio_integration\MSBuildExtensions\*' -Destination 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v170\BuildCustomizations'
+
+# Same CUDA build customizations for the v160 / VS 2019 toolset
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    Copy-Item -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\visual_studio_integration\MSBuildExtensions\*' -Destination 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v160\BuildCustomizations'
+
+# Mark all directories as Git safe directories so Nitro's cloned dependencies work
+RUN powershell -Command \
+    git config --global --add safe.directory '*'
+
+# Packages required to compile Nitro
+RUN powershell -Command \
+    choco install pkgconfiglite --allow-empty-checksums -y
+
+RUN powershell -Command \
+    choco install Ninja -y
+
+RUN choco install 7zip -y; \
+    7z --help
+
+# Requirements to build TensorRT-LLM on Windows
+# COPY ./requirements-windows.txt ./tensorrt-llm-nitro/requirements-windows.txt
+# COPY ./requirements-dev-windows.txt ./tensorrt-llm-nitro/requirements-dev-windows.txt
+# RUN powershell -Command \
+#     cd tensorrt-llm-nitro; \
+#     pip install --no-cache-dir -r .\requirements-dev-windows.txt
+
+# COPY ./.git ./tensorrt-llm-nitro/.git
+
+# COPY ./3rdparty ./tensorrt-llm-nitro/3rdparty
+
+# COPY ./cpp ./tensorrt-llm-nitro/cpp
+
+RUN powershell -Command \
+    $ErrorActionPreference = 'Stop'; \
+    git clone https://github.com/janhq/nitro-tensorrt-llm.git; \
+    cd nitro-tensorrt-llm; \
+    git checkout tensorrt-llm-nitro-rel; \
+    git submodule update --init --recursive; \
+    pip install --no-cache-dir -r .\requirements-dev-windows.txt; \
+    cd cpp/tensorrt_llm/nitro; \
+    cmake -S ./nitro_deps -B ./build_deps/nitro_deps; \
+    cmake --build ./build_deps/nitro_deps --config Release
+
+RUN setx Path "%Path%;C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools"
+
+RUN VsDevCmd.bat -arch=amd64 && \
+    powershell.exe -NoLogo -ExecutionPolicy Bypass "cd nitro-tensorrt-llm; python .\scripts\build_wheel.py -a '80-real;86-real' --trt_root 'C:\workspace\TensorRT-9.2.0.5\'"
+
+# # -----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 6ef4b374a4f..37adf9dd9f3 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -17,6 +17,7 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(CMAKE_BUILD_TYPE Release)
 
 include(CheckLanguage)
 include(cmake/modules/set_ifndef.cmake)
@@ -29,9 +30,10 @@ project(tensorrt_llm LANGUAGES CXX)
 # Build options
 option(BUILD_PYT "Build in PyTorch TorchScript class mode" ON)
 option(BUILD_PYBIND "Build Python bindings for C++ runtime and batch manager"
-       ON)
-option(BUILD_TESTS "Build Google tests" ON)
-option(BUILD_BENCHMARKS "Build benchmarks" ON)
+       OFF)
+option(BUILD_TESTS "Build Google tests" OFF)
+option(BUILD_BENCHMARKS "Build benchmarks" OFF) +option(BUILD_NITRO "Build nitro" ON) option(NVTX_DISABLE "Disable all NVTX features" ON) option(WARNING_IS_ERROR "Treat all warnings as errors" OFF) option(FAST_BUILD "Skip compiling some kernels to accelerate compiling" OFF) @@ -44,12 +46,7 @@ else() message(STATUS "NVTX is enabled") endif() -if(EXISTS - "${CMAKE_CURRENT_SOURCE_DIR}/tensorrt_llm/batch_manager/CMakeLists.txt") - set(BUILD_BATCH_MANAGER_DEFAULT ON) -else() - set(BUILD_BATCH_MANAGER_DEFAULT OFF) -endif() +set(BUILD_BATCH_MANAGER_DEFAULT OFF) option(BUILD_BATCH_MANAGER "Build batch manager from source" ${BUILD_BATCH_MANAGER_DEFAULT}) @@ -129,9 +126,9 @@ endif() # Initialize CMAKE_CUDA_ARCHITECTURES before enabling CUDA if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.8") - set(CMAKE_CUDA_ARCHITECTURES 70-real 80-real 86-real 89-real 90-real) + set(CMAKE_CUDA_ARCHITECTURES 89-real) else() - set(CMAKE_CUDA_ARCHITECTURES 70-real 80-real 86-real) + set(CMAKE_CUDA_ARCHITECTURES 89-real) endif() endif() @@ -177,8 +174,8 @@ include_directories( ${3RDPARTY_DIR}/json/include) # TRT dependencies -set_ifndef(TRT_LIB_DIR ${CMAKE_BINARY_DIR}) -set_ifndef(TRT_INCLUDE_DIR /usr/include/${CMAKE_SYSTEM_PROCESSOR}-linux-gnu) +set_ifndef(TRT_LIB_DIR /usr/local/tensorrt/lib) +set_ifndef(TRT_INCLUDE_DIR /usr/local/tensorrt/include) set(TRT_LIB nvinfer) find_library_create_target(${TRT_LIB} nvinfer SHARED ${TRT_LIB_DIR}) diff --git a/cpp/tensorrt_llm/CMakeLists.txt b/cpp/tensorrt_llm/CMakeLists.txt index bcbf107e04a..29583f0f6c9 100644 --- a/cpp/tensorrt_llm/CMakeLists.txt +++ b/cpp/tensorrt_llm/CMakeLists.txt @@ -188,3 +188,7 @@ if(BUILD_PYBIND) endif() add_subdirectory(plugins) + +if(BUILD_NITRO) + add_subdirectory(nitro) +endif() \ No newline at end of file diff --git a/cpp/tensorrt_llm/nitro/CMakeLists.txt b/cpp/tensorrt_llm/nitro/CMakeLists.txt new file mode 100644 index 00000000000..5b852afab13 --- /dev/null +++ b/cpp/tensorrt_llm/nitro/CMakeLists.txt @@ -0,0 +1,96 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & +# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
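+# Build sketch (invocation assumed, not taken from this patch): with the
+# BUILD_NITRO option added above, Nitro is compiled as part of the main
+# TensorRT-LLM CMake project, e.g.
+#
+#   cmake -S cpp -B cpp/build -DBUILD_NITRO=ON
+#   cmake --build cpp/build --config Release --target nitro
+#
+# install_deps.sh (later in this patch) must have populated build_deps/_install
+# first so that Drogon and SentencePiece can be found via CMAKE_PREFIX_PATH.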
+# C++17
+# Nitro init
+# The executable is pinned to C++17 below (see target_compile_features), so no
+# feature-detection block is needed here.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_deps/_install)
+
+message(STATUS "Current Source Directory NITRO: ${CMAKE_CURRENT_SOURCE_DIR}")
+message(STATUS "Current CMake Prefix Path of NITRO: ${CMAKE_PREFIX_PATH}")
+
+set(OPENSSL_USE_STATIC_LIBS TRUE)
+
+# Enable pkg-config support in CMake
+find_package(PkgConfig REQUIRED)
+find_package(Drogon CONFIG REQUIRED)
+
+if(NOT WIN32) # Linux
+  # Use pkg-config to find the SentencePiece library
+  pkg_search_module(SENTENCEPIECE REQUIRED sentencepiece)
+else() # Windows
+  set(SENTENCEPIECE_INCLUDE_DIRS "${CMAKE_PREFIX_PATH}/include")
+  set(SENTENCEPIECE_LIBRARY_DIRS "${CMAKE_PREFIX_PATH}/lib")
+endif()
+
+message(STATUS "SentencePiece library dirs: ${SENTENCEPIECE_LIBRARY_DIRS}")
+message(STATUS "SentencePiece header dirs: ${SENTENCEPIECE_INCLUDE_DIRS}")
+
+include_directories(${PROJECT_SOURCE_DIR}/include ${SENTENCEPIECE_INCLUDE_DIRS})
+
+link_directories(${SENTENCEPIECE_LIBRARY_DIRS})
+
+set(TOP_LEVEL_DIR "${PROJECT_SOURCE_DIR}/..")
+
+add_custom_target(nitro_proj)
+
+set(CXXOPTS_SRC_DIR ${PROJECT_SOURCE_DIR}/../3rdparty/cxxopts)
+add_subdirectory(${CXXOPTS_SRC_DIR} ${CMAKE_CURRENT_BINARY_DIR}/cxxopts)
+
+# main
+add_executable(nitro main.cc)
+
+target_link_libraries(
+  nitro
+  PUBLIC ${SHARED_TARGET} sentencepiece nvinfer_plugin_tensorrt_llm
+         cxxopts::cxxopts
+  PRIVATE Drogon::Drogon ${CMAKE_THREAD_LIBS_INIT})
+
+target_compile_features(nitro PRIVATE cxx_std_17)
+target_compile_definitions(nitro PUBLIC TOP_LEVEL_DIR="${TOP_LEVEL_DIR}")
+
+aux_source_directory(controllers CTL_SRC)
+aux_source_directory(common COMMON_SRC)
+aux_source_directory(context CONTEXT_SRC)
+aux_source_directory(models MODEL_SRC)
+
+target_include_directories(nitro PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+target_sources(nitro PRIVATE ${CTL_SRC} ${COMMON_SRC} ${CONTEXT_SRC})
+
+add_dependencies(nitro_proj nitro)
diff --git a/cpp/tensorrt_llm/nitro/controllers/tensorrtllm.cc b/cpp/tensorrt_llm/nitro/controllers/tensorrtllm.cc
new file mode 100644
index 00000000000..c3891440dd3
--- /dev/null
+++ b/cpp/tensorrt_llm/nitro/controllers/tensorrtllm.cc
@@ -0,0 +1,389 @@
+#include "tensorrtllm.h"
+#include "models/chat_completion_request.h"
+#include "nlohmann/json.hpp"
+#include "tensorrt_llm/runtime/generationInput.h"
+#include "tensorrt_llm/runtime/generationOutput.h"
+#include "tensorrt_llm/runtime/samplingConfig.h"
+#include "utils/nitro_utils.h"
+#include <algorithm>
+#include <chrono>
+#include <cstdint>
+#include <cstring>
+#include <ctime>
+#include <filesystem>
+#include <functional>
+#include <iostream>
+#include <mutex>
+#include <queue>
+#include <string>
+#include <thread>
+#include <vector>
+
+using json = nlohmann::json;
+using namespace inferences;
+
+void removeId(std::vector<int32_t>& vec, int id)
+{
+    vec.erase(std::remove(vec.begin(), vec.end(), id), vec.end());
+}
+
+struct inferenceState
+{
+    int prevPos{0};
+    std::string prevText;
+    bool isFinished = false;
+    std::queue<std::string> textsToStream;
+    std::mutex queueMutex; // Mutex to protect access to textsToStream
+
+    size_t stopWordMatchLen = 0;
+    std::vector<std::string> sequence{"<", "|", "im", "_", "end", "|", ">"};
+
+    void reset()
+    {
+        stopWordMatchLen = 0;
+        prevText = "";
+    }
+
+    bool isComplete() const
+    {
+        return stopWordMatchLen >= sequence.size();
+    }
+};
+
+bool handleMatch(const std::string& rawText, std::shared_ptr<inferenceState> inferState)
+{
+    if (inferState->isComplete())
+    {
+        return true;
+    }
+
+    if (rawText == inferState->sequence[inferState->stopWordMatchLen])
+    {
+        inferState->stopWordMatchLen++; // Move to next state
+        inferState->prevText = rawText;
+        return true;
+    }
+    else if (inferState->stopWordMatchLen > 0 && rawText == inferState->sequence[0])
+    {
+        inferState->stopWordMatchLen = 1; // Restart from first match if sequence breaks but matches start
+        inferState->prevText = rawText;
+        return true;
+    }
+    else
+    {
+        inferState->reset();
+        return false; // Reset to start if sequence breaks
+    }
+}
+
+// Only a single-token stopping point is supported for now
+std::string create_return_json(const std::string& id, const std::string& model, const std::string& content,
+    Json::Value finish_reason = Json::Value())
+{
+    Json::Value root;
+
+    root["id"] = id;
+    root["model"] = model;
+    root["created"] = static_cast<int>(std::time(nullptr));
+    root["object"] = "chat.completion.chunk";
+
+    Json::Value choicesArray(Json::arrayValue);
+    Json::Value choice;
+
+    choice["index"] = 0;
+    Json::Value delta;
+    delta["content"] = content;
+    choice["delta"] = delta;
+    choice["finish_reason"] = finish_reason;
+
+    choicesArray.append(choice);
+    root["choices"] = choicesArray;
+
+    Json::StreamWriterBuilder writer;
+    writer["indentation"] = ""; // This sets the indentation to an empty string,
+                                // producing compact output.
+    return Json::writeString(writer, root);
+}
+
+GenerationInput::TensorPtr tensorrtllm::getTensorSingleStopWordList(int stopToken)
+{
+    std::vector<int32_t> stopWordsTokens = {stopToken, -1, 1, -1}; // Extend with -1 for increased length
+    return gptSession->getBufferManager().copyFrom(stopWordsTokens, ITensor::makeShape({1, 2, 2}), MemoryType::kGPU);
+}
+
+GenerationInput::TensorPtr tensorrtllm::getTensorChatMLStopWordList()
+{
+    std::vector<int32_t> stopWordsTokens = {28789, 28766, 321, 28730, 416, 28766, 28767, 32000, 6, 8, -1, -1, -1, -1,
+        -1, -1}; // Extend with -1 for increased length
+    return gptSession->getBufferManager().copyFrom(stopWordsTokens, ITensor::makeShape({1, 2, 8}), MemoryType::kGPU);
+}
+
+GenerationInput tensorrtllm::createGenerationInput(std::vector<int32_t> inputIdsHost)
+{
+    int inputLen = inputIdsHost.size();
+    std::vector<int32_t> inputLengthsHost(batchSize, inputLen);
+    GenerationInput::TensorPtr inputLengths
+        = gptSession->getBufferManager().copyFrom(inputLengthsHost, ITensor::makeShape({batchSize}), MemoryType::kGPU);
+    GenerationInput::TensorPtr inputIds = gptSession->getBufferManager().copyFrom(
+        inputIdsHost, ITensor::makeShape({batchSize, inputLen}), MemoryType::kGPU);
+
+    GenerationInput generationInput{0, 0, inputIds, inputLengths, modelConfig->usePackedInput()};
+
+    generationInput.stopWordsList = getTensorChatMLStopWordList();
+    return generationInput;
+}
+
+GenerationOutput tensorrtllm::createGenerationOutput()
+{
+    GenerationOutput generationOutput{
+        gptSession->getBufferManager().emptyTensor(MemoryType::kGPU, nvinfer1::DataType::kINT32),
+        gptSession->getBufferManager().emptyTensor(MemoryType::kGPU, nvinfer1::DataType::kINT32)};
+    return generationOutput;
+}
+
+void inferenceThread(std::shared_ptr<inferenceState> inferState, std::vector<int32_t> inputIdsHost,
+    std::function<void(const HttpResponsePtr&)> callback, tensorrtllm* self)
+{
+    const int inputLen = inputIdsHost.size();
+    const int outputLen = 2048 - inputLen;
+
+    // Create sampling config
+    SamplingConfig samplingConfig{1};
+    samplingConfig.temperature = std::vector<float>{0.0f};
+    samplingConfig.randomSeed = std::vector<uint64_t>{static_cast<uint64_t>(42ull)};
+    samplingConfig.topK = std::vector<SizeType>{40};
+    samplingConfig.topP = std::vector<float>{0.0f};
+    samplingConfig.minLength = std::vector<SizeType>{outputLen};
+    samplingConfig.repetitionPenalty = std::vector<float>{1.3f};
+
+    std::cout << "Start Nitro testing session: " << std::endl;
+
+    // Input preparation
+    GenerationInput generationInput = self->createGenerationInput(inputIdsHost);
+
+    GenerationOutput generationOutput = self->createGenerationOutput();
+
+    // Define the callback to stream each generated token
+    generationOutput.onTokenGenerated = [&inferState, inputLen, outputLen, self, &generationOutput](
+                                            GenerationOutput::TensorPtr const& outputIds, SizeType step, bool finished)
+    {
+        // Assuming the shape of outputIds tensor is (1, 1, 160), where 160 is the number of tokens
+        int outputLength = outputIds->getShape().d[2]; // Get the length of output IDs based on the tensor shape
+        // Copy output IDs from GPU to host for decoding
+        std::vector<int32_t> outputIdsHost(outputLength);
+        self->gptSession->getBufferManager().copy(*outputIds, outputIdsHost.data(), MemoryType::kCPU);
+        // Drop the prompt tokens (and any zero padding) before decoding
+        std::vector<int32_t> outputIdsHostDecode(outputIdsHost.begin() + inputLen, outputIdsHost.end());
+        removeId(outputIdsHostDecode, 0);
+        std::string text = self->nitro_tokenizer->decode(outputIdsHostDecode);
+
+        if (inferState->prevPos > 0 && inferState->prevPos < static_cast<int>(text.size()))
+        {
+            // Valid prevPos, proceed with slicing the string from prevPos to the end
+            std::string stringTok(text.begin() + inferState->prevPos, text.end());
+            std::lock_guard<std::mutex> guard(inferState->queueMutex); // Protect access with a lock
+            inferState->textsToStream.push(stringTok);
+        }
+        inferState->prevPos = text.size();
+        if (finished)
+        {
+            std::lock_guard<std::mutex> guard(inferState->queueMutex); // Protect access with a lock
+            inferState->textsToStream.push("[DONE]");
+            return;
+        }
+    };
+    // The rest of the logic inside `chat_completion` remains unchanged...
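+    // Design note: this function is the producer half of a simple
+    // producer/consumer pair. generate() below blocks this worker thread while
+    // onTokenGenerated pushes decoded fragments into inferState->textsToStream
+    // under queueMutex; the chunked_content_provider in chat_completion drains
+    // the queue on Drogon's side and emits each fragment as an SSE "data:"
+    // frame. The stop-words tensors above follow TensorRT-LLM's packed
+    // [batch, 2, maxStopWordsLen] layout: row 0 holds the concatenated token
+    // ids, row 1 the end offsets, padded with -1.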
+    // After finishing the setup, call the inference logic
+    self->gptSession->generate(generationOutput, generationInput, samplingConfig);
+}
+
+void tensorrtllm::chat_completion(
+    inferences::ChatCompletionRequest&& completion, std::function<void(const HttpResponsePtr&)>&& callback)
+{
+    std::string formatted_input = pre_prompt;
+
+    nlohmann::json data;
+
+    data["stream"] = completion.stream;
+    data["n_predict"] = completion.max_tokens;
+    data["top_p"] = completion.top_p;
+    data["temperature"] = completion.temperature;
+    data["frequency_penalty"] = completion.frequency_penalty;
+    data["presence_penalty"] = completion.presence_penalty;
+    const Json::Value& messages = completion.messages;
+
+    // Format the input from user
+    for (const auto& message : messages)
+    {
+        std::string input_role = message["role"].asString();
+        std::string role;
+        if (input_role == "user")
+        {
+            role = user_prompt;
+            std::string content = message["content"].asString();
+            formatted_input += role + content;
+        }
+        else if (input_role == "assistant")
+        {
+            role = ai_prompt;
+            std::string content = message["content"].asString();
+            formatted_input += role + content;
+        }
+        else if (input_role == "system")
+        {
+            role = system_prompt;
+            std::string content = message["content"].asString();
+            formatted_input = role + content + formatted_input;
+        }
+        else
+        {
+            role = input_role;
+            std::string content = message["content"].asString();
+            formatted_input += role + content;
+        }
+    }
+    formatted_input += ai_prompt;
+
+    std::shared_ptr<inferenceState> inferState = std::make_shared<inferenceState>();
+
+    std::vector<int32_t> inputIdsHost = nitro_tokenizer->encode(formatted_input);
+
+    // Kick off generation on a worker thread; sampling is configured there
+    std::thread infThread(inferenceThread, inferState, inputIdsHost, callback, this);
+    infThread.detach(); // Detach the thread to allow it to run independently
+
+    auto chunked_content_provider = [inferState](char* pBuffer, std::size_t nBuffSize) -> std::size_t
+    {
+        if (!pBuffer)
+        {
+            LOG_INFO << "Connection closed or buffer is null. Reset context";
+            return 0; // Indicate no more data to send
+        }
+
+        if (inferState->isFinished)
+        {
+            return 0;
+        }
+
+        while (true) // Continuously check if the queue is not empty
+        {
+            std::unique_lock<std::mutex> lock(inferState->queueMutex); // Lock the queue for exclusive access
+            if (!inferState->textsToStream.empty())
+            {
+                std::string rawText = inferState->textsToStream.front();
+                inferState->textsToStream.pop();
+                if (handleMatch(rawText, inferState))
+                {
+                    continue;
+                }
+
+                if (rawText == "[DONE]")
+                {
+                    LOG_INFO << "End of result";
+                    const std::string str
+                        = "data: " + create_return_json(nitro_utils::generate_random_string(20), "_", "", "stop")
+                        + "\n\n" + "data: [DONE]" + "\n\n";
+
+                    std::size_t nRead = std::min(str.size(), nBuffSize);
+                    memcpy(pBuffer, str.data(), nRead);
+                    inferState->isFinished = true;
+                    return nRead;
+                }
+                const std::string textToStream
+                    = "data: " + create_return_json(nitro_utils::generate_random_string(20), "_", rawText) + "\n\n";
+                lock.unlock(); // Unlock as soon as possible
+
+                // Ensure we do not exceed the buffer size. Truncate if necessary.
+                std::size_t bytesToWrite = std::min(nBuffSize, textToStream.size());
+
+                // Copy the text to the provided buffer
+                std::memcpy(pBuffer, textToStream.data(), bytesToWrite);
+                inferState->prevText = rawText;
+                return bytesToWrite; // Return the number of bytes written to the buffer
+            }
+            else
+            {
+                // If the queue is empty, release the lock and back off briefly
+                // before trying again
+                lock.unlock();
+                std::this_thread::sleep_for(std::chrono::milliseconds(1));
+            }
+        }
+    };
+
+    auto streamResponse = nitro_utils::nitroStreamResponse(chunked_content_provider);
+    callback(streamResponse);
+    return;
+}
+
+void tensorrtllm::loadModel(const HttpRequestPtr& req, std::function<void(const HttpResponsePtr&)>&& callback)
+{
+    const auto& jsonBody = req->getJsonObject();
+
+    if (!jsonBody)
+    {
+        Json::Value jsonResp;
+        jsonResp["message"] = "Require params!";
+        auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+        callback(resp);
+        return;
+    }
+
+    const std::filesystem::path engineDir = (*jsonBody)["engine_path"].asString();
+    int ctx_len = jsonBody->get("ctx_len", 2048).asInt();
+
+    logger = std::make_shared<TllmLogger>();
+    logger->setLevel(nvinfer1::ILogger::Severity::kINFO);
+    // Fixed settings
+    const std::string modelName = "mistral";
+    initTrtLlmPlugins(logger.get());
+    // Load model configuration
+    std::filesystem::path jsonFileName = engineDir / "config.json";
+    std::filesystem::path tokenizerModelName = engineDir / "tokenizer.model";
+
+    nitro_tokenizer = std::make_unique<Tokenizer>(tokenizerModelName.string());
+    LOG_INFO << "Loaded tokenizer";
+
+    auto const json = GptJsonConfig::parse(jsonFileName);
+    auto config = json.getModelConfig();
+    modelConfig = std::make_unique<GptModelConfig>(config);
+    auto const worldConfig = WorldConfig::mpi(1, json.getTensorParallelism(), json.getPipelineParallelism());
+    auto const enginePath = engineDir / json.engineFilename(worldConfig, modelName);
+    LOG_INFO << "Engine Path : " << enginePath.string();
+
+    // Currently using a fixed session config
+    sessionConfig.maxBatchSize = batchSize;
+    sessionConfig.maxBeamWidth = 1;     // Fixed for simplicity
+    sessionConfig.maxSequenceLength = ctx_len;
+    sessionConfig.cudaGraphMode = true; // Fixed for simplicity
+
+    // Init gptSession
+    gptSession = std::make_unique<GptSession>(sessionConfig, *modelConfig, worldConfig, enginePath.string(), logger);
+    // Model loaded successfully
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model loaded successfully";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    callback(resp);
+    return;
+}
diff --git a/cpp/tensorrt_llm/nitro/controllers/tensorrtllm.h b/cpp/tensorrt_llm/nitro/controllers/tensorrtllm.h
new file mode 100644
index 00000000000..40454829f6b
--- /dev/null
+++ b/cpp/tensorrt_llm/nitro/controllers/tensorrtllm.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include "drogon/HttpTypes.h"
+#include "sentencepiece_processor.h"
+#include <drogon/HttpController.h>
+#include <iostream>
+
+#include "tensorrt_llm/plugins/api/tllmPlugin.h"
+#include "tensorrt_llm/runtime/generationInput.h"
+#include "tensorrt_llm/runtime/generationOutput.h"
+#include "tensorrt_llm/runtime/gptJsonConfig.h"
+#include "tensorrt_llm/runtime/gptModelConfig.h"
+#include "tensorrt_llm/runtime/gptSession.h"
+#include "tensorrt_llm/runtime/samplingConfig.h"
+#include "tensorrt_llm/runtime/tllmLogger.h"
+#include <cstdint>
+#include <filesystem>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "models/chat_completion_request.h"
+
+using namespace drogon;
+
+using namespace tensorrt_llm::runtime;
+
+class Tokenizer
+{
+private:
+    sentencepiece::SentencePieceProcessor processor;
+
+    void replaceSubstring(std::string& base, const std::string& from, const std::string& to)
+    {
+        size_t start_pos = 0;
+        while ((start_pos = base.find(from, start_pos)) != std::string::npos)
+        {
+            base.replace(start_pos, from.length(), to);
+            start_pos += to.length();
+        }
+    }
+
+public:
+    Tokenizer(const std::string& modelPath)
+    {
+        auto status = processor.Load(modelPath);
+        if (!status.ok())
+        {
+            std::cerr << status.ToString() << std::endl;
+        }
+        LOG_INFO << "Successfully loaded the tokenizer";
+    }
+
+    std::string decodeWithSpace(const int id)
+    {
+        std::string text = processor.IdToPiece(id);
+        replaceSubstring(text, "▁", " ");
+        return text;
+    }
+
+    std::string decode(const std::vector<int32_t>& ids)
+    {
+        std::string text = processor.DecodeIds(ids);
+        return text;
+    }
+
+    std::vector<int32_t> encode(const std::string& input)
+    {
+        std::vector<int32_t> ids;
+        processor.Encode(input, &ids);
+        return ids;
+    }
+};
+
+namespace inferences
+{
+
+class tensorrtllm : public drogon::HttpController<tensorrtllm>
+{
+public:
+    tensorrtllm(){};
+
+    METHOD_LIST_BEGIN
+    ADD_METHOD_TO(tensorrtllm::chat_completion, "/v1/chat/completions", Post); // path is /v1/chat/completions
+    METHOD_ADD(tensorrtllm::loadModel, "loadmodel", Post);
+    METHOD_LIST_END
+
+    void chat_completion(
+        inferences::ChatCompletionRequest&& completion, std::function<void(const HttpResponsePtr&)>&& callback);
+
+    void loadModel(const HttpRequestPtr& req, std::function<void(const HttpResponsePtr&)>&& callback);
+
+    std::unique_ptr<GptSession> gptSession;
+    GenerationInput::TensorPtr getTensorSingleStopWordList(int stopToken);
+    GenerationInput createGenerationInput(std::vector<int32_t> inputIds);
+    GenerationOutput createGenerationOutput();
+    std::unique_ptr<Tokenizer> nitro_tokenizer;
+    GenerationInput::TensorPtr getTensorChatMLStopWordList();
+
+private:
+    GptSession::Config sessionConfig{1, 1, 1};
+    SamplingConfig samplingConfig{1};
+    std::unique_ptr<GptModelConfig> modelConfig;
+    std::shared_ptr<TllmLogger> logger;
+    std::string example_string{
+        "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nPlease write a long and sad "
+        "story<|im_end|>\n<|im_start|>assistant"};
+    std::string user_prompt{"<|im_end|>\n<|im_start|>user\n"};
+    std::string ai_prompt{"<|im_end|>\n<|im_start|>assistant\n"};
+    std::string system_prompt{"<|im_start|>system\n"};
+    std::string pre_prompt;
+    int batchSize = 1;
+};
+
+} // namespace inferences
diff --git a/cpp/tensorrt_llm/nitro/install_deps.sh b/cpp/tensorrt_llm/nitro/install_deps.sh
new file mode 100755
index 00000000000..d43257aa08e
--- /dev/null
+++ b/cpp/tensorrt_llm/nitro/install_deps.sh
@@ -0,0 +1,3 @@
+cmake -S ./nitro_deps -B ./build_deps/nitro_deps
+make -C ./build_deps/nitro_deps -j 10
+rm -rf ./build_deps/nitro_deps
diff --git a/cpp/tensorrt_llm/nitro/main.cc b/cpp/tensorrt_llm/nitro/main.cc
new file mode 100644
index 00000000000..730253f74f3
--- /dev/null
+++ b/cpp/tensorrt_llm/nitro/main.cc
@@ -0,0 +1,73 @@
+#include "utils/nitro_utils.h"
+#include <climits> // for PATH_MAX
+#include <cstdlib>
+#include <drogon/drogon.h>
+#include <thread>
+
+#if defined(__APPLE__) && defined(__MACH__)
+#include <libgen.h> // for dirname()
+#include <mach-o/dyld.h>
+#elif defined(__linux__)
+#include <libgen.h> // for dirname()
+#include <unistd.h> // for readlink()
+#elif defined(_WIN32)
+#include <windows.h>
+#undef max
+#else
+#error "Unsupported platform!"
+#endif
+
+int main(int argc, char* argv[])
+{
+    int thread_num = 1;
+    std::string host = "127.0.0.1";
+    int port = 3928;
+    std::string uploads_folder_path;
+
+    // Number of nitro threads
+    if (argc > 1)
+    {
+        thread_num = std::atoi(argv[1]);
+    }
+
+    // Check for host argument
+    if (argc > 2)
+    {
+        host = argv[2];
+    }
+
+    // Check for port argument
+    if (argc > 3)
+    {
+        port = std::atoi(argv[3]); // Convert string argument to int
+    }
+
+    // Uploads folder path
+    if (argc > 4)
+    {
+        uploads_folder_path = argv[4];
+    }
+
+    int logical_cores = std::thread::hardware_concurrency();
+    int drogon_thread_num = 1; // temporarily set thread num to 1
+    nitro_utils::nitro_logo();
+#ifdef NITRO_VERSION
+    LOG_INFO << "Nitro version: " << NITRO_VERSION;
+#else
+    LOG_INFO << "Nitro version: undefined";
+#endif
+    LOG_INFO << "Server started, listening at: " << host << ":" << port;
+    LOG_INFO << "Please load your model";
+    drogon::app().addListener(host, port);
+    drogon::app().setThreadNum(drogon_thread_num);
+    if (!uploads_folder_path.empty())
+    {
+        LOG_INFO << "Drogon uploads folder is at: " << uploads_folder_path;
+        drogon::app().setUploadPath(uploads_folder_path);
+    }
+    LOG_INFO << "Number of threads is: " << drogon::app().getThreadNum();
+
+    drogon::app().run();
+
+    return 0;
+}
diff --git a/cpp/tensorrt_llm/nitro/models/chat_completion_request.h b/cpp/tensorrt_llm/nitro/models/chat_completion_request.h
new file mode 100644
index 00000000000..bd802d67e02
--- /dev/null
+++ b/cpp/tensorrt_llm/nitro/models/chat_completion_request.h
@@ -0,0 +1,36 @@
+#pragma once
+#include <drogon/HttpRequest.h>
+
+namespace inferences {
+struct ChatCompletionRequest {
+  bool stream = false;
+  int max_tokens = 500;
+  float top_p = 0.95f;
+  float temperature = 0.8f;
+  float frequency_penalty = 0;
+  float presence_penalty = 0;
+  Json::Value stop = Json::Value(Json::arrayValue);
+  Json::Value messages = Json::Value(Json::arrayValue);
+};
+} // namespace inferences
+
+namespace drogon {
+template <>
+inline inferences::ChatCompletionRequest fromRequest(const HttpRequest& req) {
+  auto jsonBody = req.getJsonObject();
+  inferences::ChatCompletionRequest completion;
+  if (jsonBody) {
+    completion.stream = (*jsonBody).get("stream", false).asBool();
+    completion.max_tokens = (*jsonBody).get("max_tokens", 500).asInt();
+    completion.top_p = (*jsonBody).get("top_p", 0.95).asFloat();
+    completion.temperature = (*jsonBody).get("temperature", 0.8).asFloat();
+    completion.frequency_penalty =
+        (*jsonBody).get("frequency_penalty", 0).asFloat();
+    completion.presence_penalty =
+        (*jsonBody).get("presence_penalty", 0).asFloat();
+    completion.messages = (*jsonBody)["messages"];
+    completion.stop = (*jsonBody)["stop"];
+  }
+  return completion;
+}
+} // namespace drogon
diff --git a/cpp/tensorrt_llm/nitro/nitro_deps/CMakeLists.txt b/cpp/tensorrt_llm/nitro/nitro_deps/CMakeLists.txt
new file mode 100644
index 00000000000..cd0d76a719e
--- /dev/null
+++ b/cpp/tensorrt_llm/nitro/nitro_deps/CMakeLists.txt
@@ -0,0 +1,108 @@
+cmake_minimum_required(VERSION 3.22)
+
+# Stages Nitro's third-party dependencies via ExternalProject
+project(nitro_deps)
+
+include(ExternalProject)
+
+# Define variables
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(THIRD_PARTY_INSTALL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../build_deps/_install)
+
+# To force find_package to pick up the static .a files from the self-installed
+# versions, the CMAKE_FIND_ROOT_PATH_MODE_* variables could be set to ONLY;
+# this is left disabled for now.
+# set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+# set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+# set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# Add the external projects
+set(ZLIB_USE_STATIC_LIBS OFF)
+find_package(ZLIB)
+if(NOT ZLIB_FOUND)
+  set(ZLIB_USE_STATIC_LIBS ON)
+  ExternalProject_Add(
+    zlib
+    GIT_REPOSITORY https://github.com/madler/zlib.git
+    GIT_TAG v1.2.11
+    CMAKE_ARGS
+      -DBUILD_SHARED_LIBS=OFF
+      -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH}
+  )
+endif()
+
+ExternalProject_Add(
+  brotli
+  GIT_REPOSITORY https://github.com/google/brotli
+  GIT_TAG v1.1.0
+  CMAKE_ARGS
+    -DCMAKE_BUILD_TYPE=Release
+    -DBUILD_SHARED_LIBS=OFF
+    -DSHARE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/share
+    -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH}
+)
+
+ExternalProject_Add(
+  jsoncpp
+  GIT_REPOSITORY https://github.com/open-source-parsers/jsoncpp
+  GIT_TAG 1.9.5
+  CMAKE_ARGS
+    -DBUILD_SHARED_LIBS=OFF
+    -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH}
+)
+
+ExternalProject_Add(
+  c-ares
+  GIT_REPOSITORY https://github.com/c-ares/c-ares
+  GIT_TAG cares-1_26_0
+  CMAKE_ARGS
+    -DCARES_SHARED=OFF
+    -DCARES_STATIC=ON
+    -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH}
+)
+
+ExternalProject_Add(
+  drogon
+  GIT_REPOSITORY https://github.com/drogonframework/drogon
+  GIT_TAG v1.9.2
+  CMAKE_ARGS
+    -DCMAKE_BUILD_TYPE=Release
+    -DOPENSSL_USE_STATIC_LIBS=TRUE
+    -DZLIB_USE_STATIC_LIBS=${ZLIB_USE_STATIC_LIBS}
+    -DBUILD_ORM=OFF
+    -DBUILD_YAML_CONFIG=OFF
+    -DBUILD_EXAMPLES=OFF
+    -DBUILD_CTL=OFF
+    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
+    -DBUILD_BROTLI=ON
+    -DCMAKE_PREFIX_PATH=${THIRD_PARTY_INSTALL_PATH}
+    # -DCMAKE_FIND_ROOT_PATH=${THIRD_PARTY_INSTALL_PATH} # force the static .a lookup
+    -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH}
+)
+
+ExternalProject_Add(
+  sentencepiece
+  GIT_REPOSITORY https://github.com/google/sentencepiece
+  GIT_TAG v0.2.0
+  CMAKE_ARGS
+    -DSPM_ENABLE_SHARED=OFF
+    -DCMAKE_INSTALL_PREFIX=${THIRD_PARTY_INSTALL_PATH}
+)
+
+# Fix the trantor CMakeLists to link c-ares statically on Windows
+if(WIN32)
+  set(TRANTOR_CMAKE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/../build_deps/nitro_deps/drogon-prefix/src/drogon/trantor/CMakeLists.txt)
+  ExternalProject_Add_Step(drogon trantor_custom_target
+    COMMAND ${CMAKE_COMMAND} -E echo add_definitions(-DCARES_STATICLIB) >> ${TRANTOR_CMAKE_FILE}
+    DEPENDEES download
+  )
+endif()
+
+include_directories(${THIRD_PARTY_INSTALL_PATH}/include)
+link_directories(${THIRD_PARTY_INSTALL_PATH}/lib)
+
+# Make sure the bundled dependencies are built before Drogon consumes them
+add_dependencies(drogon c-ares jsoncpp brotli)
+
+if(ZLIB_USE_STATIC_LIBS)
+  add_dependencies(drogon zlib)
+endif()
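+
+# Standalone usage sketch, mirroring install_deps.sh (which uses make where
+# cmake --build is shown here); this super-build is configured on its own,
+# not as part of the main project, from the nitro/ directory:
+#
+#   cmake -S ./nitro_deps -B ./build_deps/nitro_deps
+#   cmake --build ./build_deps/nitro_deps
+#
+# Everything installs into build_deps/_install, which the Nitro
+# CMakeLists.txt puts on CMAKE_PREFIX_PATH.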
diff --git a/cpp/tensorrt_llm/nitro/utils/nitro_utils.h b/cpp/tensorrt_llm/nitro/utils/nitro_utils.h
new file mode 100644
index 00000000000..5e382bd82fe
--- /dev/null
+++ b/cpp/tensorrt_llm/nitro/utils/nitro_utils.h
@@ -0,0 +1,287 @@
+#pragma once
+#include <algorithm>
+#include <chrono>
+#include <cstdio>
+#include <drogon/HttpResponse.h>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <random>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <trantor/utils/Logger.h>
+#include <vector>
+// Include platform-specific headers
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dirent.h>
+#endif
+
+namespace nitro_utils
+{
+
+inline std::string models_folder = "./models";
+
+inline std::string extractBase64(const std::string& input)
+{
+    std::regex pattern("base64,(.*)");
+    std::smatch match;
+
+    if (std::regex_search(input, match, pattern))
+    {
+        std::string base64_data = match[1];
+        base64_data = base64_data.substr(0, base64_data.length() - 1);
+        return base64_data;
+    }
+
+    return "";
+}
+
+// Helper function to encode data to Base64
+inline std::string base64Encode(const std::vector<unsigned char>& data)
+{
+    static const char encodingTable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+    std::string encodedData;
+    int i = 0;
+    int j = 0;
+    unsigned char array3[3];
+    unsigned char array4[4];
+
+    for (unsigned char c : data)
+    {
+        array3[i++] = c;
+        if (i == 3)
+        {
+            array4[0] = (array3[0] & 0xfc) >> 2;
+            array4[1] = ((array3[0] & 0x03) << 4) + ((array3[1] & 0xf0) >> 4);
+            array4[2] = ((array3[1] & 0x0f) << 2) + ((array3[2] & 0xc0) >> 6);
+            array4[3] = array3[2] & 0x3f;
+
+            for (i = 0; i < 4; i++)
+                encodedData += encodingTable[array4[i]];
+            i = 0;
+        }
+    }
+
+    if (i)
+    {
+        for (j = i; j < 3; j++)
+            array3[j] = '\0';
+
+        array4[0] = (array3[0] & 0xfc) >> 2;
+        array4[1] = ((array3[0] & 0x03) << 4) + ((array3[1] & 0xf0) >> 4);
+        array4[2] = ((array3[1] & 0x0f) << 2) + ((array3[2] & 0xc0) >> 6);
+
+        for (j = 0; j < i + 1; j++)
+            encodedData += encodingTable[array4[j]];
+
+        while (i++ < 3)
+            encodedData += '=';
+    }
+
+    return encodedData;
+}
+
+// Function to load an image and convert it to Base64
+inline std::string imageToBase64(const std::string& imagePath)
+{
+    std::ifstream imageFile(imagePath, std::ios::binary);
+    if (!imageFile.is_open())
+    {
+        throw std::runtime_error("Could not open the image file.");
+    }
+
+    std::vector<unsigned char> buffer(std::istreambuf_iterator<char>(imageFile), {});
+    return base64Encode(buffer);
+}
+
+// Helper function to generate a unique filename
+inline std::string generateUniqueFilename(const std::string& prefix, const std::string& extension)
+{
+    // Get current time as a timestamp
+    auto now = std::chrono::system_clock::now();
+    auto now_ms = std::chrono::time_point_cast<std::chrono::milliseconds>(now);
+    auto epoch = now_ms.time_since_epoch();
+    auto value = std::chrono::duration_cast<std::chrono::milliseconds>(epoch);
+
+    // Generate a random number
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<> dis(1000, 9999);
+
+    std::stringstream ss;
+    ss << prefix << value.count() << "_" << dis(gen) << extension;
+    return ss.str();
+}
+
+inline void processLocalImage(const std::string& localPath, std::function<void(const std::string&)> callback)
+{
+    try
+    {
+        std::string base64Image = imageToBase64(localPath);
+        callback(base64Image); // Invoke the callback with the Base64 string
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Error during processing: " << e.what() << std::endl;
+    }
+}
+
+inline std::vector<std::string> listFilesInDir(const std::string& path)
+{
+    std::vector<std::string> files;
+
+#ifdef _WIN32
+    // Windows-specific code
+    WIN32_FIND_DATA findFileData;
+    HANDLE hFind = FindFirstFile((path + "\\*").c_str(), &findFileData);
+
+    if (hFind != INVALID_HANDLE_VALUE)
+    {
+        do
+        {
+            if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
+            {
+                files.push_back(findFileData.cFileName);
+            }
+        } while (FindNextFile(hFind, &findFileData) != 0);
+        FindClose(hFind);
+    }
+#else
+    // POSIX-specific code (Linux, Unix, macOS)
+    DIR* dir;
+    struct dirent* ent;
+
+    if ((dir = opendir(path.c_str())) != NULL)
+    {
+        while ((ent = readdir(dir)) != NULL)
+        {
+            if (ent->d_type == DT_REG)
+            { // Check if it's a regular file
+                files.push_back(ent->d_name);
+            }
+        }
+        closedir(dir);
+    }
+#endif
+
+    return files;
+}
+
+inline std::string rtrim(const std::string& str)
+{
+    size_t end = str.find_last_not_of("\n\t ");
+    return (end == std::string::npos) ? "" : str.substr(0, end + 1);
+}
+
+inline std::string generate_random_string(std::size_t length)
+{
+    const std::string characters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+    std::random_device rd;
+    std::mt19937 generator(rd());
+
+    std::uniform_int_distribution<> distribution(0, static_cast<int>(characters.size()) - 1);
+
+    std::string random_string(length, '\0');
+    std::generate_n(random_string.begin(), length, [&]() { return characters[distribution(generator)]; });
+
+    return random_string;
+}
+
+inline void nitro_logo()
+{
+    std::string resetColor = "\033[0m";
+    std::string asciiArt = R"(
+███╗   ██╗██╗████████╗██████╗  ██████╗
+████╗  ██║██║╚══██╔══╝██╔══██╗██╔═══██╗
+██╔██╗ ██║██║   ██║   ██████╔╝██║   ██║
+██║╚██╗██║██║   ██║   ██╔══██╗██║   ██║
+██║ ╚████║██║   ██║   ██║  ██║╚██████╔╝
+╚═╝  ╚═══╝╚═╝   ╚═╝   ╚═╝  ╚═╝ ╚═════╝
+
+    )";
+
+    std::string asciiArtRTX = R"(
+------------------------
+ ____ ______ __ __ ________ __
+___/ __ \__ __/_ |/ / __ __ \__ | / /
+__/ /_/ /_/ / _\ / _/ / / /_ |/ /
+_/ _, _/_/ / _/ | / /_/ /_ /| /
+/_/ |_| /_/ /_/|_| \____/ /_/ |_/
+
+)";
+
+    for (char c : asciiArt)
+    {
+        if (c == '\n')
+        {
+            std::cout << resetColor << c;
+        }
+        else
+        {
+            std::cout << "\033[94m" << c; // blue
+        }
+    }
+
+    std::cout << resetColor; // Reset color at the end
+
+    for (char c : asciiArtRTX)
+    {
+        if (c == '\n')
+        {
+            std::cout << resetColor << c;
+        }
+        else
+        {
+            std::cout << "\033[1;32m" << c; // bright green
+        }
+    }
+
+    std::cout << resetColor; // Reset color at the end
+}
+
+inline drogon::HttpResponsePtr nitroHttpResponse()
+{
+    auto resp = drogon::HttpResponse::newHttpResponse();
+#ifdef ALLOW_ALL_CORS
+    LOG_INFO << "Respond for all CORS!";
+    resp->addHeader("Access-Control-Allow-Origin", "*");
+#endif
+    return resp;
+}
+
+inline drogon::HttpResponsePtr nitroHttpJsonResponse(const Json::Value& data)
+{
+    auto resp = drogon::HttpResponse::newHttpJsonResponse(data);
+#ifdef ALLOW_ALL_CORS
+    LOG_INFO << "Respond for all CORS!";
+    resp->addHeader("Access-Control-Allow-Origin", "*");
+#endif
+    return resp;
+}
+
+inline drogon::HttpResponsePtr nitroStreamResponse(
+    const std::function<std::size_t(char*, std::size_t)>& callback, const std::string& attachmentFileName = "")
+{
+    auto resp
+        = drogon::HttpResponse::newStreamResponse(callback, attachmentFileName, drogon::CT_NONE, "text/event-stream");
+#ifdef ALLOW_ALL_CORS
+    LOG_INFO << "Respond for all CORS!";
+    resp->addHeader("Access-Control-Allow-Origin", "*");
+#endif
+    return resp;
+}
+
+} // namespace nitro_utils