Pin TensorRT-LLM version #47

Workflow file for this run

name: GPU Enabled Integration Test on PRs

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - ci-*

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  OPTIMUM_NVIDIA_IS_CI: ON
  HF_HUB_ENABLE_HF_TRANSFER: ON
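
# Fan the job out across both multi-GPU runner groups; `config` carries the
# display name and the CUDA 12.4 devel image used as the job container.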
jobs:
  run_gpu_tests:
    strategy:
      fail-fast: false
      matrix:
        config:
          - name: GPU-enabled Optimum-Nvidia Test Suite
            image: nvidia/cuda:12.4.1-devel-ubuntu22.04
        gpu_target: ["nvidia-multi-gpu-l4-runners", "nvidia-multi-gpu-a10-runners"]
    name: ${{ matrix.config.name }}
    runs-on:
      group: "${{ matrix.gpu_target }}"
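    # Container options: tmpfs for /tmp, 64 GB shared memory, all GPUs, host
    # IPC, and the runner's HF cache bind-mounted into the container.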
    container:
      image: ${{ matrix.config.image }}
      options: --mount type=tmpfs,destination=/tmp --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
      env:
        HF_TOKEN: ${{ secrets.OPTIMUM_NVIDIA_HUB_READ_TOKEN }}
    defaults:
      run:
        shell: bash
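    # Set up Python 3.10, check out the repo, install from PyPI plus NVIDIA's
    # index (pre-releases allowed), sanity-check the GPUs and the resolved
    # TensorRT-LLM version, then run the test suite.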
    steps:
      - uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Checkout optimum-nvidia
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - name: Install dependencies
        run: |
          apt update && apt install -y openmpi-bin libopenmpi-dev git
          python3 -m pip install --upgrade -e .[quality,tests] --pre --extra-index-url https://pypi.nvidia.com
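      # This PR pins the TensorRT-LLM version; the editable install above pulls
      # it in transitively, so the pin presumably lives in the package's
      # dependency metadata rather than here. A direct pin would look like
      # (illustrative placeholder, not the actual pinned value):
      #   python3 -m pip install "tensorrt-llm==<pinned-version>" --extra-index-url https://pypi.nvidia.com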
      - name: Run nvidia-smi
        run: |
          nvidia-smi
      - name: Print TensorRT-LLM version
        run: |
          python -c "from tensorrt_llm import __version__; print(__version__)"
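      # -n 4 distributes tests over four pytest-xdist workers; -p no:warnings
      # disables the warnings plugin, -o log_cli=true streams logs live, and
      # tests/integration is skipped in this PR-gating run.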
      - name: Run optimum-nvidia test-suite
        run: |
          pytest -s -vvvvv -n 4 -p no:warnings -o log_cli=true --ignore=tests/integration tests/
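
# Local reproduction sketch (assumption, not part of this workflow): start the
# same image with GPU access and run the suite inside it, e.g.
#   docker run --rm --gpus all --ipc host --shm-size 64gb -it nvidia/cuda:12.4.1-devel-ubuntu22.04 bash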