Rebase with TGI v2.0 #134

Merged: 154 commits, merged May 6, 2024
ab34c16
Fix AMD documentation (#1307)
fxmarty Dec 4, 2023
a41c1a6
Add a stale bot. (#1313)
Narsil Dec 5, 2023
a7f52f3
Speculative (#1308)
Narsil Dec 11, 2023
9aef902
feat: mixtral (#1328)
OlivierDehaene Dec 11, 2023
79f268f
chore: formatting
OlivierDehaene Dec 11, 2023
db5053f
v1.3.0
OlivierDehaene Dec 11, 2023
09c556d
v1.3.1
OlivierDehaene Dec 11, 2023
f9b58ac
feat: add quant to mixtral (#1337)
OlivierDehaene Dec 12, 2023
05f8c85
v1.3.2
OlivierDehaene Dec 12, 2023
2f88d8d
fix: default max_new_tokens to 100
OlivierDehaene Dec 13, 2023
c974437
fix: fix gpt-q params loading
OlivierDehaene Dec 14, 2023
5c9ef06
feat: add more latency metrics in forward (#1346)
OlivierDehaene Dec 14, 2023
28fcdcc
fix: fix triton OutOfResources import
OlivierDehaene Dec 14, 2023
b3c2d72
fix: fix quant linear autotune
OlivierDehaene Dec 14, 2023
04dbf7a
fix: slice stopping criteria buffer
OlivierDehaene Dec 14, 2023
214ec0e
fix: only keep stop sequence buffer if we have some
OlivierDehaene Dec 14, 2023
bb62005
fix: max_past default value must be -1, not 0 (#1348)
OlivierDehaene Dec 15, 2023
3600fc9
v1.3.3
OlivierDehaene Dec 15, 2023
a95e6d6
feat: relax mistral requirements (#1351)
OlivierDehaene Dec 15, 2023
ecb0db4
fix: fix logic if sliding window key is not present in config (#1352)
OlivierDehaene Dec 15, 2023
5ff9e81
fix: fix offline (#1341) (#1347)
OlivierDehaene Dec 18, 2023
b7299e1
fix: fix gpt-q with groupsize = -1 (#1358)
OlivierDehaene Dec 18, 2023
be05972
Peft safetensors. (#1364)
Narsil Dec 20, 2023
3e22ad9
docs: Change URL for Habana Gaudi support in doc (#1343)
regisss Dec 21, 2023
7eeabb9
feat: update exllamav2 kernels (#1370)
OlivierDehaene Dec 21, 2023
8cc4306
Fix local load for peft (#1373)
Narsil Dec 21, 2023
62646c2
v1.3.4
OlivierDehaene Dec 22, 2023
fc9173a
docs: update required CUDA version to 12.2
OlivierDehaene Jan 9, 2024
118344b
fix: fix local loading for .bin models (#1419)
OlivierDehaene Jan 9, 2024
92ddb41
Fix missing make target platform for local install: 'install-flash-at…
deepily Jan 9, 2024
af63e32
fix: follow base model for tokenizer in router (#1424)
OlivierDehaene Jan 10, 2024
e930ad9
Fix local load for Medusa (#1420)
PYNing Jan 10, 2024
12cfc79
Return prompt vs generated tokens. (#1436)
Narsil Jan 11, 2024
76b226b
feat: supports openai chat completions API (#1427)
drbh Jan 16, 2024
77afb88
feat: support raise_exception, bos and eos tokens (#1450)
drbh Jan 18, 2024
935ee00
chore: bump rust version and annotate/fix all clippy warnings (#1455)
drbh Jan 22, 2024
5836a1c
feat: conditionally toggle chat on invocations route (#1454)
drbh Jan 22, 2024
1b99d4c
Disable `decoder_input_details` on OpenAI-compatible chat streaming, …
EndlessReform Jan 23, 2024
2a3a9c5
Fixing non divisible embeddings. (#1476)
Narsil Jan 24, 2024
ae222cc
Add messages api compatibility docs (#1478)
drbh Jan 24, 2024
be9bfae
Add a new `/tokenize` route to get the tokenized input (#1471)
Narsil Jan 25, 2024
b2fc097
feat: adds phi model (#1442)
drbh Jan 25, 2024
ac0be8a
fix: read stderr in download (#1486)
OlivierDehaene Jan 25, 2024
a1124f7
Update the docs
Narsil Jan 26, 2024
41fbf5c
fix: show warning with tokenizer config parsing error (#1488)
drbh Jan 26, 2024
82f20c4
fix: launcher doc typos (#1473)
Narsil Jan 26, 2024
ea2aa53
Reinstate exl2 with tp (#1490)
Narsil Jan 26, 2024
b064b33
Add sealion mpt support (#1477)
Narsil Jan 26, 2024
9fd5f51
Trying to fix that flaky test. (#1491)
Narsil Jan 26, 2024
5134d9c
fix: launcher doc typos (#1462)
thelinuxkid Jan 26, 2024
5d663fb
Update the docs to include newer models. (#1492)
Narsil Jan 26, 2024
4b376b3
GPTQ support on ROCm (#1489)
fxmarty Jan 26, 2024
ac580f5
feat: add tokenizer-config-path to launcher args (#1495)
drbh Jan 26, 2024
efd4b97
v1.4.0 (#1494)
OlivierDehaene Jan 26, 2024
4339345
Fixing top_n_tokens. (#1497)
Narsil Jan 26, 2024
050c584
Sending compute type from the environment instead of hardcoded string…
Narsil Jan 29, 2024
89fa4fd
Create the compute type at launch time (if not provided in the env). …
Narsil Jan 29, 2024
86796bc
Modify default for max_new_tokens in python client (#1336)
freitng Jan 29, 2024
bf72c03
feat: eetq gemv optimization when batch_size <= 4 (#1502)
dtlzhuangz Jan 31, 2024
11d8e71
fix: improve messages api docs content and formatting (#1506)
drbh Jan 31, 2024
27daa51
GPTNeoX: Use static rotary embedding (#1498)
dwyatte Feb 1, 2024
1a0bfe3
Freshen up the README.
Narsil Feb 1, 2024
2bf3931
Hotfix the / health - route. (#1515)
Narsil Feb 1, 2024
6c0b21b
Revert "Modify default for max_new_tokens in python client (#1336)"
Narsil Feb 1, 2024
14b40bf
fix: tokenizer config should use local model path when possible (#1518)
drbh Feb 1, 2024
369ae2d
Updating tokenizers. (#1517)
Narsil Feb 1, 2024
e39ba49
[docs] Fix link to Install CLI (#1526)
pcuenca Feb 2, 2024
62a40b8
feat: add ie update to message docs (#1523)
drbh Feb 2, 2024
99cb270
feat: use existing add_generation_prompt variable from config in temp…
drbh Feb 7, 2024
51a4e62
Impl simple mamba model (#1480)
drbh Feb 8, 2024
cec954e
Update to peft 0.8.2 (#1537)
Stillerman Feb 8, 2024
f1d8da3
feat(server): add frequency penalty (#1541)
OlivierDehaene Feb 8, 2024
8415d46
chore: bump ci rust version (#1543)
drbh Feb 9, 2024
777e519
ROCm AWQ support (#1514)
IlyasMoutawwakil Feb 9, 2024
518d30d
feat(router): add max_batch_size (#1542)
OlivierDehaene Feb 9, 2024
0c207f7
feat: experimental support for cuda graphs (#1428)
OlivierDehaene Feb 12, 2024
91b56a7
feat: add deserialize_with that handles strings or objects with conte…
drbh Feb 13, 2024
d05d930
Fixing glibc version in the runtime. (#1556)
Narsil Feb 13, 2024
f6500bf
Upgrade intermediary layer for nvidia too. (#1557)
Narsil Feb 13, 2024
e93cc34
Improving mamba runtime by using updates (#1552)
Narsil Feb 14, 2024
686b56a
Small cleanup. (#1560)
Narsil Feb 14, 2024
55acb86
Outlines guided generation (#1539)
drbh Feb 15, 2024
cfccdf3
Added `name` field to OpenAI compatible API Messages (#1563)
amihalik Feb 15, 2024
69a2ead
Bugfix: eos and bos tokens positions are inconsistent (#1567)
amihalik Feb 16, 2024
31b5e37
chore: add pre-commit (#1569)
OlivierDehaene Feb 16, 2024
cf946b3
feat: add chat template struct to avoid tuple ordering errors (#1570)
OlivierDehaene Feb 16, 2024
2ac1b55
v1.4.1 (#1568)
OlivierDehaene Feb 16, 2024
5a54d91
Fix mistral with length > window_size for long prefills (rotary doesn…
Narsil Feb 19, 2024
c3053e8
improve endpoint support (#1577)
drbh Feb 20, 2024
5addb84
fix: refactor syntax to correctly include structs (#1580)
drbh Feb 20, 2024
3c6e6d8
fix(router): fix openapi and add jsonschema validation (#1578)
OlivierDehaene Feb 21, 2024
a461257
feat: add support for Gemma (#1583)
OlivierDehaene Feb 21, 2024
e7183c2
v1.4.2 (#1585)
OlivierDehaene Feb 21, 2024
d94343d
fix: fix openapi schema (#1586)
OlivierDehaene Feb 21, 2024
70ac5c3
fix: avoid default message (#1579)
drbh Feb 22, 2024
21d52c9
Revamp medusa implementation so that every model can benefit. (#1588)
Narsil Feb 26, 2024
f215cc1
Support tools (#1587)
drbh Feb 28, 2024
35f7c3f
Fixing x-compute-time. (#1606)
Narsil Feb 28, 2024
bc6ab91
Fixing guidance docs. (#1607)
Narsil Feb 28, 2024
7c6a47b
feat: starcoder2 (#1605)
OlivierDehaene Feb 28, 2024
666cdaa
feat: Qwen2 (#1608)
OlivierDehaene Feb 28, 2024
e9b2003
v1.4.3 (#1609)
OlivierDehaene Feb 28, 2024
e259625
fix: Handle concurrent grammar requests (#1610)
drbh Feb 29, 2024
0390b28
Fix idefics default. (#1614)
Narsil Feb 29, 2024
0a5755e
Fix async client timeout (#1617)
hugoabonizio Feb 29, 2024
5a2a0ca
feat: accept legacy request format and response (#1527)
drbh Feb 29, 2024
dc7c69e
fix: add missing stop parameter for chat request (#1619)
drbh Mar 1, 2024
d4aebbd
fix: correctly index into mask when applying grammar (#1618)
drbh Mar 1, 2024
cd8163d
Use a better model for the quick tour (#1639)
lewtun Mar 12, 2024
7809825
Upgrade nix version from 0.27.1 to 0.28.0 (#1638)
kdamaszk Apr 25, 2024
86c5ce5
Update peft + transformers + accelerate + bnb + safetensors (#1646)
abhishekkrthakur Mar 15, 2024
50d8f99
Fix index in ChatCompletionChunk (#1648)
Wauplin Mar 16, 2024
925f9c4
Fixing minor typo in documentation: supported hardware section (#1632)
SachinVarghese Mar 18, 2024
08525d9
feat: bump minijina and add test for core templates (#1626)
drbh Mar 20, 2024
d888bc2
feat: support force downcast after FastRMSNorm multiply for Gemma (#1…
drbh Mar 21, 2024
b36c0f8
fix: prefer spaces url over temp url (#1662)
drbh Mar 21, 2024
ab074c8
fix: improve tool type, bump pydantic and outlines (#1650)
drbh Mar 21, 2024
6729783
Remove unecessary cuda graph. (#1664)
Narsil Mar 21, 2024
07f05a8
Repair idefics integration tests. (#1663)
Narsil Mar 21, 2024
c4f92ec
feat: update client to 0.7 (#1667)
OlivierDehaene Mar 22, 2024
097e72a
fix: LlamaTokenizerFast to AutoTokenizer at flash_mistral.py (#1637)
SeongBeomLEE Mar 22, 2024
ecdacbb
Inline images for multimodal models. (#1666)
Narsil Mar 22, 2024
da4199e
feat: cohere (#1660)
OlivierDehaene Mar 22, 2024
6ac93d8
v1.4.4 (#1668)
OlivierDehaene Mar 22, 2024
d5ed4c1
fix: adjust logprob response logic (#1682)
drbh Mar 28, 2024
5667039
fix: handle batches with and without grammars (#1676)
drbh Mar 28, 2024
dc1ab20
feat: Add dbrx support (#1685)
OlivierDehaene Mar 29, 2024
0bf856d
v1.4.5 (#1686)
OlivierDehaene Mar 29, 2024
29c316e
Add cuda graphs sizes and make it default. (#1703)
Narsil Apr 4, 2024
fe063b8
Pickle conversion now requires `--trust-remote-code`. (#1704)
Narsil Apr 5, 2024
62672c6
Push users to streaming in the readme. (#1698)
Narsil Apr 5, 2024
fec3f8f
Fixing cohere tokenizer. (#1697)
Narsil Apr 5, 2024
3417398
Force weights_only (before fully breaking pickle files anyway). (#1710)
Narsil Apr 5, 2024
8d4aec0
Regenerate ld.so.cache (#1708)
oOraph Apr 8, 2024
fb998da
Revert license to Apache 2.0 (#1714)
OlivierDehaene Apr 8, 2024
351bd5f
Automatic quantization config. (#1719)
Narsil Apr 9, 2024
2b2f4de
Adding Llava-Next (Llava 1.6) with full support. (#1709)
Narsil Apr 9, 2024
a1b65e5
fix: fix CohereForAI/c4ai-command-r-plus (#1707)
OlivierDehaene Apr 10, 2024
d1d0b3c
hotfix: mixtral
OlivierDehaene Apr 10, 2024
d9dcfe4
Update libraries (#1713)
abhishekkrthakur Apr 11, 2024
e428c7c
Easier defaults for models stemmed from configs.
Narsil Apr 11, 2024
c4ee0a6
Revert "Easier defaults for models stemmed from configs."
Narsil Apr 11, 2024
194fcb4
Dev/mask ldconfig output v2 (#1716)
oOraph Apr 11, 2024
935d56a
Fp8 Support (#1726)
Narsil Apr 12, 2024
0707a09
Upgrade EETQ (Fixes the cuda graphs). (#1729)
Narsil Apr 12, 2024
e6421f6
fix(router): fix a possible deadlock in next_batch (#1731)
OlivierDehaene Apr 12, 2024
7a62f74
chore(cargo-toml): apply lto fat and codegen-units of one (#1651)
somehowchris Apr 12, 2024
661081d
Improve the defaults for the launcher (#1727)
Narsil Apr 12, 2024
f6d5c2e
feat: medusa v2 (#1734)
OlivierDehaene Apr 12, 2024
6ad5aa7
Fix typo in guidance.md (#1735)
eltociear Apr 12, 2024
c6a31b9
v2.0.0 (#1736)
OlivierDehaene Apr 12, 2024
600d033
Merge branch 'habana-main' into rebase_tgi_2.0
kdamaszk Apr 29, 2024
3d78027
A patch to address HPU Graphs issue with DILL
yafshar Apr 23, 2024
0bbec63
Update README example commands
kdamaszk May 6, 2024
10 changes: 5 additions & 5 deletions .github/ISSUE_TEMPLATE/bug-report.yml
@@ -5,14 +5,14 @@ body:
id: system-info
attributes:
label: System Info
description: |
description: |
Please share your system info with us (`text-generation-launcher --env` if installed locally).
The full command line used that causes issues:
The full command line used that causes issues:
OS version:
Rust version (if self-compiling, `cargo version`):
Model being used (`curl 127.0.0.1:8080/info | jq`):
If local model please explicit the kind of model and/or equivalents.
Hardware used (GPUs, how many, on which cloud) (`nvidia-smi`):
Hardware used (GPUs, how many, on which cloud) (`nvidia-smi`):
Deployment specificities (Kubernetes, EKS, AKS, any particular deployments):
The current version being used:

@@ -52,11 +52,11 @@ body:

placeholder: |
Steps to reproduce the behavior:

1.
2.
3.


- type: textarea
id: expected-behavior
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/feature-request.yml
@@ -19,7 +19,7 @@ body:
label: Motivation
description: |
Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.


- type: textarea
id: contribution
6 changes: 3 additions & 3 deletions .github/workflows/autodocs.yml
@@ -6,15 +6,15 @@ on:
jobs:
update_docs:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v2

- name: Install Launcher
id: install-launcher
run: cargo install --git https://github.com/${{ github.repository }} --branch ${{ github.head_ref }} text-generation-launcher

- name: Check launcher Docs are up-to-date
run: |
echo text-generation-launcher --help
78 changes: 40 additions & 38 deletions .github/workflows/build.yaml
@@ -146,11 +146,50 @@ jobs:
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min

integration-tests:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs:
- start-runner
- build-and-push-image # Wait for the docker image to be built
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
env:
DOCKER_VOLUME: /cache
steps:
- uses: actions/checkout@v2
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4.4.1
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Tailscale
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
- name: Prepare disks
run: |
sudo mkfs -t ext4 /dev/nvme1n1
sudo mkdir ${{ env.DOCKER_VOLUME }}
sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
- name: Install
run: |
make install-integration-tests
- name: Run tests
run: |
export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
pytest -s -vv integration-tests

build-and-push-image-rocm:
concurrency:
group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs: start-runner # required to start the main job when the runner is ready
needs:
- start-runner
- build-and-push-image # Wait for the main docker image to be built
- integration-tests # Wait for the main integration-tests
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
permissions:
contents: write
@@ -235,43 +274,6 @@ jobs:
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min

integration-tests:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs:
- start-runner
- build-and-push-image # Wait for the docker image to be built
- build-and-push-image-rocm
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
env:
DOCKER_VOLUME: /cache
steps:
- uses: actions/checkout@v2
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4.4.1
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Tailscale
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
- name: Prepare disks
run: |
sudo mkfs -t ext4 /dev/nvme1n1
sudo mkdir ${{ env.DOCKER_VOLUME }}
sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
- name: Install
run: |
make install-integration-tests
- name: Run tests
run: |
export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
pytest -s -vv integration-tests

stop-runner:
name: Stop self-hosted EC2 runner
needs:
2 changes: 1 addition & 1 deletion .github/workflows/build_pr_documentation.yml
@@ -16,4 +16,4 @@ jobs:
commit_sha: ${{ github.event.pull_request.head.sha }}
pr_number: ${{ github.event.number }}
package: text-generation-inference
additional_args: --not_python_module
additional_args: --not_python_module
12 changes: 0 additions & 12 deletions .github/workflows/delete_doc_comment.yml

This file was deleted.

14 changes: 14 additions & 0 deletions .github/workflows/stale.yml
@@ -0,0 +1,14 @@
name: 'Close stale issues and PRs'
on:
schedule:
- cron: '30 1 * * *'

jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v8
with:
stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
days-before-stale: 30
days-before-close: 5
18 changes: 12 additions & 6 deletions .github/workflows/tests.yaml
@@ -33,11 +33,18 @@ jobs:
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: 1.71.0
# Released on: 28 December, 2023
# Branched from master on: 10 November, 2023
# https://releases.rs/docs/1.75.0/
toolchain: 1.75.0
override: true
components: rustfmt, clippy
- name: Install Protoc
uses: arduino/setup-protoc@v1
- name: Clean unused files
run: |
sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android
sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
- name: Install sccache
run: |
curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache
@@ -68,12 +75,11 @@ jobs:
pip install pytest
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
pytest -s -vv server/tests
- name: Run Rust fmt
run: |
cargo fmt --check
- name: Run Rust clippy
- name: Pre-commit checks
run: |
cargo clippy
pip install pre-commit
pre-commit install
pre-commit run --all-files
- name: Run Rust tests
run: |
cargo test
2 changes: 1 addition & 1 deletion .github/workflows/upload_pr_documentation.yml
@@ -13,4 +13,4 @@ jobs:
package_name: text-generation-inference
secrets:
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
9 changes: 9 additions & 0 deletions .gitignore
@@ -2,3 +2,12 @@
target
router/tokenizer.json
*__pycache__*

# ROCm auto-generated files
*.hip
server/exllamav2_kernels/exllamav2_kernels/hip/
server/exllama_kernels/exllama_kernels/hip/
server/exllama_kernels/exllama_kernels/hip_func/
*_hip.cuh
server/exllama_kernels/exllama_kernels/hip_buffers.cuh
server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp
18 changes: 18 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,18 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
exclude: docs/source/basic_tutorials/launcher.md
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
- repo: https://github.com/doublify/pre-commit-rust
rev: v1.0
hooks:
- id: fmt
- id: cargo-check
- id: clippy