Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into sl/moe-rework-2
Browse files Browse the repository at this point in the history
  • Loading branch information
slaren committed Apr 12, 2024
2 parents 47c3867 + fbbc030 commit 137fbb8
Show file tree
Hide file tree
Showing 83 changed files with 7,236 additions and 4,748 deletions.
20 changes: 13 additions & 7 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,18 @@ jobs:
sleep 0.1
done
- name: Install k6
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.21'

- name: Install k6 and xk6-sse
id: k6_installation
run: |
cd examples/server/bench
wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
tar xzf k6*.tar.gz --strip-components=1
go install go.k6.io/xk6/cmd/xk6@latest
xk6 build master \
--with github.com/phymbert/xk6-sse
- name: Build
id: cmake_build
Expand Down Expand Up @@ -118,7 +124,7 @@ jobs:
cd examples/server/bench
source venv/bin/activate
BENCH_K6_BIN_PATH=./k6 python bench.py \
python bench.py \
--runner-label ${{ env.RUNNER_LABEL }} \
--name ${{ github.job }} \
--branch ${{ github.head_ref || github.ref_name }} \
Expand Down Expand Up @@ -228,9 +234,9 @@ jobs:
<summary>Expand details for performance related PR only</summary>
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
- ${{ env.BENCH_GRAPH_XLABEL }}
Expand Down
22 changes: 15 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
ctest -L 'main|curl' --verbose --timeout 900
- name: Determine tag name
id: tag
Expand Down Expand Up @@ -101,7 +101,9 @@ jobs:
sysctl -a
mkdir build
cd build
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON ..
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
- name: Test
Expand Down Expand Up @@ -209,21 +211,21 @@ jobs:
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential
sudo apt-get install build-essential libcurl4-openssl-dev
- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DLLAMA_FATAL_WARNINGS=ON
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
cmake --build . --config Release -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
ctest -L 'main|curl' --verbose --timeout 900
- name: Test llama2c conversion
id: llama2c_test
Expand Down Expand Up @@ -938,6 +940,12 @@ jobs:
- name: Download artifacts
id: download-artifact
uses: actions/download-artifact@v4
with:
path: ./artifact

- name: Move artifacts
id: move_artifacts
run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release

- name: Create release
id: create_release
Expand All @@ -956,15 +964,15 @@ jobs:
const path = require('path');
const fs = require('fs');
const release_id = '${{ steps.create_release.outputs.id }}';
for (let file of await fs.readdirSync('./artifact')) {
for (let file of await fs.readdirSync('./artifact/release')) {
if (path.extname(file) === '.zip') {
console.log('uploadReleaseAsset', file);
await github.repos.uploadReleaseAsset({
owner: context.repo.owner,
repo: context.repo.repo,
release_id: release_id,
name: file,
data: await fs.readFileSync(`./artifact/${file}`)
data: await fs.readFileSync(`./artifact/release/${file}`)
});
}
}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ models-mnt
/convert-llama2c-to-ggml
/embd-input-test
/embedding
/eval-callback
/gguf
/gguf-llama-simple
/gguf-split
Expand Down

0 comments on commit 137fbb8

Please sign in to comment.