Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 6a3fdf6

Browse files
authored
Linux cuda separate cpu instruction (#718)
Co-authored-by: Hien To <tominhhien97@gmail.com>
1 parent b34c62a commit 6a3fdf6

File tree

4 files changed

+122
-34
lines changed

4 files changed

+122
-34
lines changed

.github/workflows/cortex-cpp-build.yml

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -49,38 +49,62 @@ jobs:
4949
include:
5050
- os: "linux"
5151
name: "amd64-avx2"
52-
runs-on: "ubuntu-18-04"
52+
runs-on: "ubuntu-20-04"
5353
cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
5454
run-e2e: true
5555

5656
- os: "linux"
5757
name: "amd64-avx"
58-
runs-on: "ubuntu-18-04"
58+
runs-on: "ubuntu-20-04"
5959
cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
6060
run-e2e: false
6161

6262
- os: "linux"
6363
name: "amd64-avx512"
64-
runs-on: "ubuntu-18-04"
64+
runs-on: "ubuntu-20-04"
6565
cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
6666
run-e2e: false
6767

6868
- os: "linux"
6969
name: "amd64-vulkan"
70-
runs-on: "ubuntu-18-04-cuda-11-7"
70+
runs-on: "ubuntu-20-04-cuda-11-7"
7171
cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
7272
run-e2e: false
7373

7474
- os: "linux"
75-
name: "amd64-cuda-11-7"
76-
runs-on: "ubuntu-18-04-cuda-11-7"
77-
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
75+
name: "amd64-avx2-cuda-11-7"
76+
runs-on: "ubuntu-20-04-cuda-11-7"
77+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON"
78+
run-e2e: false
79+
80+
- os: "linux"
81+
name: "amd64-avx-cuda-11-7"
82+
runs-on: "ubuntu-20-04-cuda-11-7"
83+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF"
84+
run-e2e: false
85+
86+
- os: "linux"
87+
name: "amd64-avx512-cuda-11-7"
88+
runs-on: "ubuntu-20-04-cuda-11-7"
89+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON"
7890
run-e2e: false
7991

8092
- os: "linux"
81-
name: "amd64-cuda-12-0"
82-
runs-on: "ubuntu-18-04-cuda-12-0"
83-
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
93+
name: "amd64-avx2-cuda-12-0"
94+
runs-on: "ubuntu-20-04-cuda-12-0"
95+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON"
96+
run-e2e: false
97+
98+
- os: "linux"
99+
name: "amd64-avx-cuda-12-0"
100+
runs-on: "ubuntu-20-04-cuda-12-0"
101+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF"
102+
run-e2e: false
103+
104+
- os: "linux"
105+
name: "amd64-avx512-cuda-12-0"
106+
runs-on: "ubuntu-20-04-cuda-12-0"
107+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON"
84108
run-e2e: false
85109

86110
- os: "mac"

.github/workflows/cortex-cpp-quality-gate.yml

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,43 +24,71 @@ jobs:
2424
include:
2525
- os: "linux"
2626
name: "amd64-avx2"
27-
runs-on: "ubuntu-18-04"
27+
runs-on: "ubuntu-20-04"
2828
cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
2929
run-e2e: true
3030
run-python-e2e: true
3131

3232
- os: "linux"
3333
name: "amd64-avx"
34-
runs-on: "ubuntu-18-04"
34+
runs-on: "ubuntu-20-04"
3535
cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
3636
run-e2e: false
3737
run-python-e2e: false
3838

3939
- os: "linux"
4040
name: "amd64-avx512"
41-
runs-on: "ubuntu-18-04"
41+
runs-on: "ubuntu-20-04"
4242
cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
4343
run-e2e: false
4444
run-python-e2e: false
4545

4646
- os: "linux"
4747
name: "amd64-vulkan"
48-
runs-on: "ubuntu-18-04-cuda-11-7"
48+
runs-on: "ubuntu-20-04-cuda-11-7"
4949
cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
5050
run-e2e: false
5151
run-python-e2e: false
5252

5353
- os: "linux"
54-
name: "amd64-cuda-11-7"
55-
runs-on: "ubuntu-18-04-cuda-11-7"
56-
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
54+
name: "amd64-avx2-cuda-11-7"
55+
runs-on: "ubuntu-20-04-cuda-11-7"
56+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON"
57+
run-e2e: false
58+
run-python-e2e: false
59+
60+
- os: "linux"
61+
name: "amd64-avx-cuda-11-7"
62+
runs-on: "ubuntu-20-04-cuda-11-7"
63+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF"
64+
run-e2e: false
65+
run-python-e2e: false
66+
67+
- os: "linux"
68+
name: "amd64-avx512-cuda-11-7"
69+
runs-on: "ubuntu-20-04-cuda-11-7"
70+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON"
5771
run-e2e: false
5872
run-python-e2e: false
5973

6074
- os: "linux"
61-
name: "amd64-cuda-12-0"
62-
runs-on: "ubuntu-18-04-cuda-12-0"
63-
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
75+
name: "amd64-avx2-cuda-12-0"
76+
runs-on: "ubuntu-20-04-cuda-12-0"
77+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON"
78+
run-e2e: false
79+
run-python-e2e: false
80+
81+
- os: "linux"
82+
name: "amd64-avx-cuda-12-0"
83+
runs-on: "ubuntu-20-04-cuda-12-0"
84+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF"
85+
run-e2e: false
86+
run-python-e2e: false
87+
88+
- os: "linux"
89+
name: "amd64-avx512-cuda-12-0"
90+
runs-on: "ubuntu-20-04-cuda-12-0"
91+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON"
6492
run-e2e: false
6593
run-python-e2e: false
6694

.github/workflows/nightly-build.yml

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,38 +50,62 @@ jobs:
5050
include:
5151
- os: "linux"
5252
name: "amd64-avx2"
53-
runs-on: "ubuntu-18-04"
53+
runs-on: "ubuntu-20-04"
5454
cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
5555
run-e2e: true
5656

5757
- os: "linux"
5858
name: "amd64-avx"
59-
runs-on: "ubuntu-18-04"
59+
runs-on: "ubuntu-20-04"
6060
cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
6161
run-e2e: false
6262

6363
- os: "linux"
6464
name: "amd64-avx512"
65-
runs-on: "ubuntu-18-04"
65+
runs-on: "ubuntu-20-04"
6666
cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
6767
run-e2e: false
6868

6969
- os: "linux"
7070
name: "amd64-vulkan"
71-
runs-on: "ubuntu-18-04-cuda-11-7"
71+
runs-on: "ubuntu-20-04-cuda-11-7"
7272
cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
7373
run-e2e: false
7474

7575
- os: "linux"
76-
name: "amd64-cuda-11-7"
77-
runs-on: "ubuntu-18-04-cuda-11-7"
78-
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
76+
name: "amd64-avx2-cuda-11-7"
77+
runs-on: "ubuntu-20-04-cuda-11-7"
78+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON"
79+
run-e2e: false
80+
81+
- os: "linux"
82+
name: "amd64-avx-cuda-11-7"
83+
runs-on: "ubuntu-20-04-cuda-11-7"
84+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF"
85+
run-e2e: false
86+
87+
- os: "linux"
88+
name: "amd64-avx512-cuda-11-7"
89+
runs-on: "ubuntu-20-04-cuda-11-7"
90+
cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON"
7991
run-e2e: false
8092

8193
- os: "linux"
82-
name: "amd64-cuda-12-0"
83-
runs-on: "ubuntu-18-04-cuda-12-0"
84-
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
94+
name: "amd64-avx2-cuda-12-0"
95+
runs-on: "ubuntu-20-04-cuda-12-0"
96+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=ON"
97+
run-e2e: false
98+
99+
- os: "linux"
100+
name: "amd64-avx-cuda-12-0"
101+
runs-on: "ubuntu-20-04-cuda-12-0"
102+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX2=OFF"
103+
run-e2e: false
104+
105+
- os: "linux"
106+
name: "amd64-avx512-cuda-12-0"
107+
runs-on: "ubuntu-20-04-cuda-12-0"
108+
cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DLLAMA_AVX512=ON"
85109
run-e2e: false
86110

87111
- os: "mac"

cortex-cpp/engines/cortex.llamacpp/engine.cmake

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,28 @@
11
# cortex.llamacpp release version
2-
set(VERSION 0.1.17-15.06.24)
2+
set(VERSION 0.1.18)
33
set(ENGINE_VERSION v${VERSION})
44
add_compile_definitions(CORTEX_LLAMACPP_VERSION="${VERSION}")
55

66
# MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION})
77

88
# Download library based on instructions
99
if(UNIX AND NOT APPLE)
10-
if(CUDA_12_0)
11-
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-cuda-12-0.tar.gz)
10+
if(CUDA_12_0)
11+
if(LLAMA_AVX512)
12+
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx512-cuda-12-0.tar.gz)
13+
elseif(NOT LLAMA_AVX2)
14+
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx-cuda-12-0.tar.gz)
15+
else()
16+
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx2-cuda-12-0.tar.gz)
17+
endif()
1218
elseif(CUDA_11_7)
13-
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-cuda-11-7.tar.gz)
19+
if(LLAMA_AVX512)
20+
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx512-cuda-11-7.tar.gz)
21+
elseif(NOT LLAMA_AVX2)
22+
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx-cuda-11-7.tar.gz)
23+
else()
24+
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-avx2-cuda-11-7.tar.gz)
25+
endif()
1426
elseif(LLAMA_VULKAN)
1527
set(LIBRARY_NAME cortex.llamacpp-${VERSION}-linux-amd64-vulkan.tar.gz)
1628
elseif(LLAMA_AVX512)

0 commit comments

Comments
 (0)