Skip to content

Commit cbe8ac2

Browse files
author
Arda Aytekin
committed
Add SIMD implementations via target_clones
1 parent e9c4ac9 commit cbe8ac2

File tree

4 files changed

+236
-0
lines changed

4 files changed

+236
-0
lines changed

CMakeLists.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,36 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
1111
target_link_libraries(simdvec PRIVATE m)
1212
endif()
1313

14+
# Library variant built from src/simdvec-attribute.c with NO_ATTRIBUTE defined.
# In that source file NO_ATTRIBUTE turns `__attribute__(x)` into an empty
# macro, so this target is compiled without the target_clones annotations.
add_library(simdvecnoattr src/simdvec-attribute.c)
15+
target_compile_definitions(simdvecnoattr PRIVATE NO_ATTRIBUTE)
16+
target_compile_features(simdvecnoattr PRIVATE c_std_11)
17+
target_include_directories(simdvecnoattr PUBLIC include)
18+
# Link libm explicitly on Linux (the source calls sqrtf/fabsf).
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
19+
target_link_libraries(simdvecnoattr PRIVATE m)
20+
endif()
21+
22+
# Library variant built from the same src/simdvec-attribute.c, but without
# NO_ATTRIBUTE, so the `__attribute__((target_clones(...)))` annotations in
# the source stay in effect (unless the compiler defines _MSC_VER).
add_library(simdvecattr src/simdvec-attribute.c)
23+
target_compile_features(simdvecattr PRIVATE c_std_11)
24+
target_include_directories(simdvecattr PUBLIC include)
25+
# Link libm explicitly on Linux (the source calls sqrtf/fabsf).
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
26+
target_link_libraries(simdvecattr PRIVATE m)
27+
endif()
28+
1429
add_executable(bench src/benchmark.cpp)
1530
target_compile_features(bench PRIVATE cxx_std_11)
1631
target_link_libraries(bench PRIVATE simdvec)
1732
target_link_libraries(bench PRIVATE benchmark::benchmark)
1833

34+
# Benchmark executable for the attribute-free build: compiles
# src/benchmark-attribute.cpp and links it against simdvecnoattr and
# Google Benchmark.
add_executable(bench-noattr src/benchmark-attribute.cpp)
35+
target_compile_features(bench-noattr PRIVATE cxx_std_11)
36+
target_link_libraries(bench-noattr PRIVATE simdvecnoattr)
37+
target_link_libraries(bench-noattr PRIVATE benchmark::benchmark)
38+
39+
# Benchmark executable for the target_clones build: compiles the same
# src/benchmark-attribute.cpp and links it against simdvecattr and
# Google Benchmark.
add_executable(bench-attr src/benchmark-attribute.cpp)
40+
target_compile_features(bench-attr PRIVATE cxx_std_11)
41+
target_link_libraries(bench-attr PRIVATE simdvecattr)
42+
target_link_libraries(bench-attr PRIVATE benchmark::benchmark)
43+
1944
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
2045
find_package(GTest REQUIRED)
2146
enable_testing()

include/simdvec-attribute.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#ifndef SIMDVEC_ATTRIBUTE_H
#define SIMDVEC_ATTRIBUTE_H

// Public C interface of the simdvec "attribute" kernels (implemented in
// src/simdvec-attribute.c). Usable from both C and C++.
//
// The include guard deliberately avoids leading/double underscores, which are
// reserved for the implementation in both C and C++.

// Included outside the extern "C" block: system headers manage their own
// language linkage.
#include <stdint.h>

// `restrict` is a keyword in C (C99 and later) but not in C++. The qualifier
// is spelled through a library-private macro instead of `#define restrict`,
// so C++ translation units that include this header do not get the token
// `restrict` rewritten in their own code or in headers included afterwards.
#ifdef __cplusplus
#define SIMDVEC_RESTRICT __restrict
#else
#define SIMDVEC_RESTRICT restrict
#endif

#ifdef __cplusplus
extern "C" {
#endif

// Sum of squared element-wise differences over the first `dim` elements of
// x and y. The implementation applies no square root.
float l2_distance(const int16_t dim, const float *SIMDVEC_RESTRICT x,
                  const float *SIMDVEC_RESTRICT y);

// Sum of element-wise products over the first `dim` elements of x and y.
float inner_product(const int16_t dim, const float *SIMDVEC_RESTRICT x,
                    const float *SIMDVEC_RESTRICT y);

// dot(x, y) / sqrtf(sum(x*x) * sum(y*y)) over the first `dim` elements.
float cosine_distance(const int16_t dim, const float *SIMDVEC_RESTRICT x,
                      const float *SIMDVEC_RESTRICT y);

// Sum of absolute element-wise differences over the first `dim` elements.
float l1_distance(const int16_t dim, const float *SIMDVEC_RESTRICT x,
                  const float *SIMDVEC_RESTRICT y);

// Sum of squares of the first `dim` elements of x. The implementation applies
// no square root.
float l2_norm(const int16_t dim, const float *x);

#ifdef __cplusplus
}
#endif

#endif // SIMDVEC_ATTRIBUTE_H

src/benchmark-attribute.cpp

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#include <iostream>
2+
#include <random>
3+
#include <vector>
4+
5+
#include <benchmark/benchmark.h>
6+
7+
#include "simdvec-attribute.h"
8+
9+
static void BM_dot_product(benchmark::State &state) {
10+
const int16_t dim = state.range(0);
11+
std::mt19937 gen(42);
12+
std::normal_distribution<float> dist(0.0f, 1.0f);
13+
std::vector<float> x(dim), y(dim);
14+
std::generate(x.begin(), x.end(), [&]() { return dist(gen); });
15+
std::generate(y.begin(), y.end(), [&]() { return dist(gen); });
16+
17+
for (auto _ : state) {
18+
benchmark::DoNotOptimize(inner_product(dim, x.data(), y.data()));
19+
benchmark::ClobberMemory();
20+
}
21+
}
22+
23+
BENCHMARK(BM_dot_product)
24+
->ArgsProduct({
25+
benchmark::CreateRange(1 << 10, 16 * (1 << 10), 2),
26+
});
27+
28+
static void BM_cosine_distance(benchmark::State &state) {
29+
const int16_t dim = state.range(0);
30+
std::mt19937 gen(42);
31+
std::normal_distribution<float> dist(0.0f, 1.0f);
32+
std::vector<float> x(dim), y(dim);
33+
std::generate(x.begin(), x.end(), [&]() { return dist(gen); });
34+
std::generate(y.begin(), y.end(), [&]() { return dist(gen); });
35+
36+
for (auto _ : state) {
37+
benchmark::DoNotOptimize(cosine_distance(dim, x.data(), y.data()));
38+
benchmark::ClobberMemory();
39+
}
40+
}
41+
42+
BENCHMARK(BM_cosine_distance)
43+
->ArgsProduct({
44+
benchmark::CreateRange(1 << 10, 16 * (1 << 10), 2),
45+
});
46+
47+
static void BM_l1_distance(benchmark::State &state) {
48+
const int16_t dim = state.range(0);
49+
std::mt19937 gen(42);
50+
std::normal_distribution<float> dist(0.0f, 1.0f);
51+
std::vector<float> x(dim), y(dim);
52+
std::generate(x.begin(), x.end(), [&]() { return dist(gen); });
53+
std::generate(y.begin(), y.end(), [&]() { return dist(gen); });
54+
55+
for (auto _ : state) {
56+
benchmark::DoNotOptimize(l1_distance(dim, x.data(), y.data()));
57+
benchmark::ClobberMemory();
58+
}
59+
}
60+
61+
BENCHMARK(BM_l1_distance)
62+
->ArgsProduct({
63+
benchmark::CreateRange(1 << 10, 16 * (1 << 10), 2),
64+
});
65+
66+
static void BM_l2_distance(benchmark::State &state) {
67+
const int16_t dim = state.range(0);
68+
std::mt19937 gen(42);
69+
std::normal_distribution<float> dist(0.0f, 1.0f);
70+
std::vector<float> x(dim), y(dim);
71+
std::generate(x.begin(), x.end(), [&]() { return dist(gen); });
72+
std::generate(y.begin(), y.end(), [&]() { return dist(gen); });
73+
74+
for (auto _ : state) {
75+
benchmark::DoNotOptimize(l2_distance(dim, x.data(), y.data()));
76+
benchmark::ClobberMemory();
77+
}
78+
}
79+
80+
BENCHMARK(BM_l2_distance)
81+
->ArgsProduct({
82+
benchmark::CreateRange(1 << 10, 16 * (1 << 10), 2),
83+
});
84+
85+
static void BM_l2_norm(benchmark::State &state) {
86+
const int16_t dim = state.range(0);
87+
std::mt19937 gen(42);
88+
std::normal_distribution<float> dist(0.0f, 1.0f);
89+
std::vector<float> x(dim);
90+
std::generate(x.begin(), x.end(), [&]() { return dist(gen); });
91+
92+
for (auto _ : state) {
93+
benchmark::DoNotOptimize(l2_norm(dim, x.data()));
94+
benchmark::ClobberMemory();
95+
}
96+
}
97+
98+
BENCHMARK(BM_l2_norm)
99+
->ArgsProduct({
100+
benchmark::CreateRange(1 << 10, 16 * (1 << 10), 2),
101+
});
102+
103+
// Google Benchmark macro that supplies this program's main(), which runs the
// benchmarks registered above.
BENCHMARK_MAIN();

src/simdvec-attribute.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#include <math.h>
2+
#include <stdint.h>
3+
4+
#include "simdvec-attribute.h"
5+
6+
// Compile every `__attribute__((...))` annotation in this file away when the
// compiler is MSVC or when the build explicitly opts out via NO_ATTRIBUTE.
// NOTE(review): `__attribute__` is a reserved identifier; a project-private
// macro wrapping the annotation would avoid redefining it — confirm whether
// that refactor is wanted.
#if defined(_MSC_VER) || defined(NO_ATTRIBUTE)
#define __attribute__(x)
#endif
11+
12+
// Accumulates the squared element-wise differences between the first `dim`
// entries of x and y in single precision. Returns 0 when dim <= 0.
// The target_clones attribute (when not compiled away) requests one copy of
// the function per listed target.
// NOTE(review): no sqrtf is applied, so this is the squared L2 distance —
// confirm that matches what callers expect from the name.
__attribute__((target_clones("default", "avx", "fma", "avx512f"))) static float
l2_distance_impl(const int16_t dim, const float *restrict x,
                 const float *restrict y) {
  float sum_sq = 0.0f;
  for (int k = 0; k < dim; ++k) {
    const float delta = x[k] - y[k];
    sum_sq += delta * delta;
  }
  return sum_sq;
}
22+
23+
// Accumulates the element-wise products of the first `dim` entries of x and y
// in single precision. Returns 0 when dim <= 0.
// The target_clones attribute (when not compiled away) requests one copy of
// the function per listed target.
__attribute__((target_clones("default", "avx", "fma", "avx512f"))) static float
inner_product_impl(const int16_t dim, const float *restrict x,
                   const float *restrict y) {
  float acc = 0.0f;
  for (int k = 0; k < dim; ++k) {
    acc += x[k] * y[k];
  }
  return acc;
}
31+
32+
__attribute__((target_clones("default", "avx", "fma", "avx512f"))) static float
33+
cosine_distance_impl(const int16_t dim, const float *restrict x,
34+
const float *restrict y) {
35+
float dot = 0.0;
36+
float normx = 0.0;
37+
float normy = 0.0;
38+
for (int16_t i = 0; i < dim; i++) {
39+
dot += x[i] * y[i];
40+
normx += x[i] * x[i];
41+
normy += y[i] * y[i];
42+
}
43+
return dot / sqrtf(normx * normy);
44+
}
45+
46+
// Accumulates the absolute element-wise differences between the first `dim`
// entries of x and y in single precision. Returns 0 when dim <= 0.
// The target_clones attribute (when not compiled away) requests one copy of
// the function per listed target.
__attribute__((target_clones("default", "avx", "fma", "avx512f"))) static float
l1_distance_impl(const int16_t dim, const float *restrict x,
                 const float *restrict y) {
  float total = 0.0f;
  for (int k = 0; k < dim; ++k) {
    const float gap = x[k] - y[k];
    total += fabsf(gap);
  }
  return total;
}
54+
55+
// Accumulates the squares of the first `dim` entries of x in single
// precision. Returns 0 when dim <= 0.
// The target_clones attribute (when not compiled away) requests one copy of
// the function per listed target.
// NOTE(review): no sqrtf is applied, so this is the squared L2 norm — confirm
// that matches what callers expect from the name.
__attribute__((target_clones("default", "avx", "fma", "avx512f"))) static float
l2_norm_impl(const int16_t dim, const float *x) {
  float sum_sq = 0.0f;
  for (int k = 0; k < dim; ++k) {
    sum_sq += x[k] * x[k];
  }
  return sum_sq;
}
62+
63+
// Exported entry point declared in simdvec-attribute.h: hands its arguments
// to the file-local l2_distance_impl and returns that result unchanged.
float l2_distance(const int16_t dim, const float *restrict x,
                  const float *restrict y) {
  const float result = l2_distance_impl(dim, x, y);
  return result;
}
67+
68+
// Exported entry point declared in simdvec-attribute.h: hands its arguments
// to the file-local inner_product_impl and returns that result unchanged.
float inner_product(const int16_t dim, const float *restrict x,
                    const float *restrict y) {
  const float result = inner_product_impl(dim, x, y);
  return result;
}
72+
73+
// Exported entry point declared in simdvec-attribute.h: hands its arguments
// to the file-local cosine_distance_impl and returns that result unchanged.
float cosine_distance(const int16_t dim, const float *restrict x,
                      const float *restrict y) {
  const float result = cosine_distance_impl(dim, x, y);
  return result;
}
77+
78+
// Exported entry point declared in simdvec-attribute.h: hands its arguments
// to the file-local l1_distance_impl and returns that result unchanged.
float l1_distance(const int16_t dim, const float *restrict x,
                  const float *restrict y) {
  const float result = l1_distance_impl(dim, x, y);
  return result;
}
82+
83+
// Exported entry point declared in simdvec-attribute.h: hands its arguments
// to the file-local l2_norm_impl and returns that result unchanged.
float l2_norm(const int16_t dim, const float *x) {
  const float result = l2_norm_impl(dim, x);
  return result;
}

0 commit comments

Comments
 (0)