Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libs/simdvec/native/publish_vec_binaries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ if [ -z "$ARTIFACTORY_API_KEY" ]; then
exit 1;
fi

VERSION="1.0.13"
VERSION="1.0.14"
ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}"
TEMP=$(mktemp -d)

Expand Down
25 changes: 25 additions & 0 deletions libs/simdvec/native/src/vec/c/aarch64/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,31 @@ EXPORT int32_t dot7u(int8_t* a, int8_t* b, size_t dims) {
return res;
}

/*
 * Bulk dot product: for each of the `count` consecutive `dims`-element int8
 * vectors starting at `a`, computes the dot product with the single vector `b`
 * and stores it, converted to float_t, in `results[c]`.
 *
 *   a       - `count` vectors laid out back to back, `dims` elements each
 *   b       - the one vector every entry of `a` is multiplied against
 *   dims    - number of elements per vector
 *   count   - number of vectors in `a`; also the number of results written
 *   results - output array, written at indices [0, count)
 */
EXPORT void dot7u_bulk(int8_t* a, int8_t* b, size_t dims, size_t count, float_t* results) {
    // Keep all length arithmetic in size_t: the previous `int` locals narrowed
    // `dims` and made the tail-loop comparison signed/unsigned.
    const size_t stride = (size_t)DOT7U_STRIDE_BYTES_LEN;

    // Leading part of each vector handed to dot7u_inner: the largest multiple
    // of the stride not exceeding dims (the mask assumes the stride is a power
    // of two). Vectors that are not strictly longer than one stride are
    // handled entirely by the scalar loop, as before.
    const size_t limit = dims > stride ? (dims & ~(stride - 1)) : 0;

    for (size_t c = 0; c < count; c++) {
        int32_t res = limit > 0 ? dot7u_inner(a, b, limit) : 0;
        // Scalar tail: whatever the strided helper did not cover.
        for (size_t i = limit; i < dims; i++) {
            res += a[i] * b[i];
        }
        results[c] = (float_t)res;
        a += dims;  // advance to the next vector; b is reused for every entry
    }
}

static inline int32_t sqr7u_inner(int8_t *a, int8_t *b, size_t dims) {
int32x4_t acc1 = vdupq_n_s32(0);
int32x4_t acc2 = vdupq_n_s32(0);
Expand Down
25 changes: 25 additions & 0 deletions libs/simdvec/native/src/vec/c/amd64/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,31 @@ EXPORT int32_t dot7u(int8_t* a, int8_t* b, size_t dims) {
return res;
}

/*
 * Bulk dot product: for each of the `count` consecutive `dims`-element int8
 * vectors starting at `a`, computes the dot product with the single vector `b`
 * and stores it, converted to float_t, in `results[c]`.
 *
 *   a       - `count` vectors laid out back to back, `dims` elements each
 *   b       - the one vector every entry of `a` is multiplied against
 *   dims    - number of elements per vector
 *   count   - number of vectors in `a`; also the number of results written
 *   results - output array, written at indices [0, count)
 */
EXPORT void dot7u_bulk(int8_t* a, int8_t* b, size_t dims, size_t count, float_t* results) {
    // Keep all length arithmetic in size_t: the previous `int` locals narrowed
    // `dims` and made the tail-loop comparison signed/unsigned.
    const size_t stride = (size_t)STRIDE_BYTES_LEN;

    // Leading part of each vector handed to dot7u_inner: the largest multiple
    // of the stride not exceeding dims (the mask assumes the stride is a power
    // of two). Vectors that are not strictly longer than one stride are
    // handled entirely by the scalar loop, as before.
    const size_t limit = dims > stride ? (dims & ~(stride - 1)) : 0;

    for (size_t c = 0; c < count; c++) {
        int32_t res = limit > 0 ? dot7u_inner(a, b, limit) : 0;
        // Scalar tail: whatever the strided helper did not cover.
        for (size_t i = limit; i < dims; i++) {
            res += a[i] * b[i];
        }
        results[c] = (float_t)res;
        a += dims;  // advance to the next vector; b is reused for every entry
    }
}

static inline int32_t sqr7u_inner(int8_t *a, int8_t *b, size_t dims) {
// Init accumulator(s) with 0
__m256i acc1 = _mm256_setzero_si256();
Expand Down
26 changes: 26 additions & 0 deletions libs/simdvec/native/src/vec/c/amd64/vec_2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,32 @@ EXPORT int32_t dot7u_2(int8_t* a, int8_t* b, size_t dims) {
return res;
}

/*
 * Bulk dot product (variant built on dot7u_inner_avx512): for each of the
 * `count` consecutive `dims`-element int8 vectors starting at `a`, computes
 * the dot product with the single vector `b` and stores it, converted to
 * float_t, in `results[c]`.
 *
 *   a       - `count` vectors laid out back to back, `dims` elements each
 *   b       - the one vector every entry of `a` is multiplied against
 *   dims    - number of elements per vector
 *   count   - number of vectors in `a`; also the number of results written
 *   results - output array, written at indices [0, count)
 */
extern "C"
EXPORT void dot7u_bulk_2(int8_t* a, int8_t* b, size_t dims, size_t count, float_t* results) {
    // Keep all length arithmetic in size_t: the previous `int` locals narrowed
    // `dims` and made the tail-loop comparison signed/unsigned.
    const size_t stride = static_cast<size_t>(STRIDE_BYTES_LEN);

    // Leading part of each vector handed to dot7u_inner_avx512: the largest
    // multiple of the stride not exceeding dims (the mask assumes the stride
    // is a power of two). Vectors that are not strictly longer than one stride
    // are handled entirely by the scalar loop, as before.
    const size_t limit = dims > stride ? (dims & ~(stride - 1)) : 0;

    for (size_t c = 0; c < count; c++) {
        int32_t res = limit > 0 ? dot7u_inner_avx512(a, b, limit) : 0;
        // Scalar tail: whatever the strided helper did not cover.
        for (size_t i = limit; i < dims; i++) {
            res += a[i] * b[i];
        }
        results[c] = (float_t)res;
        a += dims;  // advance to the next vector; b is reused for every entry
    }
}

template<int offsetRegs>
inline __m512i sqr8(__m512i acc, const int8_t* p1, const int8_t* p2) {
constexpr int lanes = offsetRegs * STRIDE_BYTES_LEN;
Expand Down
2 changes: 2 additions & 0 deletions libs/simdvec/native/src/vec/headers/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ EXPORT int vec_caps();

EXPORT int32_t dot7u(int8_t* a, int8_t* b, size_t dims);

/*
 * Bulk variant of dot7u: `a` holds `count` vectors of `dims` int8 elements
 * laid out back to back; each one is dotted with the single vector `b` and
 * the result, converted to float_t, is written to `results[c]` for c in
 * [0, count). `b` is reused unchanged for every vector in `a`.
 */
EXPORT void dot7u_bulk(int8_t* a, int8_t* b, size_t dims, size_t count, float_t* results);

EXPORT int32_t sqr7u(int8_t *a, int8_t *b, size_t length);

EXPORT float cosf32(const float *a, const float *b, size_t elementCount);
Expand Down