Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions erasure_code/aarch64/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,9 @@ lsrc_aarch64 += \
erasure_code/aarch64/gf_4vect_mad_sve.S \
erasure_code/aarch64/gf_5vect_mad_sve.S \
erasure_code/aarch64/gf_6vect_mad_sve.S \
erasure_code/aarch64/gf_vect_dot_prod_sve.S \
erasure_code/aarch64/gf_2vect_dot_prod_sve.S \
erasure_code/aarch64/gf_3vect_dot_prod_sve.S \
erasure_code/aarch64/gf_4vect_dot_prod_sve.S \
erasure_code/aarch64/gf_5vect_dot_prod_sve.S \
erasure_code/aarch64/gf_6vect_dot_prod_sve.S \
erasure_code/aarch64/gf_7vect_dot_prod_sve.S \
erasure_code/aarch64/gf_8vect_dot_prod_sve.S \
erasure_code/aarch64/gf_vect_mul_sve.S \
erasure_code/aarch64/gf_nvect_dot_prod_sve.c \
erasure_code/aarch64/ec_multibinary_arm.S

# Ensure SVE intrinsics are compiled with maximum optimization
erasure_code/aarch64/gf_nvect_dot_prod_sve.lo: CFLAGS += -O3
52 changes: 14 additions & 38 deletions erasure_code/aarch64/ec_aarch64_dispatcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,43 +30,18 @@
#include "erasure_code.h"
#include "gf_vect_mul.h"

#ifdef __ARM_FEATURE_SVE
// If the compiler defines SVE intrinsics, include that header
#include <arm_sve.h>

#elif defined(__linux__)
// Otherwise include these headers and define these constants as a fallback for Linux only
#include <stddef.h>
#ifdef __linux__
#include <sys/auxv.h>
#include <sys/prctl.h>
#ifndef PR_SVE_GET_VL
#define PR_SVE_GET_VL 51
#endif
#ifndef PR_SVE_VL_LEN_MASK
#define PR_SVE_VL_LEN_MASK 0xffff
#endif

#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 (1 << 1)
#endif

/*
 * Report the SVE vector length in bytes.
 *
 * When the translation unit is built with SVE enabled (__ARM_FEATURE_SVE),
 * the length is read with the svcntb() intrinsic. Otherwise, on Linux, the
 * kernel is queried with prctl(PR_SVE_GET_VL) and the flag bits are stripped
 * with PR_SVE_VL_LEN_MASK. On any other platform, or when the prctl call
 * reports -1, the result is 0, meaning "unknown or unavailable".
 */
static inline size_t
get_sve_vector_length_bytes(void)
{
        size_t vl_bytes = 0; /* 0 == unknown or unavailable */

#if defined(__ARM_FEATURE_SVE)
        /* Intrinsic is usable because SVE was enabled at compile time */
        vl_bytes = svcntb();
#elif defined(__linux__)
        /* No compile-time SVE: ask the kernel for the current vector length */
        const long vl_raw = prctl(PR_SVE_GET_VL);

        if (vl_raw != -1)
                vl_bytes = (size_t) (vl_raw & PR_SVE_VL_LEN_MASK);
#endif

        return vl_bytes;
}

/*
 * Single-destination dot-product kernels, one per instruction-set variant
 * (SVE, SVE2, NEON). They are defined outside this file; the
 * gf_vect_dot_prod dispatcher below picks among them from the
 * getauxval() hardware-capability bits.
 * NOTE(review): the parameters are unnamed here - presumably
 * (len, vlen, gftbls, src, dest); confirm against the kernel sources.
 */
extern void
gf_vect_dot_prod_sve(int, int, unsigned char *, unsigned char **, unsigned char *);
extern void
gf_vect_dot_prod_sve2(int, int, unsigned char *, unsigned char **, unsigned char *);
extern void
gf_vect_dot_prod_neon(int, int, unsigned char *, unsigned char **, unsigned char *);

extern void
Expand All @@ -75,7 +50,9 @@ extern void
gf_vect_mad_neon(int, int, int, unsigned char *, unsigned char *, unsigned char *);

extern void
ec_encode_data_sve(int, int, int, unsigned char *, unsigned char **, unsigned char **coding);
ec_encode_data_sve(int, int, int, unsigned char *, unsigned char **, unsigned char **);
extern void
ec_encode_data_sve2(int, int, int, unsigned char *, unsigned char **, unsigned char **);
extern void
ec_encode_data_neon(int, int, int, unsigned char *, unsigned char **, unsigned char **);

Expand All @@ -93,7 +70,10 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
{
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP);
unsigned long auxval2 = getauxval(AT_HWCAP2);

if ((auxval & HWCAP_SVE) && (auxval2 & HWCAP2_SVE2))
return gf_vect_dot_prod_sve2;
if (auxval & HWCAP_SVE)
return gf_vect_dot_prod_sve;
if (auxval & HWCAP_ASIMD)
Expand Down Expand Up @@ -127,15 +107,11 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
{
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP);
unsigned long auxval2 = getauxval(AT_HWCAP2);

if ((auxval & HWCAP_SVE) && (auxval2 & HWCAP2_SVE2))
return ec_encode_data_sve2;
if (auxval & HWCAP_SVE) {
size_t vector_length = get_sve_vector_length_bytes();

// If 128-bit SVE (16 bytes), use NEON instead
if (vector_length == 16 && (auxval & HWCAP_ASIMD)) {
return ec_encode_data_neon;
}

return ec_encode_data_sve;
}
if (auxval & HWCAP_ASIMD)
Expand Down
119 changes: 86 additions & 33 deletions erasure_code/aarch64/ec_aarch64_highlevel_func.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,34 @@ gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **
extern void
gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);

/* SVE2 */
extern void
gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
gf_vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest);
/*
 * Multi-destination SVE2 dot-product kernels (2 through 7 destinations by
 * name), defined outside this file. ec_encode_data_sve2() calls these with
 * (len, k, g_tbls, data, coding); after a 4-vect call it advances g_tbls by
 * 4 * k * 32 bytes and coding by 4 entries.
 * NOTE(review): presumably an N-vect kernel likewise consumes N * vlen * 32
 * table bytes and writes dest[0..N-1] - confirm against the kernel sources.
 */
extern void
gf_2vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_3vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_4vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_5vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_6vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_7vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);

extern void
gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);

extern void
gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
Expand All @@ -192,62 +214,93 @@ ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned cha
return;
}

while (rows > 11) {
gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 6 * k * 32;
coding += 6;
rows -= 6;
while (rows > 7) {
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
rows -= 4;
}

switch (rows) {
case 11:
/* 7 + 4 */
gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 7 * k * 32;
coding += 7;
case 7:
/* 4 + 3 */
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
gf_3vect_dot_prod_sve(len, k, g_tbls, data, coding);
break;
case 10:
/* 6 + 4 */
gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 6 * k * 32;
coding += 6;
case 6:
/* 4 + 2 */
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
gf_2vect_dot_prod_sve(len, k, g_tbls, data, coding);
break;
case 9:
/* 5 + 4 */
case 5:
gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 5 * k * 32;
coding += 5;
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
break;
case 8:
/* 4 + 4 */
case 4:
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
break;
case 3:
gf_3vect_dot_prod_sve(len, k, g_tbls, data, coding);
break;
case 2:
gf_2vect_dot_prod_sve(len, k, g_tbls, data, coding);
break;
case 1:
gf_vect_dot_prod_sve(len, k, g_tbls, data, *coding);
break;
default:
break;
}
}

void
ec_encode_data_sve2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{
if (len < 16) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return;
}

while (rows > 7) {
gf_4vect_dot_prod_sve2(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
break;
rows -= 4;
}

switch (rows) {
case 7:
gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
/* 4 + 3 */
gf_4vect_dot_prod_sve2(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
gf_3vect_dot_prod_sve2(len, k, g_tbls, data, coding);
break;
case 6:
gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
/* 4 + 2 */
gf_4vect_dot_prod_sve2(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
gf_2vect_dot_prod_sve2(len, k, g_tbls, data, coding);
break;
case 5:
gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
gf_5vect_dot_prod_sve2(len, k, g_tbls, data, coding);
break;
case 4:
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
gf_4vect_dot_prod_sve2(len, k, g_tbls, data, coding);
break;
case 3:
gf_3vect_dot_prod_sve(len, k, g_tbls, data, coding);
gf_3vect_dot_prod_sve2(len, k, g_tbls, data, coding);
break;
case 2:
gf_2vect_dot_prod_sve(len, k, g_tbls, data, coding);
gf_2vect_dot_prod_sve2(len, k, g_tbls, data, coding);
break;
case 1:
gf_vect_dot_prod_sve(len, k, g_tbls, data, *coding);
gf_vect_dot_prod_sve2(len, k, g_tbls, data, *coding);
break;
default:
break;
Expand Down
Loading
Loading