Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions ggml/src/ggml-cpu/ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9427,12 +9427,22 @@ static void ggml_compute_forward_rwkv_wkv6_f32(
#define GGML_F32X_MUL GGML_F32x4_MUL
#define GGML_F32X_FMA GGML_F32x4_FMA
#define WKV_VECTOR_SIZE 4
#elif defined(__riscv_v_intrinsic)
#define GGML_F32X GGML_F32xt
#define GGML_F32X_SET1 GGML_F32xt_SET1
#define GGML_F32X_LOAD GGML_F32xt_LOAD
#define GGML_F32X_STORE GGML_F32xt_STORE
#define GGML_F32X_MUL GGML_F32xt_MUL
#define GGML_F32X_FMA GGML_F32xt_FMA
#define WKV_VECTOR_SIZE 4
#endif

#ifdef WKV_VECTOR_SIZE
int wkv_vector_size;
#if defined(__ARM_FEATURE_SVE)
wkv_vector_size = svcntw();
#elif defined(__riscv_v_intrinsic)
wkv_vector_size = __riscv_vlenb() / sizeof(float);
#else
wkv_vector_size = WKV_VECTOR_SIZE;
#endif
Expand Down
34 changes: 17 additions & 17 deletions ggml/src/ggml-cpu/simd-mappings.h
Original file line number Diff line number Diff line change
Expand Up @@ -1182,24 +1182,24 @@ static inline void __lzs_f16cx4_store(ggml_fp16_t * x, float32x4_t v_y) {
#define GGML_F32_STEP 16
#define GGML_F32_EPR 4

#define GGML_F32x4 vfloat32m1_t
#define GGML_F32x4_ZERO __riscv_vfmv_v_f_f32m1(0.0f, GGML_F32_EPR)
#define GGML_F32x4_SET1(x) __riscv_vfmv_v_f_f32m1(x, GGML_F32_EPR)
#define GGML_F32x4_LOAD(x) __riscv_vle32_v_f32m1(x, GGML_F32_EPR)
#define GGML_F32x4_STORE(b, v) __riscv_vse32_v_f32m1(b, v, GGML_F32_EPR)
#define GGML_F32x4_FMA(a, b, c) __riscv_vfmacc_vv_f32m1(a, b, c, GGML_F32_EPR)
#define GGML_F32x4_ADD(a, b) __riscv_vfadd_vv_f32m1(a, b, GGML_F32_EPR)
#define GGML_F32x4_MUL(a, b) __riscv_vfmul_vv_f32m1(a, b, GGML_F32_EPR)
#define GGML_F32xt vfloat32m1_t
#define GGML_F32xt_ZERO __riscv_vfmv_v_f_f32m1(0.0f, __riscv_vsetvlmax_e32m1())
#define GGML_F32xt_SET1(x) __riscv_vfmv_v_f_f32m1(x, __riscv_vsetvlmax_e32m1())
#define GGML_F32xt_LOAD(x) __riscv_vle32_v_f32m1(x, __riscv_vsetvlmax_e32m1())
#define GGML_F32xt_STORE(b, v) __riscv_vse32_v_f32m1(b, v, __riscv_vsetvlmax_e32m1())
#define GGML_F32xt_FMA(a, b, c) __riscv_vfmacc_vv_f32m1(a, b, c, __riscv_vsetvlmax_e32m1())
#define GGML_F32xt_ADD(a, b) __riscv_vfadd_vv_f32m1(a, b, __riscv_vsetvlmax_e32m1())
#define GGML_F32xt_MUL(a, b) __riscv_vfmul_vv_f32m1(a, b, __riscv_vsetvlmax_e32m1())

#define GGML_F32_VEC GGML_F32x4
#define GGML_F32_VEC_ZERO GGML_F32x4_ZERO
#define GGML_F32_VEC_SET1 GGML_F32x4_SET1
#define GGML_F32_VEC_LOAD GGML_F32x4_LOAD
#define GGML_F32_VEC_STORE GGML_F32x4_STORE
#define GGML_F32_VEC_FMA GGML_F32x4_FMA
#define GGML_F32_VEC_ADD GGML_F32x4_ADD
#define GGML_F32_VEC_MUL GGML_F32x4_MUL
#define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE
#define GGML_F32_VEC GGML_F32xt
#define GGML_F32_VEC_ZERO GGML_F32xt_ZERO
#define GGML_F32_VEC_SET1 GGML_F32xt_SET1
#define GGML_F32_VEC_LOAD GGML_F32xt_LOAD
#define GGML_F32_VEC_STORE GGML_F32xt_STORE
#define GGML_F32_VEC_FMA GGML_F32xt_FMA
#define GGML_F32_VEC_ADD GGML_F32xt_ADD
#define GGML_F32_VEC_MUL GGML_F32xt_MUL
#define GGML_F32_VEC_REDUCE GGML_F32xt_REDUCE

#endif

Expand Down
Loading