diff --git a/common.h b/common.h index cca57dd..c2ea9bc 100644 --- a/common.h +++ b/common.h @@ -20,4 +20,10 @@ #define ARMV7_FUNC_PI 0x1.921FB54442D18p1f #define ARMV7_FUNC_2PI 0x1.921FB54442D18p2f +// Standard types +namespace ARM7_FUNC_NAMESPACE { + typedef float32x4_t vector3_t; + typedef float32x4_t vector4_t; + typedef float32x4x4_t matrix44_t; +} diff --git a/config-defaults.h b/config-defaults.h index db27e3c..20bda61 100644 --- a/config-defaults.h +++ b/config-defaults.h @@ -1,6 +1,10 @@ #pragma once #ifndef ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS -#define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2 + #define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2 +#endif + +#ifndef ARM7_FUNC_NAMESPACE + #define ARM7_FUNC_NAMESPACE armv7func #endif diff --git a/config.h b/config.h index 3b9dcff..5c54667 100644 --- a/config.h +++ b/config.h @@ -5,4 +5,7 @@ // Default to 2 //#define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2 +// Name of our namespace +// Default is "armv7func" +//#define ARM7_FUNC_NAMESPACE armv7_is_cool diff --git a/vectormath/mat44_multiply.h b/vectormath/mat44_multiply.h index 7aef4c5..5e8ddae 100644 --- a/vectormath/mat44_multiply.h +++ b/vectormath/mat44_multiply.h @@ -1,7 +1,9 @@ #pragma once #include -ARMV7_FUNC_API void mat44_multiply(float32x4x4_t& result, const float32x4x4_t& a, const float32x4x4_t& b) { +namespace ARM7_FUNC_NAMESPACE { + +ARMV7_FUNC_API void mat44_multiply(matrix44_t& result, const matrix44_t& a, const matrix44_t& b) { // result = first column of B x first row of A result.val[0] = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[0]), 0); result.val[1] = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[1]), 0); @@ -64,3 +66,5 @@ ARMV7_FUNC_API void mat44_multiply(float32x4x4_t& result, const float32x4x4_t& a #endif } +} // ARM7_FUNC_NAMESPACE + diff --git a/vectormath/vec3_dot.h b/vectormath/vec3_dot.h index d433ba9..268a956 100644 --- a/vectormath/vec3_dot.h +++ b/vectormath/vec3_dot.h @@ -1,8 +1,10 @@ #pragma once #include -ARMV7_FUNC_API void vec3_dot(float& result, const float32x4_t& a, const float32x4_t& b) { - register float32x4_t tmp; +namespace ARM7_FUNC_NAMESPACE { + +ARMV7_FUNC_API void vec3_dot(float& result, const vector3_t& a, const vector3_t& b) { + register vector3_t tmp; register const int zero(0); asm volatile ( "# %q[tmp].x = dot(%q[a].xyz, %q[b].xyz);\n\t" @@ -14,3 +16,5 @@ ARMV7_FUNC_API void vec3_dot(float& result, const float32x4_t& a, const float32x result = vgetq_lane_f32(tmp, 0); } +} // ARM7_FUNC_NAMESPACE + diff --git a/vectormath/vec4_dot.h b/vectormath/vec4_dot.h index 7051cc0..47717fe 100644 --- a/vectormath/vec4_dot.h +++ b/vectormath/vec4_dot.h @@ -1,8 +1,10 @@ #pragma once #include -ARMV7_FUNC_API void vec4_dot(float& result, const float32x4_t& a, const float32x4_t& b) { - register float32x4_t tmp; +namespace ARM7_FUNC_NAMESPACE { + +ARMV7_FUNC_API void vec4_dot(float& result, const vector4_t& a, const vector4_t& b) { + register vector4_t tmp; asm volatile ( "# %q[tmp].x = dot(%q[a].xyzw, %q[b].xyzw);\n\t" "vmul.f32 %q[tmp], %q[a], %q[b]\n\t" @@ -12,3 +14,5 @@ ARMV7_FUNC_API void vec4_dot(float& result, const float32x4_t& a, const float32x result = vgetq_lane_f32(tmp, 0); } +} // ARM7_FUNC_NAMESPACE + diff --git a/vectormath/vec4_homogenize.h b/vectormath/vec4_homogenize.h index c11325c..600b607 100644 --- a/vectormath/vec4_homogenize.h +++ b/vectormath/vec4_homogenize.h @@ -2,10 +2,11 @@ #include #include -ARMV7_FUNC_API void vec4_homogenize(float32x4_t& vec) { +namespace ARM7_FUNC_NAMESPACE { + +ARMV7_FUNC_API void vec4_homogenize(vector4_t& vec) { asm volatile("#begin vec4_homogenize" :::); - register float32x4_t wwww; - register float32x4_t wwww_recp; + register vector4_t wwww, wwww_recp; // This use a neon>arm transfer! why?? // tmp1 = vdupq_n_f32(vgetq_lane_f32(vec, 3)); @@ -16,3 +17,5 @@ ARMV7_FUNC_API void vec4_homogenize(float32x4_t& vec) { asm volatile("#end vec4_homogenize" :::); } +} // ARM7_FUNC_NAMESPACE + diff --git a/vectormath/vec4_reciprocal.h b/vectormath/vec4_reciprocal.h index d3a69ff..f781e56 100644 --- a/vectormath/vec4_reciprocal.h +++ b/vectormath/vec4_reciprocal.h @@ -1,6 +1,8 @@ #pragma once #include +namespace ARM7_FUNC_NAMESPACE { + namespace details { template @@ -17,10 +19,12 @@ namespace details { }; } -ARMV7_FUNC_API void vec4_reciprocal(float32x4_t& result, const float32x4_t& vec) { +ARMV7_FUNC_API void vec4_reciprocal(vector4_t& result, const vector4_t& vec) { asm volatile("#begin vec4_reciprocal" :::); result = vrecpeq_f32(vec); details::NewtonRaphsonStepper::exec(result, vec); asm volatile("#end vec4_reciprocal" :::); } +} // ARM7_FUNC_NAMESPACE + diff --git a/vectormath/vec4n_dot.h b/vectormath/vec4n_dot.h index 294382e..2df15b8 100644 --- a/vectormath/vec4n_dot.h +++ b/vectormath/vec4n_dot.h @@ -1,9 +1,11 @@ #pragma once #include +namespace ARM7_FUNC_NAMESPACE { + // Compute the dot product of two vectors of 4n floats -ARMV7_FUNC_API void vec4n_dot(float& result, const float32x4_t* a, const float32x4_t* b, unsigned int n) { - register float32x4_t tmp, va, vb; +ARMV7_FUNC_API void vec4n_dot(float& result, const vector4_t* a, const vector4_t* b, unsigned int n) { + register vector4_t tmp, va, vb; asm volatile ( "# %q[tmp].x = dot(%q[va][i], %q[vb][i]) for (i=0; i