Skip to content
This repository has been archived by the owner on Jun 9, 2019. It is now read-only.

Commit

Permalink
ARM7_FUNC_NAMESPACE namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
jcayzac committed Dec 26, 2010
1 parent 1e03bb6 commit df8a5c4
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 12 deletions.
6 changes: 6 additions & 0 deletions common.h
Expand Up @@ -20,4 +20,10 @@
#define ARMV7_FUNC_PI 0x1.921FB54442D18p1f
#define ARMV7_FUNC_2PI 0x1.921FB54442D18p2f

// Standard types
// Library-wide aliases for the NEON register types, declared inside the
// namespace named by the ARM7_FUNC_NAMESPACE macro (the macro must already be
// defined when this point is reached).
namespace ARM7_FUNC_NAMESPACE {
// vector3_t and vector4_t alias the same underlying type, so the compiler
// treats them as interchangeable; the distinct names only document intent.
typedef float32x4_t vector3_t;
typedef float32x4_t vector4_t;
// Four float32x4_t values, accessed through the .val[] array member.
typedef float32x4x4_t matrix44_t;
}

6 changes: 5 additions & 1 deletion config-defaults.h
@@ -1,6 +1,10 @@
#pragma once

// Step count passed as the template argument to details::NewtonRaphsonStepper
// by vec4_reciprocal. Define it before this header is processed to override
// the default of 2.
#ifndef ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS
#define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2
#endif

// Name of the namespace that wraps the library's declarations.
// Falls back to "armv7func" unless the macro was defined earlier.
#if !defined(ARM7_FUNC_NAMESPACE)
#  define ARM7_FUNC_NAMESPACE armv7func
#endif

3 changes: 3 additions & 0 deletions config.h
Expand Up @@ -5,4 +5,7 @@
// Default to 2
//#define ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS 2

// Name of our namespace
// Default is "armv7func"
//#define ARM7_FUNC_NAMESPACE armv7_is_cool

6 changes: 5 additions & 1 deletion vectormath/mat44_multiply.h
@@ -1,7 +1,9 @@
#pragma once
#include <armv7-functions/common.h>

ARMV7_FUNC_API void mat44_multiply(float32x4x4_t& result, const float32x4x4_t& a, const float32x4x4_t& b) {
namespace ARM7_FUNC_NAMESPACE {

ARMV7_FUNC_API void mat44_multiply(matrix44_t& result, const matrix44_t& a, const matrix44_t& b) {
// result = first column of B x first row of A
result.val[0] = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[0]), 0);
result.val[1] = vmulq_lane_f32(b.val[0], vget_low_f32(a.val[1]), 0);
Expand Down Expand Up @@ -64,3 +66,5 @@ ARMV7_FUNC_API void mat44_multiply(float32x4x4_t& result, const float32x4x4_t& a
#endif
}

} // ARM7_FUNC_NAMESPACE

8 changes: 6 additions & 2 deletions vectormath/vec3_dot.h
@@ -1,8 +1,10 @@
#pragma once
#include <armv7-functions/common.h>

ARMV7_FUNC_API void vec3_dot(float& result, const float32x4_t& a, const float32x4_t& b) {
register float32x4_t tmp;
namespace ARM7_FUNC_NAMESPACE {

ARMV7_FUNC_API void vec3_dot(float& result, const vector3_t& a, const vector3_t& b) {
register vector3_t tmp;
register const int zero(0);
asm volatile (
"# %q[tmp].x = dot(%q[a].xyz, %q[b].xyz);\n\t"
Expand All @@ -14,3 +16,5 @@ ARMV7_FUNC_API void vec3_dot(float& result, const float32x4_t& a, const float32x
result = vgetq_lane_f32(tmp, 0);
}

} // ARM7_FUNC_NAMESPACE

8 changes: 6 additions & 2 deletions vectormath/vec4_dot.h
@@ -1,8 +1,10 @@
#pragma once
#include <armv7-functions/common.h>

ARMV7_FUNC_API void vec4_dot(float& result, const float32x4_t& a, const float32x4_t& b) {
register float32x4_t tmp;
namespace ARM7_FUNC_NAMESPACE {

ARMV7_FUNC_API void vec4_dot(float& result, const vector4_t& a, const vector4_t& b) {
register vector4_t tmp;
asm volatile (
"# %q[tmp].x = dot(%q[a].xyzw, %q[b].xyzw);\n\t"
"vmul.f32 %q[tmp], %q[a], %q[b]\n\t"
Expand All @@ -12,3 +14,5 @@ ARMV7_FUNC_API void vec4_dot(float& result, const float32x4_t& a, const float32x
result = vgetq_lane_f32(tmp, 0);
}

} // ARM7_FUNC_NAMESPACE

9 changes: 6 additions & 3 deletions vectormath/vec4_homogenize.h
Expand Up @@ -2,10 +2,11 @@
#include <armv7-functions/common.h>
#include <armv7-functions/vectormath/vec4_reciprocal.h>

ARMV7_FUNC_API void vec4_homogenize(float32x4_t& vec) {
namespace ARM7_FUNC_NAMESPACE {

ARMV7_FUNC_API void vec4_homogenize(vector4_t& vec) {
asm volatile("#begin vec4_homogenize" :::);
register float32x4_t wwww;
register float32x4_t wwww_recp;
register vector4_t wwww, wwww_recp;

// This uses a NEON->ARM transfer! Why??
// tmp1 = vdupq_n_f32(vgetq_lane_f32(vec, 3));
Expand All @@ -16,3 +17,5 @@ ARMV7_FUNC_API void vec4_homogenize(float32x4_t& vec) {
asm volatile("#end vec4_homogenize" :::);
}

} // ARM7_FUNC_NAMESPACE

6 changes: 5 additions & 1 deletion vectormath/vec4_reciprocal.h
@@ -1,6 +1,8 @@
#pragma once
#include <armv7-functions/common.h>

namespace ARM7_FUNC_NAMESPACE {

namespace details {

template<int steps>
Expand All @@ -17,10 +19,12 @@ namespace details {
};
}

ARMV7_FUNC_API void vec4_reciprocal(float32x4_t& result, const float32x4_t& vec) {
// Writes an approximate reciprocal of `vec` into `result`.
//   result: output; overwritten with the reciprocal approximation.
//   vec:    input vector; not modified.
// The starting value comes from the vrecpeq_f32 intrinsic and is then handed
// to details::NewtonRaphsonStepper, instantiated with
// ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS.
// NOTE(review): presumably the stepper applies that many refinement
// iterations -- its body is not visible here; confirm.
ARMV7_FUNC_API void vec4_reciprocal(vector4_t& result, const vector4_t& vec) {
// Operand-less asm holding only a '#'-prefixed label; presumably a marker for
// locating this function's start in the generated assembly listing.
asm volatile("#begin vec4_reciprocal" :::);
// Initial estimate from the NEON reciprocal-estimate intrinsic.
result = vrecpeq_f32(vec);
// Pass the estimate and the original input to the stepper, which updates
// `result`.
details::NewtonRaphsonStepper<ARM7_FUNC_NORMALIZATION_RECIPROCAL_STEPS>::exec(result, vec);
// Matching end marker for the '#begin' label above.
asm volatile("#end vec4_reciprocal" :::);
}

} // ARM7_FUNC_NAMESPACE

8 changes: 6 additions & 2 deletions vectormath/vec4n_dot.h
@@ -1,9 +1,11 @@
#pragma once
#include <armv7-functions/common.h>

namespace ARM7_FUNC_NAMESPACE {

// Compute the dot product of two vectors of 4n floats
ARMV7_FUNC_API void vec4n_dot(float& result, const float32x4_t* a, const float32x4_t* b, unsigned int n) {
register float32x4_t tmp, va, vb;
ARMV7_FUNC_API void vec4n_dot(float& result, const vector4_t* a, const vector4_t* b, unsigned int n) {
register vector4_t tmp, va, vb;
asm volatile (
"# %q[tmp].x = dot(%q[va][i], %q[vb][i]) for (i=0; i<n; i++);\n\t"
"vmov.f32 %q[tmp], #0.0\n\t"
Expand All @@ -22,3 +24,5 @@ ARMV7_FUNC_API void vec4n_dot(float& result, const float32x4_t* a, const float32
result = vgetq_lane_f32(tmp, 0);
}

} // ARM7_FUNC_NAMESPACE

0 comments on commit df8a5c4

Please sign in to comment.