-
Notifications
You must be signed in to change notification settings - Fork 31
/
vfp.S
152 lines (139 loc) · 3.48 KB
/
vfp.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/* https://github.com/cirosantilli/arm-assembly-cheat#vfp
* Adapted from: https://mindplusplus.wordpress.com/2013/06/27/arm-vfp-vector-programming-part-2-examples/ */
#include "common.h"
/* Operands and expected result for the vec_sum test: every array holds
 * eight single precision values.
 */
.data
a1:
.float 0.0, 0.5, 1.0, 1.5
.float 2.0, 2.5, 3.0, 3.5
a2:
.float 5.0, 5.5, 6.0, 6.5
.float 7.0, 7.5, 8.0, 8.5
/* Output buffer filled in by vec_sum: 8 elements of 4 bytes. */
sum:
.skip 8 * 4
/* Element-wise a1 + a2, compared against sum after the call. */
sum_expect:
.float 5.0, 6.0, 7.0, 8.0
.float 9.0, 10.0, 11.0, 12.0
ENTRY
/* Minimal single precision floating point example: 1.5 + 2.5 == 4.0.
 * TODO: floating point representation constraints due to 4-byte instruction?
 * NOTE(review): presumably only a limited set of constants can be used as a
 * vmov immediate because the value has to fit inside the instruction
 * encoding -- confirm against the architecture manual.
 */
vmov s0, 1.5
vmov s1, 2.5
vadd.f32 s2, s0, s1
vmov s3, 4.0
/* Compare two floating point registers. Stores results in fpscr:
 * (floating point status and control register).
 */
vcmp.f32 s2, s3
/* Move the nzcv bits from fpscr to apsr, so that regular conditional
 * instructions can act on the result of the floating point compare.
 */
vmrs apsr_nzcv, fpscr
/* This branch uses the Z bit of apsr, which was set accordingly. */
ASSERT(beq)
/* Now the same from memory with vldr and vstr.
 * The operands and the expected sum live in .data, the result slot in .bss;
 * we switch back to .text before emitting further instructions.
 */
.data
my_float_0:
.float 1.5
my_float_1:
.float 2.5
my_float_sum_expect:
.float 4.0
.bss
my_float_sum:
.skip 4
.text
/* Addresses are first loaded into r0 with the ldr = pseudo instruction,
 * and then used as the base register of vldr / vstr.
 */
ldr r0, =my_float_0
vldr s0, [r0]
ldr r0, =my_float_1
vldr s1, [r0]
vadd.f32 s2, s0, s1
ldr r0, =my_float_sum
vstr.f32 s2, [r0]
/* Compare the 4 stored bytes against the expected 4.0. */
ASSERT_MEMCMP(my_float_sum, my_float_sum_expect, 4)
#if 0
/* We can't use a label directly as the vldr operand, like we can for ldr.
 * It fails with:
 * Error: cannot represent CP_OFF_IMM relocation in this object file format
 * It works on ARMv8 however, so the relocation must have been added.
 */
vldr s0, my_float_0
#endif
/* Minimal double precision floating point example.
 * Same computation as the single precision one above, but on d registers.
 */
vmov.f64 d0, 1.5
vmov.f64 d1, 2.5
vadd.f64 d2, d0, d1
vmov.f64 d3, 4.0
vcmp.f64 d2, d3
vmrs apsr_nzcv, fpscr
ASSERT(beq)
/* vmov can also move to general purpose registers.
 *
 * Just remember that we can't use float immediates with general purpose registers:
 * https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126
 *
 * r1 starts out different from r0, so the final check can only pass if the
 * value really made the round trip r0 -> s0 -> s1 -> r1.
 */
mov r1, 2
mov r0, 1
vmov s0, r0
vmov s1, s0
vmov r1, s1
ASSERT_EQ_REG(r0, r1)
/* Now a more complex test function.
 * Arguments follow the vec_sum C prototype documented at its definition:
 * r0 = destination, r1 and r2 = the two inputs, r3 = number of elements.
 */
ldr r0, =sum
ldr r1, =a1
ldr r2, =a2
mov r3, 8
bl vec_sum
/* The assert works easily because all floats used
 * have exact base-2 representation.
 * 0x20 bytes == 8 floats of 4 bytes each.
 */
ASSERT_MEMCMP(sum, sum_expect, 0x20)
EXIT
/* void vec_sum(float *sum, float *a1, float *a2, int length) {
 *     int i;
 *     for (i = 0; i < length; i++)
 *         *(sum+i) = *(a1+i) + *(a2+i);
 * }
 *
 * inputs:
 *   r0: sum, destination array
 *   r1: a1, first source array
 *   r2: a2, second source array
 *   r3: length, number of elements
 * outputs: the first (length / 8) * 8 elements of sum
 * modified: r2, r3, s8-s15, condition flags, FPSCR stride / length fields
 *   (set back to stride 1, length 1 by deconfig before returning).
 *   r0 and r1 are restored to their entry values, s16-s31 are preserved.
 * notes:
 *   Elements are processed in groups of 8 using VFP short vectors.
 *   Trailing elements that do not fill a whole group are not processed,
 *   and a length smaller than 8 (including 0 and negatives) does nothing.
 */
vec_sum:
    /* Setup. r4 is not used by the body.
     * NOTE(review): presumably it is only pushed to keep the stack 8-byte
     * aligned -- confirm.
     */
    push {r0, r1, r4, lr}
    /* s16-s31 are callee-saved under the AAPCS, and they get overwritten
     * below, so save them while the FPSCR is still in its entry state.
     */
    vpush {s16-s31}
    /* reconfig takes its arguments in r0 and r1: keep our pointers safe. */
    push {r0, r1}
    mov r0, 1
    mov r1, 8
    bl reconfig
    pop {r0, r1}
    /* r3 = number of whole groups of 8 elements. */
    asr r3, 3
    /* The loop below tests its counter only after the first pass, so
     * entering it with r3 <= 0 would wrap around and run past the
     * buffers: skip it when there is no whole group to process.
     */
    cmp r3, 0
    ble 2f
    /* Do the sum. */
1:
    /* Load 8 floats from each input, post-incrementing the pointers. */
    fldmias r1!, {s8-s15}
    fldmias r2!, {s16-s23}
    /* With the vector length set to 8, this single instruction adds
     * s8-s15 to s16-s23 element-wise into s24-s31.
     */
    vadd.f32 s24, s8, s16
    fstmias r0!, {s24-s31}
    subs r3, r3, 1
    bne 1b
2:
    /* Teardown. */
    bl deconfig
    vpop {s16-s31}
    pop {r0, r1, r4, pc}
/* Set the vector stride and vector length fields of the FPSCR,
 * leaving every other FPSCR bit as it was.
 *
 * inputs:
 *   r0: desired vector stride (1 or 2)
 *   r1: desired vector length (min. 1, max. 8)
 * outputs: (none)
 * modified: FPSCR stride and length fields only.
 *   r0-r2 are used as scratch, but saved on entry and restored on exit.
 * notes:
 *   r0 and r1 will be truncated before fitting into FPSCR
 */
reconfig:
    push {r0-r2}
    /* Stride field encoding: stride 1 -> 0b00, stride 2 -> 0b11. */
    and r0, r0, 3
    eor r0, r0, 1
    /* Length field encoding: 3 bits holding length - 1. */
    sub r1, r1, 1
    and r1, r1, 7
    /* r0 = (stride field << 20) | (length field << 16) */
    mov r0, r0, lsl 20
    orr r0, r0, r1, lsl 16
    /* Read-modify-write: clear the old stride (bits 21:20) and length
     * (bits 18:16) fields, 55*65536 == 0x37 << 16, then merge the new ones.
     */
    vmrs r2, fpscr
    bic r2, 55*65536
    orr r2, r2, r0
    /* Write back the merged value in r2. Writing r0 here instead would
     * zero every other FPSCR field (rounding mode, status flags, ...).
     */
    vmsr fpscr, r2
    pop {r0-r2}
    bx lr
/* Put the FPSCR back to vector stride 1 and vector length 1 by calling
 * reconfig with those two values.
 *
 * inputs: (none)
 * outputs: (none)
 * r0 and r1 carry the arguments for reconfig, so the caller's values are
 * saved on the stack and restored on return, together with the return
 * address that bl overwrites in lr.
 */
deconfig:
    stmfd sp!, {r0, r1, lr}
    /* r1 = length 1, r0 = stride 1 */
    mov r1, 1
    mov r0, 1
    bl reconfig
    /* Loading the saved lr straight into pc returns to the caller. */
    ldmfd sp!, {r0, r1, pc}