-
-
Notifications
You must be signed in to change notification settings - Fork 335
/
arch_string.S
168 lines (132 loc) · 3.7 KB
/
arch_string.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/*
** Copyright 2001, Travis Geiselbrecht. All rights reserved.
** Distributed under the terms of the NewOS License.
*/
#include <asm_defs.h>
#if 1
/* that should be enough for now */
.align 4
FUNCTION(memcpy):
// check for zero length copy or the same pointer
cmp r2, #0
cmpne r1, r0
bxeq lr
// save a few registers for use and the return code (input dst)
stmfd sp!, {r0, r4, r5, lr}
// check for forwards overlap (src > dst, distance < len)
subs r3, r0, r1
cmpgt r2, r3
bgt .L_forwardoverlap
// check for a short copy len.
// 20 bytes is enough so that if a 16 byte alignment needs to happen there is at least a
// wordwise copy worth of work to be done.
cmp r2, #(16+4)
blt .L_bytewise
// see if they are similarly aligned on 4 byte boundaries
eor r3, r0, r1
tst r3, #3
bne .L_bytewise // dissimilarly aligned, nothing we can do (for now)
// check for 16 byte alignment on dst.
// this will also catch src being not 4 byte aligned, since it is similarly 4 byte
// aligned with dst at this point.
tst r0, #15
bne .L_not16bytealigned
// check to see if we have at least 32 bytes of data to copy.
// if not, just revert to wordwise copy
cmp r2, #32
blt .L_wordwise
.L_bigcopy:
// copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
// and we need at least 32 bytes remaining to copy
// save r6-r7 for use in the big copy
stmfd sp!, {r6-r7}
sub r2, r2, #32 // subtract an extra 32 to the len so we can avoid an extra compare
.L_bigcopy_loop:
ldmia r1!, {r4, r5, r6, r7}
stmia r0!, {r4, r5, r6, r7}
ldmia r1!, {r4, r5, r6, r7}
subs r2, r2, #32
stmia r0!, {r4, r5, r6, r7}
bge .L_bigcopy_loop
// restore r6-r7
ldmfd sp!, {r6-r7}
// see if we are done
adds r2, r2, #32
beq .L_done
// less then 4 bytes left?
cmp r2, #4
blt .L_bytewise
.L_wordwise:
// copy 4 bytes at a time.
// src & dst are guaranteed to be word aligned, and at least 4 bytes are left to copy.
subs r2, r2, #4
.L_wordwise_loop:
ldr r3, [r1], #4
subs r2, r2, #4
str r3, [r0], #4
bge .L_wordwise_loop
// correct the remaining len and test for completion
adds r2, r2, #4
beq .L_done
.L_bytewise:
// simple bytewise copy
ldrb r3, [r1], #1
subs r2, r2, #1
strb r3, [r0], #1
bgt .L_bytewise
.L_done:
// load dst for return and restore r4,r5
//#if ARM_ARCH_LEVEL >= 5
// ldmfd sp!, {r0, r4, r5, pc}
//#else
ldmfd sp!, {r0, r4, r5, lr}
bx lr
//#endif
.L_not16bytealigned:
// dst is not 16 byte aligned, so we will copy up to 15 bytes to get it aligned.
// src is guaranteed to be similarly word aligned with dst.
// set the condition flags based on the alignment.
lsl r12, r0, #28
rsb r12, r12, #0
msr CPSR_f, r12 // move into NZCV fields in CPSR
// move as many bytes as necessary to get the dst aligned
#ifdef __clang__
ldrbvs r3, [r1], #1 // V set
ldrhcs r4, [r1], #2 // C set
ldreq r5, [r1], #4 // Z set
strbvs r3, [r0], #1
strhcs r4, [r0], #2
streq r5, [r0], #4
ldmiami r1!, {r3-r4} // N set
stmiami r0!, {r3-r4}
#else
ldrvsb r3, [r1], #1 // V set
ldrcsh r4, [r1], #2 // C set
ldreq r5, [r1], #4 // Z set
strvsb r3, [r0], #1
strcsh r4, [r0], #2
streq r5, [r0], #4
ldmmiia r1!, {r3-r4} // N set
stmmiia r0!, {r3-r4}
#endif
// fix the remaining len
sub r2, r2, r12, lsr #28
// test to see what we should do now
cmp r2, #32
bge .L_bigcopy
b .L_wordwise
// src and dest overlap 'forwards' or dst > src
.L_forwardoverlap:
// do a bytewise reverse copy for now
add r1, r1, r2
add r0, r0, r2
.L_bytewisereverse:
// simple bytewise reverse copy
ldrb r3, [r1], #-1
subs r2, r2, #1
strb r3, [r0], #-1
bgt .L_bytewisereverse
b .L_done
// check for zero length copy or the same pointer
FUNCTION_END(memcpy)
#endif