-
Notifications
You must be signed in to change notification settings - Fork 109
/
aarch64.h
227 lines (201 loc) · 7.62 KB
/
aarch64.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
/*
* Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
* Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
* Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
*
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
* OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program
* for any purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is granted,
* provided the above notices are retained, and a notice that the code was
* modified is included with the above copyright notice.
*
*/
/* As of clang-5.0 (and gcc-5.4), __atomic_thread_fence is always */
/* translated to DMB (which is inefficient for AO_nop_write). */
/* TODO: Update it for newer Clang and GCC releases. */
/* The asm-based AO_nop_write below is compiled only when none of */
/* AO_PREFER_BUILTIN_ATOMICS, AO_THREAD_SANITIZER and AO_UNIPROCESSOR */
/* is defined; otherwise no AO_nop_write is defined at this point. */
#if !defined(AO_PREFER_BUILTIN_ATOMICS) && !defined(AO_THREAD_SANITIZER) \
&& !defined(AO_UNIPROCESSOR)
/* Write barrier: takes no arguments and returns nothing; its only */
/* effect is the barrier instruction issued below. */
AO_INLINE void
AO_nop_write(void)
{
/* "dmb ishst" is the store-only form of the data memory barrier for */
/* the inner shareable domain. The "memory" clobber additionally */
/* tells the compiler not to move memory accesses across this asm. */
__asm__ __volatile__("dmb ishst" : : : "memory");
}
/* Advertise that AO_nop_write has been provided by this header. */
# define AO_HAVE_nop_write
#endif
/* There were some bugs in older clang releases (supposedly related to */
/* the optimization of functions dealing with __int128 values), so */
/* even the asm-based implementation did not work correctly. */
#if !defined(__clang__) || AO_CLANG_PREREQ(3, 9)
# include "../standard_ao_double_t.h"
/* As of gcc-5.4, all built-in double-word load/store and CAS atomics */
/* require -latomic, are not lock-free, and cause a test_stack */
/* failure, so the asm-based implementation is used for now. */
/* TODO: Update it for newer GCC releases. */
#if !defined(__clang__) || defined(AO_AARCH64_ASM_LOAD_STORE_CAS)
# ifndef AO_PREFER_GENERALIZED
/* Read both words of *addr as a single unit, using the plain (no */
/* acquire/release) exclusive pair instructions LDXP/STXP. */
/* Returns the two words that were read. */
AO_INLINE AO_double_t
AO_double_load(const volatile AO_double_t *addr)
{
  AO_double_t snapshot;
  int stxp_failed;

  /* LD[A]XP on its own is not single-copy atomic (unlike LDREXD on */
  /* 32-bit ARM), so the STXP must stay: the pair just read is */
  /* written back, and the read is repeated until that */
  /* store-exclusive reports success (a zero status). */
  for (;;) {
    __asm__ __volatile__("//AO_double_load\n"
    " ldxp %0, %1, %3\n"
    " stxp %w2, %0, %1, %3"
    : "=&r" (snapshot.AO_val1), "=&r" (snapshot.AO_val2),
      "=&r" (stxp_failed)
    : "Q" (*addr));
    if (!AO_EXPECT_FALSE(stxp_failed))
      break;
  }
  return snapshot;
}
# define AO_HAVE_double_load
/* Read both words of *addr as a single unit, using the load-acquire */
/* exclusive pair instruction LDAXP followed by a plain STXP. */
/* Returns the two words that were read. */
AO_INLINE AO_double_t
AO_double_load_acquire(const volatile AO_double_t *addr)
{
  AO_double_t snapshot;
  int stxp_failed;

  /* The pair just read is written back with STXP; the whole sequence */
  /* is repeated until the store-exclusive reports a zero status. */
  for (;;) {
    __asm__ __volatile__("//AO_double_load_acquire\n"
    " ldaxp %0, %1, %3\n"
    " stxp %w2, %0, %1, %3"
    : "=&r" (snapshot.AO_val1), "=&r" (snapshot.AO_val2),
      "=&r" (stxp_failed)
    : "Q" (*addr));
    if (!AO_EXPECT_FALSE(stxp_failed))
      break;
  }
  return snapshot;
}
# define AO_HAVE_double_load_acquire
/* Write both words of value to *addr as a single unit, using the */
/* plain (no acquire/release) exclusive pair instructions LDXP/STXP. */
AO_INLINE void
AO_double_store(volatile AO_double_t *addr, AO_double_t value)
{
  AO_double_t discarded; /* receives the LDXP result; never examined */
  int stxp_failed;

  /* The exclusive load precedes the STXP in the same asm statement; */
  /* the sequence is repeated until STXP reports a zero status. */
  /* Unlike the arm.h implementation, 'cc' (flags) is not listed as */
  /* clobbered because A64 has no concept of conditional execution. */
  for (;;) {
    __asm__ __volatile__("//AO_double_store\n"
    " ldxp %0, %1, %3\n"
    " stxp %w2, %4, %5, %3"
    : "=&r" (discarded.AO_val1), "=&r" (discarded.AO_val2),
      "=&r" (stxp_failed), "=Q" (*addr)
    : "r" (value.AO_val1), "r" (value.AO_val2));
    if (!AO_EXPECT_FALSE(stxp_failed))
      break;
  }
}
# define AO_HAVE_double_store
/* Write both words of value to *addr as a single unit, using a plain */
/* LDXP followed by the store-release exclusive pair instruction */
/* STLXP. */
AO_INLINE void
AO_double_store_release(volatile AO_double_t *addr, AO_double_t value)
{
  AO_double_t discarded; /* receives the LDXP result; never examined */
  int stlxp_failed;

  /* Repeat the exclusive load/store sequence until STLXP reports a */
  /* zero status. */
  for (;;) {
    __asm__ __volatile__("//AO_double_store_release\n"
    " ldxp %0, %1, %3\n"
    " stlxp %w2, %4, %5, %3"
    : "=&r" (discarded.AO_val1), "=&r" (discarded.AO_val2),
      "=&r" (stlxp_failed), "=Q" (*addr)
    : "r" (value.AO_val1), "r" (value.AO_val2));
    if (!AO_EXPECT_FALSE(stlxp_failed))
      break;
  }
}
# define AO_HAVE_double_store_release
# endif /* !AO_PREFER_GENERALIZED */
/* Double-word compare-and-swap built from plain LDXP and STXP (neither */
/* the acquire nor the release form of the instructions is used). */
/* addr - location of the double-word to update; */
/* old_val - value *addr is expected to hold; */
/* new_val - value to store if both words match old_val. */
/* Returns 1 if new_val was stored, 0 if *addr did not match old_val. */
AO_INLINE int
AO_double_compare_and_swap(volatile AO_double_t *addr,
AO_double_t old_val, AO_double_t new_val)
{
AO_double_t tmp;
/* Starts as non-zero so that leaving the loop through the mismatch */
/* "break" below makes the function return 0. */
int result = 1;
do {
/* Exclusive load of both words of *addr into tmp. */
__asm__ __volatile__("//AO_double_compare_and_swap\n"
" ldxp %0, %1, %2\n"
: "=&r" (tmp.AO_val1), "=&r" (tmp.AO_val2)
: "Q" (*addr));
/* Give up (without storing) as soon as either word differs. */
if (tmp.AO_val1 != old_val.AO_val1 || tmp.AO_val2 != old_val.AO_val2)
break;
/* Exclusive store of new_val; result receives the STXP status. */
/* NOTE(review): this is a separate asm statement from the LDXP */
/* above, so compiler-generated code sits between the two - confirm */
/* that this is acceptable for the targeted compilers. */
__asm__ __volatile__(
" stxp %w0, %2, %3, %1\n"
: "=&r" (result), "=Q" (*addr)
: "r" (new_val.AO_val1), "r" (new_val.AO_val2));
/* A non-zero status means the store did not happen: retry. */
} while (AO_EXPECT_FALSE(result));
return !result;
}
# define AO_HAVE_double_compare_and_swap
/* Double-word compare-and-swap whose load uses the load-acquire */
/* exclusive form (LDAXP); the store is a plain STXP. */
/* addr - location of the double-word to update; */
/* old_val - value *addr is expected to hold; */
/* new_val - value to store if both words match old_val. */
/* Returns 1 if new_val was stored, 0 if *addr did not match old_val. */
AO_INLINE int
AO_double_compare_and_swap_acquire(volatile AO_double_t *addr,
AO_double_t old_val, AO_double_t new_val)
{
AO_double_t tmp;
/* Starts as non-zero so that leaving the loop through the mismatch */
/* "break" below makes the function return 0. */
int result = 1;
do {
/* Load-acquire exclusive of both words of *addr into tmp. */
__asm__ __volatile__("//AO_double_compare_and_swap_acquire\n"
" ldaxp %0, %1, %2\n"
: "=&r" (tmp.AO_val1), "=&r" (tmp.AO_val2)
: "Q" (*addr));
/* Give up (without storing) as soon as either word differs. */
if (tmp.AO_val1 != old_val.AO_val1 || tmp.AO_val2 != old_val.AO_val2)
break;
/* Exclusive store of new_val; result receives the STXP status. */
/* NOTE(review): this is a separate asm statement from the LDAXP */
/* above, so compiler-generated code sits between the two - confirm */
/* that this is acceptable for the targeted compilers. */
__asm__ __volatile__(
" stxp %w0, %2, %3, %1\n"
: "=&r" (result), "=Q" (*addr)
: "r" (new_val.AO_val1), "r" (new_val.AO_val2));
/* A non-zero status means the store did not happen: retry. */
} while (AO_EXPECT_FALSE(result));
return !result;
}
# define AO_HAVE_double_compare_and_swap_acquire
/* Double-word compare-and-swap whose store uses the store-release */
/* exclusive form (STLXP); the load is a plain LDXP. */
/* addr - location of the double-word to update; */
/* old_val - value *addr is expected to hold; */
/* new_val - value to store if both words match old_val. */
/* Returns 1 if new_val was stored, 0 if *addr did not match old_val. */
AO_INLINE int
AO_double_compare_and_swap_release(volatile AO_double_t *addr,
AO_double_t old_val, AO_double_t new_val)
{
AO_double_t tmp;
/* Starts as non-zero so that leaving the loop through the mismatch */
/* "break" below makes the function return 0. */
int result = 1;
do {
/* Exclusive load of both words of *addr into tmp. */
__asm__ __volatile__("//AO_double_compare_and_swap_release\n"
" ldxp %0, %1, %2\n"
: "=&r" (tmp.AO_val1), "=&r" (tmp.AO_val2)
: "Q" (*addr));
/* Give up (without storing) as soon as either word differs. */
if (tmp.AO_val1 != old_val.AO_val1 || tmp.AO_val2 != old_val.AO_val2)
break;
/* Store-release exclusive of new_val; result receives the STLXP */
/* status. */
/* NOTE(review): this is a separate asm statement from the LDXP */
/* above, so compiler-generated code sits between the two - confirm */
/* that this is acceptable for the targeted compilers. */
__asm__ __volatile__(
" stlxp %w0, %2, %3, %1\n"
: "=&r" (result), "=Q" (*addr)
: "r" (new_val.AO_val1), "r" (new_val.AO_val2));
/* A non-zero status means the store did not happen: retry. */
} while (AO_EXPECT_FALSE(result));
return !result;
}
# define AO_HAVE_double_compare_and_swap_release
/* Double-word compare-and-swap that uses both the load-acquire */
/* (LDAXP) and the store-release (STLXP) exclusive forms. */
/* addr - location of the double-word to update; */
/* old_val - value *addr is expected to hold; */
/* new_val - value to store if both words match old_val. */
/* Returns 1 if new_val was stored, 0 if *addr did not match old_val. */
AO_INLINE int
AO_double_compare_and_swap_full(volatile AO_double_t *addr,
AO_double_t old_val, AO_double_t new_val)
{
AO_double_t tmp;
/* Starts as non-zero so that leaving the loop through the mismatch */
/* "break" below makes the function return 0. */
int result = 1;
do {
/* Load-acquire exclusive of both words of *addr into tmp. */
__asm__ __volatile__("//AO_double_compare_and_swap_full\n"
" ldaxp %0, %1, %2\n"
: "=&r" (tmp.AO_val1), "=&r" (tmp.AO_val2)
: "Q" (*addr));
/* Give up (without storing) as soon as either word differs. */
if (tmp.AO_val1 != old_val.AO_val1 || tmp.AO_val2 != old_val.AO_val2)
break;
/* Store-release exclusive of new_val; result receives the STLXP */
/* status. */
/* NOTE(review): this is a separate asm statement from the LDAXP */
/* above, so compiler-generated code sits between the two - confirm */
/* that this is acceptable for the targeted compilers. */
__asm__ __volatile__(
" stlxp %w0, %2, %3, %1\n"
: "=&r" (result), "=Q" (*addr)
: "r" (new_val.AO_val1), "r" (new_val.AO_val2));
/* A non-zero status means the store did not happen: retry. */
} while (AO_EXPECT_FALSE(result));
return !result;
}
# define AO_HAVE_double_compare_and_swap_full
#endif /* !__clang__ || AO_AARCH64_ASM_LOAD_STORE_CAS */
/* As of clang-5.0 and gcc-5.4, __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 */
/* macro is still missing (while the double-word CAS is available). */
# define AO_GCC_HAVE_double_SYNC_CAS
#endif /* !__clang__ || AO_CLANG_PREREQ(3, 9) */
/* For clang older than 3.8 and for Apple's compiler (__APPLE_CC__), */
/* define AO_GCC_FORCE_HAVE_CAS before generic.h is included. */
#if (defined(__clang__) && !AO_CLANG_PREREQ(3, 8)) || defined(__APPLE_CC__)
/* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros are missing. */
# define AO_GCC_FORCE_HAVE_CAS
#endif
/* Pull in the generic implementation. Presumably it consults */
/* AO_GCC_FORCE_HAVE_CAS and AO_GCC_HAVE_double_SYNC_CAS - verify */
/* against generic.h. */
#include "generic.h"
/* Both helper macros are removed again right after the include, so */
/* they do not remain defined for code that follows this header. */
#undef AO_GCC_FORCE_HAVE_CAS
#undef AO_GCC_HAVE_double_SYNC_CAS