/
volk_cpu.tmpl.c
246 lines (217 loc) · 6.58 KB
/
volk_cpu.tmpl.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
/*
* Copyright 2011-2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#include <volk/volk_cpu.h>
#include <volk/volk_config_fixed.h>
#include <stdlib.h>
#include <string.h>
struct VOLK_CPU volk_cpu;
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define VOLK_CPU_x86
#endif
#if defined(VOLK_CPU_x86)
//implement get cpuid for gcc compilers using a system or local copy of cpuid.h
#if defined(__GNUC__)
#include <cpuid.h>
#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)
#define cpuid_x86_count(op, count, regs) __cpuid_count(op, count, *((unsigned int*)regs), *((unsigned int*)regs+1), *((unsigned int*)regs+2), *((unsigned int*)regs+3))
/* Return Intel AVX extended CPU capabilities register.
* This function will bomb on non-AVX-capable machines, so
* check for AVX capability before executing.
*/
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) && defined(HAVE_XGETBV)
static inline unsigned long long _xgetbv(unsigned int index){
unsigned int eax, edx;
__VOLK_ASM __VOLK_VOLATILE ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
return ((unsigned long long)edx << 32) | eax;
}
#define __xgetbv() _xgetbv(0)
#else
#define __xgetbv() 0
#endif
//implement get cpuid for MSVC compilers using __cpuid intrinsic
#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H)
#include <intrin.h>
#define cpuid_x86(op, r) __cpuid(((int*)r), op)
#define cpuid_x86_count(op, count, regs) __cpuidex((int*)regs, op, count)
#if defined(_XCR_XFEATURE_ENABLED_MASK)
#define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
#else
#define __xgetbv() 0
#endif
#else
#error "A get cpuid for volk is not available on this compiler..."
#endif //defined(__GNUC__)
#endif //defined(VOLK_CPU_x86)
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) {
#if defined(VOLK_CPU_x86)
unsigned int regs[4] = {0};
cpuid_x86_count(level, count, regs);
return regs[reg] >> bit & 0x01;
#else
return 0;
#endif
}
static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) {
#if defined(VOLK_CPU_x86)
unsigned int regs[4];
memset(regs, 0, sizeof(unsigned int)*4);
cpuid_x86(op, regs);
return regs[reg] >> bit & 0x01;
#else
return 0;
#endif
}
static inline unsigned int check_extended_cpuid(unsigned int val) {
#if defined(VOLK_CPU_x86)
unsigned int regs[4];
memset(regs, 0, sizeof(unsigned int)*4);
cpuid_x86(0x80000000, regs);
return regs[0] >= val;
#else
return 0;
#endif
}
static inline unsigned int get_avx_enabled(void) {
#if defined(VOLK_CPU_x86)
return __xgetbv() & 0x6;
#else
return 0;
#endif
}
static inline unsigned int get_avx2_enabled(void) {
#if defined(VOLK_CPU_x86)
return __xgetbv() & 0x6;
#else
return 0;
#endif
}
//neon detection is linux specific
#if defined(__arm__) && defined(__linux__)
#include <asm/hwcap.h>
#include <linux/auxvec.h>
#include <stdio.h>
#define VOLK_CPU_ARMV7
#endif
static int has_neonv7(void){
#if defined(VOLK_CPU_ARMV7)
FILE *auxvec_f;
unsigned long auxvec[2];
unsigned int found_neon = 0;
auxvec_f = fopen("/proc/self/auxv", "rb");
if(!auxvec_f) return 0;
size_t r = 1;
//so auxv is basically 32b of ID and 32b of value
//so it goes like this
while(!found_neon && r) {
r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f);
if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
found_neon = 1;
}
fclose(auxvec_f);
return found_neon;
#else
return 0;
#endif
}
//\todo: Fix this to really check for neon on aarch64
//neon detection is linux specific
#if defined(__aarch64__) && defined(__linux__)
#include <asm/hwcap.h>
#include <linux/auxvec.h>
#include <stdio.h>
#define VOLK_CPU_ARMV8
#endif
static int has_neonv8(void){
#if defined(VOLK_CPU_ARMV8)
FILE *auxvec_f;
unsigned long auxvec[2];
unsigned int found_neon = 0;
auxvec_f = fopen("/proc/self/auxv", "rb");
if(!auxvec_f) return 0;
size_t r = 1;
//so auxv is basically 32b of ID and 32b of value
//so it goes like this
while(!found_neon && r) {
r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f);
if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_ASIMD))
found_neon = 1;
}
fclose(auxvec_f);
return found_neon;
#else
return 0;
#endif
}
static int has_neon(void){
#if defined(VOLK_CPU_ARMV8) || defined(VOLK_CPU_ARMV7)
if (has_neonv7() || has_neonv8())
return 1;
else
return 0;
#else
return 0;
#endif
}
%for arch in archs:
static int i_can_has_${arch.name} (void) {
%for check, params in arch.checks:
if (${check}(<% joined_params = ', '.join(params)%>${joined_params}) == 0) return 0;
%endfor
return 1;
}
%endfor
#if defined(HAVE_FENV_H)
#if defined(FE_TONEAREST)
#include <fenv.h>
static inline void set_float_rounding(void){
fesetround(FE_TONEAREST);
}
#else
static inline void set_float_rounding(void){
//do nothing
}
#endif
#elif defined(_MSC_VER)
#include <float.h>
static inline void set_float_rounding(void){
unsigned int cwrd;
_controlfp_s(&cwrd, 0, 0);
_controlfp_s(&cwrd, _RC_NEAR, _MCW_RC);
}
#else
static inline void set_float_rounding(void){
//do nothing
}
#endif
void volk_cpu_init() {
%for arch in archs:
volk_cpu.has_${arch.name} = &i_can_has_${arch.name};
%endfor
set_float_rounding();
}
unsigned int volk_get_lvarch() {
unsigned int retval = 0;
volk_cpu_init();
%for arch in archs:
retval += volk_cpu.has_${arch.name}() << LV_${arch.name.upper()};
%endfor
return retval;
}