Permalink
Browse files

Detect presence of half precision conversion instructions (X86 only)

Several changes:
1) Add runtime/libpgmath/lib/x86_64/x86id.c and x86id.h
2) Add "X86IDFN(is_f16c)()" and "X86IDFN(is_f16c_cached)" to cpuid/common/x86id.c
3) Add "is_f16c" to runtime/libpgmath/lib/x86_64/cpuid8664.h
4) runtime/libpgmath/lib/x86_64/x86id.h needs to export the correct function name
   and define global variable X86IDFN(is_f16c_cached)
5) rte/pgc/port/src/dispatch.c:
   Now that x86id.c has been moved into libpgmath, change dispatch.c
   from using a subset of the CPUID "is_<FEATURE>" routines defined as
   local (static) in header file "cpuid8664.h" to using the functions
   defined in header file "x86id.h".
  • Loading branch information...
gklimowicz committed Dec 20, 2018
1 parent 87c7238 commit 3660d02c4290abdeac1dca8b2ede71554033482d
@@ -86,7 +86,7 @@
#include "mth_tbldefs.h"

#if defined(TARGET_LINUX_X8664) || defined(TARGET_OSX_X8664) || defined(TARGET_WIN_X8664)
#include "cpuid8664.h"
#include "x86id.h"
#endif

/*
@@ -1026,25 +1026,25 @@ __math_dispatch()

} else { /* Get processor architecture using CPUID information */
#if defined(TARGET_LINUX_X8664) || defined(TARGET_OSX_X8664) || defined(TARGET_WIN_X8664)
if (CPUIDX8664(is_avx512vl)() == 1) {
if (X86IDFN(is_avx512vl)() == 1) {
__math_target = arch_avx512;
} else if (CPUIDX8664(is_avx512f)() == 1) {
} else if (X86IDFN(is_avx512f)() == 1) {
__math_target = arch_avx512knl;
} else if (CPUIDX8664(is_avx2)() == 1) {
} else if (X86IDFN(is_avx2)() == 1) {
__math_target = arch_avx2;
} else if (CPUIDX8664(is_avx)() == 1) {
if (CPUIDX8664(is_intel)() == 1) {
} else if (X86IDFN(is_avx)() == 1) {
if (X86IDFN(is_intel)() == 1) {
__math_target = arch_avx;
}
if (CPUIDX8664(is_amd)() == 1) {
if (CPUIDX8664(is_fma4)() == 1) {
if (X86IDFN(is_amd)() == 1) {
if (X86IDFN(is_fma4)() == 1) {
__math_target = arch_avxfma4;
} else {
__math_target = arch_sse4;
}
}
} else {
if ((CPUIDX8664(is_sse4a)() == 1) || (CPUIDX8664(is_sse41)() == 1)) {
if ((X86IDFN(is_sse4a)() == 1) || (X86IDFN(is_sse41)() == 1)) {
__math_target = arch_sse4;
} else {
__math_target = arch_em64t;
@@ -58,9 +58,16 @@ set(SRCS
dsqrt.c
fabs.c
sqrt.c
pgcpuid.c
${ASM_SRCS})
libmath_add_object_library("${SRCS}" "${FLAGS}" "${DEFINITIONS}" "")

# Decorate entry points and global objects in x86id with an internal prefix.
set(SRCS
x86id.c)
list(APPEND DEFINITIONS_FOR_LIBPGC ${DEFINITIONS} FOR_LIBPGC)
libmath_add_object_library("${SRCS}" "${FLAGS}" "${DEFINITIONS_FOR_LIBPGC}" "for_libpgc")

# isoc99
set(SRCS
alog.c
@@ -61,6 +61,7 @@ static int CPUIDX8664(is_amd)();
static int CPUIDX8664(is_fma4)();
static int CPUIDX8664(is_sse4a)();
static int CPUIDX8664(is_sse41)();
static int CPUIDX8664(is_f16c)();

/*
* Check that this is a Genuine Intel processor
@@ -296,6 +297,30 @@ CPUIDX8664(is_avx512vl)(void)
return (ebx & bit_AVX512VL) != 0;
}/* is_avx512vl */

/*
* Check that this is either a Genuine Intel or AMD processor that supports
* f16c instructions.
*/
static int
CPUIDX8664(is_f16c)(void)
{
  uint32_t reg_a, reg_b, reg_c, reg_d;

  /*
   * Only Intel and AMD parts are considered.  The vendor checks
   * short-circuit: is_amd() is consulted only when is_intel() reports 0.
   */
  const int known_vendor =
      (CPUIDX8664(is_intel)() != 0) || (CPUIDX8664(is_amd)() != 0);

  /* AVX support is a precondition; it is queried only for known vendors. */
  if (!known_vendor || (CPUIDX8664(is_avx)() == 0)) {
    return 0;
  }

  /* Query CPUID function 1; a zero return means the query failed. */
  if (__get_cpuid(1, &reg_a, &reg_b, &reg_c, &reg_d) == 0) {
    return 0;
  }

  /* The F16C feature flag is reported in %ecx. */
  return ((reg_c & bit_F16C) != 0) ? 1 : 0;
}/* is_f16c */

#ifdef UNIT_TEST
int
main()
@@ -309,6 +334,7 @@ main()
printf("is_avx2()=%d\n", CPUIDX8664(is_avx2)());
printf("is_avx512f()=%d\n", CPUIDX8664(is_avx512f)());
printf("is_avx512vl()=%d\n", CPUIDX8664(is_avx512vl)());
printf("is_f16c()=%d\n", CPUIDX8664(is_f16c)());
}
#endif
#endif // #ifndef CPUIDX8664_H
@@ -0,0 +1,135 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

#include <stdint.h>
#include "pgcpuid.h"

/*
* Note:
* 1) these functions cannot call any other function
* 2) these functions can only use GPR (not floating point)
*
*/

/** @brief returns false/true depending on whether CPUID supports the
* requested function (the value passed in %eax).
* __pgi_cpuid_getmax(uint32_t cpuid_func)
* @param cpuid_func (I1) function to execute CPUID with
*
* Returns false(0)/true(1)
*
*/

int
__pgi_cpuid_getmax(uint32_t f)
{
  /*
   * Keep only bit 31 of the requested function.  That selects function 0
   * for the basic range and function 0x80000000 for the extended range;
   * executing CPUID with either one reports, in %eax, the highest function
   * number supported in that range.
   */
  const uint32_t range_base = f & 0x80000000u;
  uint32_t max_func;

  /*
   * Spelled __asm__ rather than asm so that this also compiles in strict
   * ISO modes (e.g. -std=c11), where the plain asm keyword is not
   * recognized.  CPUID also writes %ebx, %ecx and %edx; they are not
   * needed here, so they are listed as clobbers.
   */
  __asm__("\tcpuid"
          : "=a"(max_func)
          : "0"(range_base)
          : "ebx", "ecx", "edx");

  /* Supported (1) iff the request does not exceed the reported maximum. */
  return f <= max_func;
}

/** @brief returns results of executing CPUID with function cpuid_func and
* sub function ecx.
* __pgi_cpuid_ecx(uint32_t cpuid_func, uint32_t *res, uint32_t ecx)
* @param cpuid_func (I1) function to execute CPUID with
* @param res (I2) pointer to buffer to store eax, ebx, ecx, edx
* @param ecx (I3) value of %ecx to execute CPUID with
*
* Returns false(0): if cpuid_func not supported
* true(1): CPUID successfully executed with cpuid_func+ecx and:
* res[0]=%eax, res[1]=%ebx, res[2]=%ecx, res[3]=%edx
*
*/

int
__pgi_cpuid_ecx(uint32_t f, uint32_t *r, uint32_t c)
{
  uint32_t out_eax, out_ebx, out_ecx, out_edx;

  /* Unsupported function: report failure and leave r[] untouched. */
  if (__pgi_cpuid_getmax(f) == 0) {
    return 0;
  }

  /*
   * Execute CPUID with %eax = f and %ecx = c.  The "0" and "2" matching
   * constraints tie the inputs to the %eax and %ecx output operands.
   * Spelled __asm__ rather than asm so that this also compiles in strict
   * ISO modes (e.g. -std=c11), where the plain asm keyword is not
   * recognized.
   */
  __asm__("\tcpuid"
          : "=a"(out_eax), "=b"(out_ebx), "=c"(out_ecx), "=d"(out_edx)
          : "0"(f), "2"(c));

  /* Hand the four result registers back in eax, ebx, ecx, edx order. */
  r[0] = out_eax;
  r[1] = out_ebx;
  r[2] = out_ecx;
  r[3] = out_edx;
  return 1;
}


/** @brief returns results of executing CPUID with function cpuid_func.
* __pgi_cpuid(uint32_t cpuid_func, uint32_t *res)
* @param cpuid_func (I1) function to execute CPUID with
* @param res (I2) pointer to buffer to store eax, ebx, ecx, edx
*
* Returns false(0): if cpuid_func not supported
* true(1): CPUID successfully executed with cpuid_func and:
* res[0]=%eax, res[1]=%ebx, res[2]=%ecx, res[3]=%edx
*
*/

int
__pgi_cpuid(uint32_t f, uint32_t *r)
{
  /* Plain CPUID query: delegate with the sub-function (%ecx) fixed at 0. */
  const uint32_t subfunc = 0;

  return __pgi_cpuid_ecx(f, r, subfunc);
}

/** @brief returns results of executing CPUID with function cpuid_func.
* __pgcpuid(uint32_t cpuid_func, uint32_t *res)
* @param cpuid_func (I1) function to execute CPUID with
* @param res (I2) pointer to buffer to store eax, ebx, ecx, edx
*
* Returns false(0): if cpuid_func not supported
* true(1): CPUID successfully executed with cpuid_func and:
* res[0]=%eax, res[1]=%ebx, res[2]=%ecx, res[3]=%edx
*
*/

int
__pgcpuid(uint32_t f, uint32_t *r)
{
  int supported;

  /* Forwards to __pgi_cpuid_ecx() with sub-function (%ecx) zero. */
  supported = __pgi_cpuid_ecx(f, r, 0u);
  return supported;
}

/** @brief read extended control register.
* __pgi_getbv(uint32_t xcr_num, uint64_t *xcr_res)
* @param xcr_num (I1) extended control register number to read
* @param xcr_res (I2) pointer to buffer to store xcr[xcr_num]
*
* Returns true(1) with:
* xcr_res[31: 0]=%eax
* xcr_res[63:32]=%edx
*
*/
int
__pgi_getbv(uint32_t f, uint64_t *r)
{
  /*
   * Receive the two 32-bit halves in locals and combine them with integer
   * arithmetic.  Writing through a uint32_t pointer cast from the uint64_t
   * pointer would violate the strict-aliasing rules.
   */
  uint32_t lo;
  uint32_t hi;

  /*
   * %ecx selects the extended control register; the result comes back in
   * %edx:%eax.  Spelled __asm__ rather than asm so that this also compiles
   * in strict ISO modes (e.g. -std=c11).
   */
  __asm__(
#if defined(__WIN64)
      /* Raw opcode bytes for XGETBV -- presumably because the assembler
         used on this target lacks the mnemonic (TODO confirm). */
      "\t.byte\t0x0f, 0x01, 0xd0"
#else
      "\txgetbv"
#endif
      : "=a"(lo), "=d"(hi)
      : "c"(f));

  /* xcr_res[31:0] = %eax, xcr_res[63:32] = %edx. */
  *r = ((uint64_t)hi << 32) | (uint64_t)lo;
  return 1;
}
Oops, something went wrong.

0 comments on commit 3660d02

Please sign in to comment.