Skip to content

Commit

Permalink
AARCH64 : Modifying checks and ensuring compatible isa/arch combinati…
Browse files Browse the repository at this point in the history
…ons.
  • Loading branch information
DeepakRajendrakumaran authored and aneshlya committed May 9, 2019
1 parent d7afe0e commit ee8fa6d
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 23 deletions.
4 changes: 2 additions & 2 deletions check_isa.cpp
Expand Up @@ -44,7 +44,7 @@
#include <intrin.h>
#endif

#if !defined(__arm__)
#if !defined(__arm__) && !defined(__aarch64__)
#if !defined(ISPC_IS_WINDOWS)
static void __cpuid(int info[4], int infoType) {
__asm__ __volatile__("cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "0"(infoType));
Expand Down Expand Up @@ -93,7 +93,7 @@ static bool __os_has_avx512_support() {
#endif // !__arm__ && !__aarch64__

static const char *lGetSystemISA() {
#ifdef __arm__
#if defined(__arm__) || defined(__aarch64__)
return "ARM NEON";
#else
int info[4];
Expand Down
6 changes: 3 additions & 3 deletions examples/timing.h
Expand Up @@ -33,7 +33,7 @@

#include <stdint.h>

#ifdef __arm__
#if defined(__arm__) || defined(__aarch64__)
#include <sys/time.h>
// There's no easy way to get a hardware clock counter on ARM, so instead
// we'll pretend it's a 1GHz processor and then compute pretend cycles
Expand Down Expand Up @@ -65,7 +65,7 @@ static inline double rtc(void) {
return etime;
}

#else // __arm__
#else // __arm__ || __aarch64__

#ifdef WIN32
#include <windows.h>
Expand Down Expand Up @@ -94,7 +94,7 @@ static inline double rtc(void) {
}

#endif // !WIN32
#endif // !__arm__
#endif // !__arm__ && !__aarch64__

static uint64_t start, end;
static double tstart, tend;
Expand Down
52 changes: 34 additions & 18 deletions src/ispc.cpp
Expand Up @@ -104,7 +104,7 @@ Module *m;
///////////////////////////////////////////////////////////////////////////
// Target

#if !defined(ISPC_IS_WINDOWS) && !defined(__arm__)
#if !defined(ISPC_IS_WINDOWS) && !defined(__arm__) && !defined(__aarch64__)
// Executes the x86 CPUID instruction with EAX preloaded with infoType and
// stores the resulting EAX, EBX, ECX and EDX register values into info[0],
// info[1], info[2] and info[3] respectively.
// Note that ECX is not initialized on input, so no particular sub-leaf is
// requested by this helper.
static void __cpuid(int info[4], int infoType) {
// The "0" input constraint ties infoType to output operand 0, i.e. EAX.
__asm__ __volatile__("cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "0"(infoType));
}
Expand All @@ -117,9 +117,9 @@ static void __cpuidex(int info[4], int level, int count) {
: "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
: "0"(level), "2"(count));
}
#endif // !ISPC_IS_WINDOWS && !__ARM__
#endif // !ISPC_IS_WINDOWS && !__arm__ && !__aarch64__

#if !defined(__arm__)
#if !defined(__arm__) && !defined(__aarch64__)
static bool __os_has_avx_support() {
#if defined(ISPC_IS_WINDOWS)
// Check if the OS will save the YMM registers
Expand Down Expand Up @@ -150,10 +150,10 @@ static bool __os_has_avx512_support() {
return (rEAX & 0xE6) == 0xE6;
#endif // !defined(ISPC_IS_WINDOWS)
}
#endif // !__arm__
#endif // !__arm__ && !__aarch64__

static const char *lGetSystemISA() {
#ifdef __arm__
#if defined(__arm__) || defined(__aarch64__)
return "neon-i32x4";
#else
int info[4];
Expand Down Expand Up @@ -210,6 +210,26 @@ static const char *lGetSystemISA() {
#endif
}

/** Returns true when \c str begins with \c prefix, comparing ASCII letters
    without regard to case.  Both arguments must be non-NULL,
    NUL-terminated strings.
 */
static bool lStartsWithNoCase(const char *str, const char *prefix) {
    for (; *prefix != '\0'; ++str, ++prefix) {
        char a = *str;
        char b = *prefix;
        if (a >= 'A' && a <= 'Z')
            a = static_cast<char>(a - 'A' + 'a');
        if (b >= 'A' && b <= 'Z')
            b = static_cast<char>(b - 'A' + 'a');
        // A shorter str yields a == '\0' here, which never equals b.
        if (a != b)
            return false;
    }
    return true;
}

/** Checks whether the target ISA named by \c isa may be combined with the
    architecture named by \c arch.  All comparisons ignore case.

    Rules applied, first match wins:
      - names starting with "sse" or "avx" require "x86" or "x86-64";
      - "neon-i8x16" and "neon-i16x8" require "arm";
      - "neon-i32x4" and "neon" require "arm" or "aarch64";
      - "nvptx" requires "nvptx64".
    Any ISA name not covered by these rules is accepted for every arch.

    @param isa   Non-NULL target ISA name.
    @param arch  Non-NULL architecture name.
    @return true if the combination is allowed, false otherwise.
 */
static bool lIsISAValidforArch(const char *isa, const char *arch) {
    // The prefix test is case-insensitive so that it agrees with the
    // strcasecmp()-based checks below; otherwise e.g. "AVX2-i32x8" would
    // slip past validation for a non-x86 arch.
    if (lStartsWithNoCase(isa, "sse") || lStartsWithNoCase(isa, "avx"))
        return !strcasecmp(arch, "x86-64") || !strcasecmp(arch, "x86");

    if (!strcasecmp(isa, "neon-i8x16") || !strcasecmp(isa, "neon-i16x8"))
        return !strcasecmp(arch, "arm");

    if (!strcasecmp(isa, "neon-i32x4") || !strcasecmp(isa, "neon"))
        return !strcasecmp(arch, "arm") || !strcasecmp(arch, "aarch64");

    if (!strcasecmp(isa, "nvptx"))
        return !strcasecmp(arch, "nvptx64");

    // No restriction is known for any other ISA name.
    return true;
}

typedef enum {
// Special value, indicates that no CPU is present.
CPU_None = 0,
Expand Down Expand Up @@ -582,6 +602,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
else
#endif /* ISPC_NVPTX_ENABLED */
arch = "x86-64";
} else if (!lIsISAValidforArch(isa, arch)) {
Error(SourcePos(), "arch = %s and target = %s is not a valid combination.\n", arch, isa);
return;
}

// Define arch alias
Expand Down Expand Up @@ -983,22 +1006,15 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
}

#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4) && !strncmp(arch, "arm", 3))
CPUID = CPU_CortexA9;
#endif
// TODO: Revisit CPU selection for cross-compilation.
#if defined(ISPC_ARM_ENABLED) && !defined(__aarch64__)
if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4) && !strncmp(arch, "aarch64", 7))
CPUID = CPU_CortexA35;
#endif
if (CPUID == CPU_None) {
if (isa == NULL) {
std::string hostCPU = llvm::sys::getHostCPUName();
if (hostCPU.size() > 0)
cpu = strdup(hostCPU.c_str());
else {
Warning(SourcePos(), "Unable to determine host CPU!\n");
cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
}
} else {
cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
}
cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
} else {
if ((CPUfromISA != CPU_None) && !a.BackwardCompatible(CPUID, CPUfromISA)) {
Error(SourcePos(),
Expand Down
1 change: 1 addition & 0 deletions src/main.cpp
Expand Up @@ -419,6 +419,7 @@ int main(int Argc, char *Argv[]) {
llvm::sys::AddSignalHandler(lSignal, NULL);

// initialize available LLVM targets
// TODO: Revisit after experimenting on ARM and AArch64 hardware.
#ifndef __arm__
// FIXME: LLVM build on ARM doesn't build the x86 targets by default.
// It's not clear that anyone's going to want to generate x86 from an
Expand Down

0 comments on commit ee8fa6d

Please sign in to comment.