Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ project(sycl-solution)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

if(MSVC)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
Expand Down Expand Up @@ -135,6 +136,7 @@ add_library("${SYCLLibrary}" SHARED
"${sourceRootPath}/detail/program_manager/program_manager.cpp"
"${sourceRootPath}/detail/queue_impl.cpp"
"${sourceRootPath}/detail/os_util.cpp"
"${sourceRootPath}/detail/platform_util.cpp"
"${sourceRootPath}/detail/sampler_impl.cpp"
"${sourceRootPath}/detail/scheduler/commands.cpp"
"${sourceRootPath}/detail/scheduler/commands2.cpp"
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/detail/buffer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class accessor;
template <typename T, int dimensions, typename AllocatorT> class buffer;
class handler;
class queue;
template <int dimentions> class id;
template <int dimentions> struct id;
template <int dimentions> class range;
using buffer_allocator = aligned_allocator<char, /*alignment*/ 64>;
namespace detail {
Expand Down
4 changes: 2 additions & 2 deletions sycl/include/CL/sycl/detail/helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ namespace cl {
namespace sycl {
class context;
class event;
template <int dimensions, bool with_offset> class item;
template <int dimensions, bool with_offset> struct item;
template <int dimensions> class group;
template <int dimensions> class range;
template <int dimensions> class id;
template <int dimensions> struct id;
template <int dimensions> class nd_item;
namespace detail {
class context_impl;
Expand Down
5 changes: 5 additions & 0 deletions sycl/include/CL/sycl/detail/os_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#pragma once

#include <stdlib.h>

#ifdef _WIN32
#define SYCL_RT_OS_WINDOWS
// Windows platform
Expand Down Expand Up @@ -48,6 +50,9 @@ class OSUtil {
/// Module handle for the executable module - it is assumed that there is
/// at most one such module.
static const OSModuleHandle ExeModuleHandle;

/// Returns the amount of RAM available for the operating system.
static size_t getOSMemSize();
};

} // namespace detail
Expand Down
40 changes: 40 additions & 0 deletions sycl/include/CL/sycl/detail/platform_util.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//===-- platform_util.hpp - platform utilities ----------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <cstdint>

namespace cl {
namespace sycl {
namespace detail {

/// Groups static queries about properties of the platform the runtime is
/// executing on. Every member is declared here without a body.
struct PlatformUtil {
  /// Identifies the element type that a native vector width query refers to.
  /// Enumerators are numbered consecutively, starting at 0 for Char and
  /// ending at 6 for Half.
  enum class TypeIndex : unsigned int {
    Char = 0,
    Short,
    Int,
    Long,
    Float,
    Double,
    Half
  };

  /// Returns the maximum vector width counted in elements of the type
  /// selected by \p Index.
  static std::uint32_t getNativeVectorWidth(TypeIndex Index);

  /// Returns the maximum clock frequency.
  /// NOTE(review): the unit is not stated in this header (presumably MHz) -
  /// confirm against the implementation.
  static std::uint32_t getMaxClockFrequency();

  /// Returns the memory cache line size.
  /// NOTE(review): presumably expressed in bytes - confirm against the
  /// implementation.
  static std::uint32_t getMemCacheLineSize();

  /// Returns the memory cache size.
  /// NOTE(review): presumably expressed in bytes - confirm against the
  /// implementation.
  static std::uint64_t getMemCacheSize();
};

} // namespace detail
} // namespace sycl
} // namespace cl
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace cl {
namespace sycl {
namespace detail {
class Builder;
struct Builder;
} // namespace detail

template <int dimensions = 1> class group {
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/CL/sycl/id.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ namespace cl {
namespace sycl {
template <int dimensions> class range;
template <int dimensions = 1> struct id : public detail::array<dimensions> {
public:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that replacing class by struct is a simplification in the current situation.
But I wonder whether the original author instead meant for the next line (the base alias) to be private, so that public: was simply on the wrong line...
Is there a reason to have base publicly visible?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the main goal of all those changes ("class" --> "struct") was to stay in sync with the SYCL specification and to avoid build warnings like "this class was initially declared as struct".

In the additional change today I fixed 'id.hpp' and 'range.hpp' to hide 'base' inside a private section.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Ronan,
Please approve this PR if you agree with the additional changes.
Thank you,
Vyacheslav

private:
using base = detail::array<dimensions>;
public:
id() = default;

/* The following constructor is only available in the id struct
Expand Down
9 changes: 5 additions & 4 deletions sycl/include/CL/sycl/item.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
namespace cl {
namespace sycl {
namespace detail {
class Builder;
struct Builder;
}
template <int dimensions> struct id;
template <int dimensions> struct range;
template <int dimensions> class range;
template <int dimensions = 1, bool with_offset = true> struct item {

item() = delete;
Expand Down Expand Up @@ -86,8 +86,9 @@ template <int dimensions = 1, bool with_offset = true> struct item {

protected:
// For call constructor inside conversion operator
friend class item<dimensions, false>;
friend class detail::Builder;
friend struct item<dimensions, false>;
friend struct item<dimensions, true>;
friend struct detail::Builder;

template <size_t W = with_offset>
item(typename std::enable_if<(W == true), const range<dimensions>>::type &R,
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/nd_item.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
namespace cl {
namespace sycl {
namespace detail {
class Builder;
struct Builder;
}
template <int dimensions = 1> struct nd_item {

Expand Down
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ namespace sycl {
template <int dimensions> struct id;
template <int dimensions = 1>
class range : public detail::array<dimensions> {
public:
using base = detail::array<dimensions>;
public:
/* The following constructor is only available in the range class
specialization where: dimensions==1 */
template <int N = dimensions>
Expand Down
103 changes: 13 additions & 90 deletions sycl/source/detail/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
//===----------------------------------------------------------------------===//

#include <CL/sycl/detail/device_info.hpp>
#include <CL/sycl/detail/os_util.hpp>
#include <CL/sycl/detail/platform_util.hpp>
#include <CL/sycl/device.hpp>
#include <chrono>
#include <sys/sysinfo.h>
#include <thread>

#ifdef __GNUG__
Expand All @@ -21,22 +22,6 @@ namespace cl {
namespace sycl {
namespace detail {

// Used by methods that duplicate OpenCL behaviour in order to get CPU info
// TODO add Windows support
// TODO add support for x86-64 ABI selected using ifdef.
static void cpuid(unsigned int cpuid_info[], unsigned int type) {
unsigned int eax, ebx, ecx, edx;
__asm__ __volatile__("mov %%ebx, %%edi\n\r"
"cpuid\n\r"
"xchg %%edi, %%ebx\n\r"
: "=a"(eax), "=D"(ebx), "=c"(ecx), "=d"(edx)
: "a"(type));
cpuid_info[0] = eax;
cpuid_info[1] = ebx;
cpuid_info[2] = ecx;
cpuid_info[3] = edx;
}

vector_class<info::fp_config> read_fp_bitfield(cl_device_fp_config bits) {
vector_class<info::fp_config> result;
if (bits & CL_FP_DENORM)
Expand Down Expand Up @@ -156,109 +141,51 @@ cl_uint get_device_info_host<info::device::preferred_vector_width_half>() {
return 0;
}

// SSE4.2 has 16 byte (XMM) registers
static const cl_uint NATIVE_VECTOR_WIDTH_SSE42[] = {16, 8, 4, 2, 4, 2, 0};
// AVX supports 32 byte (YMM) registers only for floats and doubles
static const cl_uint NATIVE_VECTOR_WIDTH_AVX[] = {16, 8, 4, 2, 8, 4, 0};
// AVX2 has a full set of 32 byte (YMM) registers
static const cl_uint NATIVE_VECTOR_WIDTH_AVX2[] = {32, 16, 8, 4, 8, 4, 0};
// AVX512 has 64 byte (ZMM) registers
static const cl_uint NATIVE_VECTOR_WIDTH_AVX512[] = {64, 32, 16, 8, 16, 8, 0};

cl_uint get_native_vector_width(size_t idx) {
#if (__GNUG__ && GCC_VERSION > 40900)
if (__builtin_cpu_supports("avx512f")) {
return NATIVE_VECTOR_WIDTH_AVX512[idx];
}
#endif

if (__builtin_cpu_supports("avx2")) {
return NATIVE_VECTOR_WIDTH_AVX2[idx];
}
if (__builtin_cpu_supports("avx")) {
return NATIVE_VECTOR_WIDTH_AVX[idx];
}
return NATIVE_VECTOR_WIDTH_SSE42[idx];
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_char>() {
return get_native_vector_width(0);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Char);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_short>() {
return get_native_vector_width(1);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Short);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_int>() {
return get_native_vector_width(2);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Int);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_long>() {
return get_native_vector_width(3);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Long);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_float>() {
return get_native_vector_width(4);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Float);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_double>() {
return get_native_vector_width(5);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Double);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_half>() {
return get_native_vector_width(6);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Half);
}

template <> cl_uint get_device_info_host<info::device::max_clock_frequency>() {
throw runtime_error(
"max_clock_frequency parameter is not supported for host device");
unsigned int cpuInfo[4] = {0 - 1u};
string_class buff(sizeof(cpuInfo) * 3 + 1, 0);
size_t offset = 0;

for (unsigned int i = 0x80000002; i <= 0x80000004; i++) {
cpuid(cpuInfo, i);
std::copy(reinterpret_cast<char *>(cpuInfo),
reinterpret_cast<char *>(cpuInfo) + sizeof(cpuInfo),
buff.begin() + offset);
offset += sizeof(cpuInfo);
}
std::size_t found = buff.rfind("Hz");
// Bail out if frequency is not found in CPUID string
if (found == std::string::npos)
return 0;

buff = buff.substr(0, found);

cl_uint freq = 0;
switch (buff[buff.size() - 1]) {
case 'M':
freq = 1;
break;
case 'G':
freq = 1000;
break;
}
buff = buff.substr(buff.rfind(' '), buff.length());
freq *= std::stod(buff);
return freq;
return PlatformUtil::getMaxClockFrequency();
}

template <> cl_uint get_device_info_host<info::device::address_bits>() {
return sizeof(void *) * 8;
}

template <> cl_ulong get_device_info_host<info::device::global_mem_size>() {
struct sysinfo meminfo;
sysinfo(&meminfo);
return meminfo.totalram * meminfo.mem_unit;
return static_cast<cl_ulong>(OSUtil::getOSMemSize());
}

template <> cl_ulong get_device_info_host<info::device::max_mem_alloc_size>() {
Expand Down Expand Up @@ -362,16 +289,12 @@ get_device_info_host<info::device::global_mem_cache_type>() {

template <>
cl_uint get_device_info_host<info::device::global_mem_cache_line_size>() {
unsigned int viCPUInfo[4] = {(unsigned int)-1};
cpuid(viCPUInfo, 0x80000006);
return viCPUInfo[2] & 0xff;
return PlatformUtil::getMemCacheLineSize();
}

template <>
cl_ulong get_device_info_host<info::device::global_mem_cache_size>() {
unsigned int viCPUInfo[4] = {(unsigned int)-1};
cpuid(viCPUInfo, 0x80000006);
return ((viCPUInfo[2] >> 16) & 0xffff) * 1024;
return PlatformUtil::getMemCacheSize();
}

template <>
Expand Down
Loading