Skip to content

Commit

Permalink
[SYCL][ESIMD] Fix several issues related to building of ESIMD on Windows (#6971)
Browse files Browse the repository at this point in the history
  • Loading branch information
fineg74 committed Oct 18, 2022
1 parent be0905f commit 77e92ce
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -551,11 +551,11 @@ __esimd_dpas_inner(const __ESIMD_DNS::vector_type_t<T0, SZ> *src0,

__ESIMD_DNS::vector_type_t<TmpAccEl, SIMDSize> simdAcc;

for (uint r = 0; r < repeat_count; r++) {
for (unsigned r = 0; r < repeat_count; r++) {
V = r;
k = 0;

for (uint n = 0; n < SIMDSize; n++) {
for (unsigned n = 0; n < SIMDSize; n++) {
if (src0 != nullptr) {
auto src0El = src0[0][r * SIMDSize + n];

Expand All @@ -570,13 +570,13 @@ __esimd_dpas_inner(const __ESIMD_DNS::vector_type_t<T0, SZ> *src0,
simdAcc[n] = 0;
}

for (uint s = 0; s < systolic_depth; s++) {
for (unsigned s = 0; s < systolic_depth; s++) {
src1_ops_per_dword = 32 / (ops_per_chan * src1_el_bits);
// U = s / src1_ops_per_dword;
U = s >> uint(log2(src1_ops_per_dword));
U = s >> unsigned(log2(src1_ops_per_dword));

for (uint n = 0; n < SIMDSize; n++) {
for (uint d = 0; d < ops_per_chan; d++) {
for (unsigned n = 0; n < SIMDSize; n++) {
for (unsigned d = 0; d < ops_per_chan; d++) {
p = d + (s % src1_ops_per_dword) * ops_per_chan;
uint32_t extension_temp = false;

Expand Down Expand Up @@ -618,7 +618,7 @@ __esimd_dpas_inner(const __ESIMD_DNS::vector_type_t<T0, SZ> *src0,

} // Systolic phase.

for (uint n = 0; n < SIMDSize; n++) {
for (unsigned n = 0; n < SIMDSize; n++) {
if constexpr (pvcBfDest) {
// TODO: make abstraction, support saturation, review rounding algo for
// corner cases.
Expand Down
4 changes: 2 additions & 2 deletions sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -988,8 +988,8 @@ template <typename RT, typename T0, int SZ,
__ESIMD_API __ESIMD_NS::simd<RT, SZ> lzd(__ESIMD_NS::simd<T0, SZ> src0,
Sat sat = {}) {
// Saturation parameter ignored
__ESIMD_NS::simd<uint, SZ> Src0 = src0;
return __esimd_lzd<uint>(Src0.data());
__ESIMD_NS::simd<__ESIMD_NS::uint, SZ> Src0 = src0;
return __esimd_lzd<__ESIMD_NS::uint, SZ>(Src0.data());
}

template <typename RT, typename T0, class Sat = __ESIMD_NS::saturation_off_tag>
Expand Down
39 changes: 21 additions & 18 deletions sycl/test/esimd/regression/windows_build_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,39 @@
//
//===----------------------------------------------------------------------===//
// REQUIRES: windows
// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s -I %sycl_include
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s -I %sycl_include
// expected-no-diagnostics

// This test validates the ability to build ESIMD code on the Windows platform.

#include <iostream>
#include <CL/sycl.hpp>
#include <iostream>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/intel/experimental/esimd/memory.hpp>

class Kernel;

int main()
{
int main() {
sycl::queue q;
sycl::device dev = q.get_device();
sycl::context ctx = q.get_context();
std::cout << "Device: " << dev.get_info<sycl::info::device::name>() << std::endl;

int* buffer = (int*)sycl::aligned_alloc_device(128, 1024, q);

q.parallel_for<Kernel>(
1,
[=](sycl::item<1> it) SYCL_ESIMD_KERNEL {
using namespace sycl::ext::intel::esimd;
using namespace sycl::ext::intel::experimental::esimd;

simd<int, 32> blk;
lsc_block_store<int, 32>(buffer, blk);
});
std::cout << "Device: " << dev.get_info<sycl::info::device::name>()
<< std::endl;

int *buffer = (int *)sycl::aligned_alloc_device(128, 1024, q);

q.parallel_for<Kernel>(1, [=](sycl::item<1> it) SYCL_ESIMD_KERNEL {
using namespace sycl::ext::intel::esimd;
using namespace sycl::ext::intel::experimental::esimd;

simd<int, 32> blk;
simd<sycl::ext::oneapi::experimental::bfloat16, 16> A;
simd<sycl::ext::oneapi::experimental::bfloat16, 256> B;
simd<float, 16> C;
lzd<uint>(blk);
lzd<uint>(35);
sycl::ext::intel::esimd::xmx::dpas<8, 1, float>(C, B, A);
lsc_block_store<int, 32>(buffer, blk);
});

q.wait();
sycl::free(buffer, q);
Expand Down

0 comments on commit 77e92ce

Please sign in to comment.