# SYCL devices


4. **Reduction to Sum Results**
    - A reduction operation is performed to sum the results from all players.
    - Data is read from the buffer, and a reduction operation is set up using `sycl::plus` to sum up all the values.
    - A parallel for loop runs the reduction operation across all elements, combining them into the final sum.

```cpp
// Command group that folds every element of c_buf into the reduction target s_buf.
q.submit([&](auto &cgh) {
    // Read-only accessor over the per-player values stored in c_buf.
    const auto values = c_buf.get_access<sycl::access_mode::read>(cgh);

    // Reduction object bound to s_buf; sycl::plus<> makes it an additive reduction.
    const auto total = sycl::reduction(s_buf, cgh, sycl::plus<>());

    // One work-item per player: each contributes its own value to the sum.
    cgh.parallel_for(
        sycl::range<1>{number_of_players}, total,
        [=](const auto player, auto &partial) {
            partial.combine(values[player]);
        });
});
```

In [12]:
%%writefile compute.cpp
#include <cstddef>
#include <iostream>
#include <vector>

#include <sycl/sycl.hpp>

int main(int argc, char **argv) {
    const auto N = 2;
    
    std::vector<int> vectorA(N, 1);  // Vector A filled with 1s
    std::vector<int> vectorB(N, 2);  // Vector B filled with 2s
    int result = 0; // Result of dot product initialized to zero
    
    auto myQueue = sycl::queue{sycl::cpu_selector_v};
    std::cout << "Running on "
              << myQueue.get_device().get_info<sycl::info::device::name>()
              << "\n";
    {
        auto bufA = sycl::buffer{vectorA.data(), sycl::range{N}};
        auto bufB = sycl::buffer{vectorB.data(), sycl::range{N}};
        auto bufResult = sycl::buffer{&result, sycl::range{1}};
    
        myQueue.submit([&](sycl::handler &cgh) {
            auto accA = bufA.get_access<sycl::access::mode::read>(cgh);
            auto accB = bufB.get_access<sycl::access::mode::read>(cgh);
            
            //               _            _   _                     _
            //  _ __ ___  __| |_   _  ___| |_(_) ___  _ __    _ __ | |_   _ ___
            // | '__/ _ \/ _` | | | |/ __| __| |/ _ \| '_ \  | '_ \| | | | / __|
            // | | |  __/ (_| | |_| | (__| |_| | (_) | | | | | |_) | | |_| \__ \
            // |_|  \___|\__,_|\__,_|\___|\__|_|\___/|_| |_| | .__/|_|\__,_|___/
            //                                               |_|
            
            auto accResult = sycl::reduction(bufResult, cgh, sycl::plus<>());
            
            cgh.parallel_for(sycl::range<1> { N }, accResult,
                       [=](sycl::id<1> idx, auto& sum ) {
                sum += accA[idx] * accB[idx];
            });
        });
    }
      std::cout << "Dot product completed. Result: " << result
              << std::endl;
  return 0;
}


Overwriting compute.cpp


In [13]:
%%writefile ./run-sycl.sh
#!/bin/bash -x
# Compile compute.cpp with icpx (SYCL enabled) and run the resulting binary.
# The shebang must be the very first line of the written file; with a blank
# line in front of it the interpreter line (and its -x tracing) is ignored.

# Load the oneAPI environment (presumably this is what puts icpx on PATH);
# its output is silenced.
source /opt/intel/oneapi/setvars.sh > /dev/null 2>&1

# Run the program only if compilation succeeds. Using && (rather than testing
# $? in an if) makes a compile failure the script's own exit status.
icpx -fsycl compute.cpp && ./a.out

Overwriting ./run-sycl.sh


In [14]:
!chmod u+x ./run-sycl.sh &&./run-sycl.sh

Running on 12th Gen Intel(R) Core(TM) i7-1280P
Dot product completed. Result: 4
