# Loopy: Reductions

## Setup code

In [6]:
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
import loopy as lp

from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2

In [7]:
# Create an OpenCL context. With interactive=True this prompts for a platform
# choice (see the "Choose platform" output below); per that same output, setting
# the PYOPENCL_CTX environment variable avoids being asked again.
ctx = cl.create_some_context(interactive=True)
# Command queue on that context; the later cells pass it to clrandom and to the kernel call.
queue = cl.CommandQueue(ctx)

Choose platform:
[0] <pyopencl.Platform 'Portable Computing Language' at 0x7ff8d5ac06e8>
[1] <pyopencl.Platform 'Intel(R) OpenCL' at 0x36a3e38>


Choice [0]: 


Set the environment variable PYOPENCL_CTX='' to avoid being asked again.


In [8]:
# Problem size: `a` is an n-by-n matrix and `x` a length-n vector.
n = 1024
# Random float32 inputs generated through pyopencl.clrandom on `queue`.
# NOTE(review): no explicit seed is set here, so values presumably differ between runs — confirm.
a = cl.clrandom.rand(queue, (n, n), dtype=np.float32)
x = cl.clrandom.rand(queue, (n,), dtype=np.float32)

## Capturing matrix-vector multiplication

In [9]:
# Express the matrix-vector product b[i] = sum_k a[i, k] * x[k] as a loopy kernel.
#  - First string: the loop domain; both i and k range over 0 <= i, k < n.
#  - Second string: the single instruction; sum(k, ...) is a reduction over k,
#    so k does not appear on the left-hand side.
# `n` is not bound to a value here; in the generated code it appears as a kernel argument.
knl = lp.make_kernel(
    "{[i,k]: 0<=i,k<n}",
    "b[i] = sum(k, a[i, k]*x[k])"
    )

In [11]:
# write_cl=True: the generated OpenCL source is printed when the kernel is invoked
# (that is the listing shown in this cell's output).
knl = lp.set_options(knl, write_cl=True)
# Invoke the kernel with the inputs passed by name. Only the first returned value is kept;
# the second (presumably the kernel outputs, i.e. `b` — TODO confirm) is discarded.
evt, _ = knl(queue, a=a, x=x)

[36m#[39;49;00m[36mdefine lid(N) ((int) get_local_id(N))[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mdefine gid(N) ((int) get_group_id(N))[39;49;00m[36m[39;49;00m

__kernel [36mvoid[39;49;00m [32m__attribute__[39;49;00m ((reqd_work_group_size([34m1[39;49;00m, [34m1[39;49;00m, [34m1[39;49;00m))) loopy_kernel(__global [36mfloat[39;49;00m [34mconst[39;49;00m *__restrict__ a, __global [36mfloat[39;49;00m *__restrict__ b, [36mint[39;49;00m [34mconst[39;49;00m n, __global [36mfloat[39;49;00m [34mconst[39;49;00m *__restrict__ x)
{
  [36mfloat[39;49;00m acc_k;

  [34mfor[39;49;00m ([36mint[39;49;00m i = [34m0[39;49;00m; i <= -[34m1[39;49;00m + n; ++i)
  {
    acc_k = [34m0.0f[39;49;00m;
    [34mfor[39;49;00m ([36mint[39;49;00m k = [34m0[39;49;00m; k <= -[34m1[39;49;00m + n; ++k)
      acc_k = acc_k + a[n * i + k] * x[k];
    b[i] = acc_k;
  }
}

