# PyOpenCL

## Introduction

In [2]:
import pyopencl as cl
import numpy as np

In [3]:
# Context and command queue shared by every buffer, program build, copy and
# kernel launch in the rest of this notebook.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

In [4]:
# Short alias for the memory-flag constants (READ_WRITE, COPY_HOST_PTR) passed to cl.Buffer below.
mf = cl.mem_flags

In [30]:
# Host-side input: the ten values 0..9 stored as single-precision floats.
a_np = np.arange(10, dtype=np.float32)
print(a_np)

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9.]


Allocate an OpenCL buffer.

In [31]:
# Device buffer with room for exactly a_np.nbytes bytes; no host data is
# attached at this point.
a_g = cl.Buffer(ctx, mf.READ_WRITE, size=a_np.nbytes)

Copy data to the buffer.

In [32]:
# Host -> device transfer: the destination (a_g) comes before the source (a_np).
cl.enqueue_copy(queue, a_g, a_np)

<pyopencl.cffi_cl.NannyEvent at 0x110e30d10>

Data can be transferred either while allocating the buffer (via `mf.COPY_HOST_PTR` and `hostbuf`, shown commented out below) or afterwards with `cl.enqueue_copy`.

In [33]:
# One-step alternative, left disabled: allocate the buffer and initialise it from a_np
# in the same call instead of the separate Buffer + enqueue_copy steps above.
# a_g = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a_np)

## Programs

Write an OpenCL program whose `square` kernel squares each element of `a_g` in place.

In [34]:
# OpenCL C source for the `square` kernel: each work-item reads the element at
# its own global index and writes the squared value back to the same slot.
SQUARE_KERNEL_SRC = """
    __kernel void square(__global float *a_g)
    {
        int gid = get_global_id(0);
        float a_gid = a_g[gid];
        a_g[gid] = a_gid * a_gid;
    }
"""

# Build the program within ctx; the kernel is then called as prg.square.
prg = cl.Program(ctx, SQUARE_KERNEL_SRC).build()

In [35]:
# Launch `square` with one work-item per element of a_np (global size = a_np.shape);
# None in the local-size slot means no explicit work-group size is requested.
prg.square(queue, a_np.shape, None, a_g)

<pyopencl.cffi_cl.Event at 0x1072546d0>

Copy result back to the host

In [36]:
# Host array with a_np's shape and dtype to receive the device data, then
# device -> host copy (destination first, source second).
res_np = np.empty(a_np.shape, dtype=a_np.dtype)
cl.enqueue_copy(queue, res_np, a_g)

<pyopencl.cffi_cl.NannyEvent at 0x110e30cd0>

In [37]:
# Values computed on the device.
print(res_np)

[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]


In [38]:
# NumPy reference result, for comparison with the device output.
print(np.square(a_np))

[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]


### Specifying a work group size

In [49]:
# Fixed seed so the 128 random inputs are identical on every run
# (Restart & Run All reproduces the same data). A local RandomState is used
# so the global NumPy random state is left untouched.
a_np = np.random.RandomState(0).rand(128).astype(np.float32)
# Allocate the device buffer and initialise it from a_np in a single call.
a_g = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a_np)

In [50]:
# Global size (total work-items) and local size (work-items per work-group)
# for the launch in the next cell.
gsize, lsize = 128, 32

In [51]:
# Same kernel, now with an explicit local size: gsize work-items in groups of lsize.
prg.square(queue, (gsize,), (lsize,), a_g)

<pyopencl.cffi_cl.Event at 0x110e0f610>

In [53]:
# Fresh host array matching a_np's shape and dtype, filled from the device buffer.
res_np = np.empty(shape=a_np.shape, dtype=a_np.dtype)
cl.enqueue_copy(queue, res_np, a_g)

<pyopencl.cffi_cl.NannyEvent at 0x110e4a8d0>

In [54]:
# Element-wise difference between the device result and the NumPy reference,
# followed by its 2-norm (0.0 means an exact match).
err = res_np - a_np ** 2
print(err)
print(np.linalg.norm(err))

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
0.0
