# PyOpenCL

## Introduction

In [3]:
import pyopencl as cl
import numpy as np

In [4]:
# Create a context without naming a device explicitly.
# NOTE(review): per the PyOpenCL docs this may prompt for a platform/device
# when several are available -- confirm for your environment. Explicit
# selection is shown in the "Choosing a context" section.
ctx = cl.create_some_context()
# Copies and kernel launches in the following cells are submitted via this queue.
queue = cl.CommandQueue(ctx)

In [5]:
# Short alias for the buffer-flag namespace (READ_WRITE, COPY_HOST_PTR, ...).
mf = cl.mem_flags

In [6]:
# Host-side input: the ten values 0..9 as single-precision floats, matching
# the `float` element type of the kernel defined further down.
a_np = np.arange(10, dtype=np.float32)
print(a_np)

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9.]


Allocate an OpenCL buffer.

In [7]:
# Device buffer sized to hold exactly the bytes of `a_np`. No host data is
# supplied here; the contents are filled by the copy in the next cell.
a_g = cl.Buffer(ctx, mf.READ_WRITE, size=a_np.nbytes)

Copy data to the buffer.

In [8]:
# Host -> device: the destination (`a_g`) comes before the source (`a_np`).
# The call returns an event object, which is what the cell output shows.
cl.enqueue_copy(queue, a_g, a_np)

<pyopencl.cffi_cl.NannyEvent at 0x10cf2ae90>

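Alternatively, the host data can be transferred at allocation time by passing `mf.COPY_HOST_PTR` together with `hostbuf`, instead of issuing a separate `cl.enqueue_copy` (shown commented out below).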

In [9]:
# a_g = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a_np)

## Programs

Write an OpenCL program whose kernel squares each element of `a_g` in place.

In [10]:
# OpenCL C source: each work-item squares, in place, the element of `a_g`
# found at its own global index. There is no bounds check in the kernel, so a
# launch must not use more work-items than `a_g` has elements.
square_src = """
    __kernel void square(__global float *a_g)
    {
        int gid = get_global_id(0);
        float a_gid = a_g[gid];
        a_g[gid] = a_gid * a_gid;
    }
"""

# Build the program in `ctx`; the kernel is then callable as `prg.square`.
prg = cl.Program(ctx, square_src).build()

In [11]:
# Launch one work-item per element of `a_np` (global size = a_np.shape).
# The local size is None, i.e. no explicit work-group size is requested.
prg.square(queue, a_np.shape, None, a_g)

<pyopencl.cffi_cl.Event at 0x10da33f90>

Copy result back to the host

In [12]:
# Host array with the same shape and dtype as the input, to receive the result.
res_np = np.empty_like(a_np)
# Device -> host: destination (`res_np`) first, source (`a_g`) second.
cl.enqueue_copy(queue, res_np, a_g)

<pyopencl.cffi_cl.NannyEvent at 0x10da3a550>

In [13]:
# Values read back from the device buffer after the kernel ran.
print(res_np)

[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]


In [14]:
# Reference result computed on the host with NumPy, for comparison.
print(a_np ** 2)

[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]


### Specifying a work group size

In [15]:
# Seed NumPy's global RNG so that Restart & Run All reproduces the same input.
np.random.seed(0)
# 128 random values in [0, 1), converted to single precision to match the
# kernel's `float` element type.
a_np = np.random.rand(128).astype(np.float32)
# Allocate the device buffer and initialise it from `a_np` in one step
# (COPY_HOST_PTR + hostbuf), so no separate enqueue_copy is needed.
a_g = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a_np)

In [16]:
# Global size (total work-items) and local size (work-items per work-group):
# 128 / 32 = 4 work-groups.
gsize, lsize = 128, 32

In [17]:
# Same kernel as before, now with an explicit local (work-group) size:
# `gsize` work-items in total, `lsize` per work-group.
prg.square(queue, (gsize,), (lsize,), a_g)

<pyopencl.cffi_cl.Event at 0x105970e90>

In [18]:
# Fresh host array matching the new 128-element input.
res_np = np.empty_like(a_np)
# Device -> host: destination (`res_np`) first, source (`a_g`) second.
cl.enqueue_copy(queue, res_np, a_g)

<pyopencl.cffi_cl.NannyEvent at 0x10cf25650>

In [19]:
# Element-wise difference between the device result and the NumPy reference,
# followed by its norm; 0.0 means the two agree exactly.
err_np = res_np - (a_np ** 2)
print(err_np)
print(np.linalg.norm(err_np))

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
0.0


### Choosing a context

In [20]:
# Query the OpenCL platforms available through PyOpenCL.
platforms = cl.get_platforms()

In [21]:
# Show which platforms were found.
print(platforms)

[<pyopencl.Platform 'Apple' at 0x7fff0000>]


In [22]:
# Devices exposed by the first platform in the list.
devices = platforms[0].get_devices()

In [23]:
# Show the devices so one can be picked for the context below.
print(devices)

[<pyopencl.Device 'Intel(R) Core(TM) i5-4278U CPU @ 2.60GHz' on 'Apple' at 0xffffffff>, <pyopencl.Device 'Iris' on 'Apple' at 0x1024500>]


In [24]:
# Pick the device by type instead of by a hard-coded position: `devices[1]`
# raises IndexError on a platform with a single device and is not guaranteed
# to be the GPU elsewhere. Use the first GPU, or fall back to the first
# listed device when the platform reports no GPU.
gpu_devices = [dev for dev in devices if dev.type & cl.device_type.GPU]
ctx = cl.Context([gpu_devices[0] if gpu_devices else devices[0]])

In [25]:
# Command queue for the explicitly chosen context; this rebinds `queue`.
# NOTE(review): `a_g` and `prg` were created with the earlier context and
# presumably must be re-created before use with this queue -- confirm.
queue = cl.CommandQueue(ctx)