# PyOpenCL

## Arrays

`pyopencl.array.Array` provides a numpy-like interface to OpenCL buffers.

In [2]:
import pyopencl as cl
import pyopencl.array
import numpy as np

In [3]:
# Create an OpenCL context (the device choice is left to pyopencl) and a
# command queue bound to it. Every array and kernel launch in this notebook
# is issued through `queue`.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

### Create an array

In [4]:
# Allocate a 10-element float32 array on the device. As with numpy.empty, the
# contents are not initialised.
a_g = cl.array.empty(queue, 10, dtype=np.float32)

Most of numpy's array creation methods are supported.

In [5]:
# Zero-filled 10-element float32 device array.
a_g = cl.array.zeros(queue, 10, dtype=np.float32)

In [6]:
# New zero-filled device array that takes its shape and dtype from a_g.
b_g = cl.array.zeros_like(a_g)

In [7]:
# Device-side counterpart of np.arange(0, 10, 1): the values 0..9 as float32.
a_g = cl.array.arange(queue, 0, 10, 1, dtype=np.float32)

### Operations on arrays

`pyopencl.array.Array` behaves much like a numpy array:
most of the operations supported by numpy arrays are also supported by
`pyopencl.array.Array`.

In [8]:
# Element-wise power; the result is a new device array.
c_g = a_g ** 2

In [9]:
# Adding a scalar applies it to every element.
c_g = a_g + 10.

In [10]:
# Scalars also work on the left-hand side of an operator.
c_g = 2 * a_g

### Easy data transfer

In [11]:
# get() copies the device array back to the host and returns a numpy array.
c_np = c_g.get()
print(c_np)

[  0.   2.   4.   6.   8.  10.  12.  14.  16.  18.]


Printing the array directly is also supported.

In [12]:
# print() accepts the device array itself; no explicit get() is needed.
print(c_g)

[  0.   2.   4.   6.   8.  10.  12.  14.  16.  18.]


To transfer data from the host to the device, use `cl.array.to_device`:

In [13]:
# Upload the host (numpy) array c_np into a new device array.
c_dev = cl.array.to_device(queue, c_np)

In [14]:
# The uploaded array shows the same values as c_np, confirming the round trip.
print(c_dev)

[  0.   2.   4.   6.   8.  10.  12.  14.  16.  18.]


### The square problem using arrays

In [15]:
# Host-side reference: the float32 values 0..9 and their squares via numpy.
a_np = np.arange(10, dtype=np.float32)
print(np.square(a_np))

[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]


In [16]:
# The same computation on the device: upload a_np, square it element-wise with
# the Array operator, and print the device result directly.
a_g = cl.array.to_device(queue, a_np)
res_g = a_g ** 2
print(res_g)

[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]


### Access OpenCL buffer

The underlying OpenCL buffer of an `Array` is available through its `data` attribute, so it can be passed directly to a kernel.

In [17]:
# Fresh input for the kernel below: the values 0..9 as float32.
a_g = cl.array.arange(queue, 0, 10, 1, dtype=np.float32)

In [18]:
# Compile an OpenCL C program containing a single kernel, `square`.
# Each work-item reads the element at its global id (dimension 0) from the
# float buffer argument and overwrites it with its square, i.e. the buffer is
# modified in place.
prg = cl.Program(ctx, """
    __kernel void square(__global float *a_g)
    {
        int gid = get_global_id(0);
        float a_gid = a_g[gid];
        a_g[gid] = a_gid * a_gid;
    }
""").build()

In [19]:
# Launch one work-item per element (global size = a_g.shape). Passing None as
# the local size leaves the work-group size to the OpenCL implementation.
# a_g.data is the array's underlying buffer; the call returns an Event.
prg.square(queue, a_g.shape, None, a_g.data)

<pyopencl.cffi_cl.Event at 0x114f5c510>

In [22]:
# a_g now holds the kernel's output, because the kernel wrote into its buffer.
print(a_g)

[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]


### Multidimensional arrays

In [23]:
# A tuple shape creates a multidimensional array: here 10 x 10 float32 zeros.
a_g = cl.array.zeros(queue, (10, 10), dtype=np.float32)

In [24]:
# 2-D device arrays print in the same nested row layout as numpy arrays.
print(a_g)

[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]


#### Sum of two matrices

In [27]:
# Compile a program with a single kernel, `sum`, that adds two matrices
# element-wise. The matrices are addressed as flat row-major buffers, and the
# kernel is meant to be launched with a 2-D global size (rows, cols).
# `n` is the row length (number of columns), i.e. the stride between the
# starts of consecutive rows.
prg = cl.Program(ctx, """
    __kernel void sum(__global const float* a, __global const float* b,
                      __global float* out, int n)
    {
        int gid_row = get_global_id(0);
        int gid_col = get_global_id(1);
        // Flat offset of element (gid_row, gid_col); computed once and
        // shared by all three buffers.
        int idx = n * gid_row + gid_col;
        out[idx] = a[idx] + b[idx];
    }""").build()

In [35]:
# Build both operands from scratch instead of incrementing `a_g` in place.
# An in-place `a_g += 1` changes the inputs on every re-run of this cell, so
# the results would depend on how many times it had been executed.
a_g = cl.array.zeros(queue, (10, 10), dtype=np.float32) + 2
b_g = a_g + 1

# Output array with the same shape and dtype as the inputs.
res_g = cl.array.empty_like(a_g)

# The kernel addresses element (row, col) as n * row + col, so `n` must be the
# row length (number of columns), not the number of rows. It is passed as a
# sized numpy scalar so that it matches the kernel's `int` parameter.
n_cols = np.int32(a_g.shape[1])
prg.sum(queue, a_g.shape, None, a_g.data, b_g.data, res_g.data, n_cols)

<pyopencl.cffi_cl.Event at 0x115a19b90>

In [31]:
# Reference result: the same sum computed with Array's built-in `+` operator.
print(a_g + b_g)

[[ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]]


In [32]:
# Result written by the `sum` kernel; it should equal a_g + b_g.
print(res_g)

[[ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]
 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.]]
