In [9]:
import pyopencl as cl
from pyopencl import array as clarray
from pyopencl import clmath
from pyopencl import clrandom
import numpy
from time import time

def timeit(f, args):
    n = 100
    t = 0
    for _ in range(n):
        start = time()
        f(*args)
        stop = time()
        t += stop - start
    print( t / n)

def runnumpy(n):
    a = numpy.random.rand(n).astype(numpy.float32)
    b = numpy.random.rand(n).astype(numpy.float32)

    timeit(lambda a, b: a + b, (a, b))
    timeit(lambda a: a + 1.0, (a,))
    timeit(lambda a, b: a * b, (a, b))
    timeit(lambda a: a * 2.0, (a,))
    timeit(lambda a: numpy.sin(a), (a,))
    timeit(lambda a: numpy.sum(a), (a,))
    timeit(lambda a, b: numpy.dot(a, b), (a, b))
    timeit(lambda : numpy.random.rand(n), ())

def runcl(device, n):
    ctx = cl.Context([device])
    queue = cl.CommandQueue(ctx)

    a = clrandom.rand(queue, n, numpy.float32)
    b = clrandom.rand(queue, n, numpy.float32)
    cl.enqueue_barrier(queue)

    def add(a, b):
        a + b
        cl.enqueue_barrier(queue)
    timeit(add, (a, b))

    def add_s(a):
        a + 1.0
        cl.enqueue_barrier(queue)
    timeit(add_s, (a,))

    def mul(a, b):
        a * b
        cl.enqueue_barrier(queue)
    timeit(mul, (a, b))

    def mul_s(a):
        a * 2.0
        cl.enqueue_barrier(queue)
    timeit(mul_s, (a,))

    def sin(a):
        clmath.sin(a)
        cl.enqueue_barrier(queue)
    timeit(sin, (a,))

    def sum(a):
        clarray.sum(a)
        cl.enqueue_barrier(queue)
    timeit(sum, (a,))

    def dot(a, b):
        clarray.dot(a, b)
        cl.enqueue_barrier(queue)
    timeit(dot, (a,b))

    def rand():
        clrandom.rand(queue, n, numpy.float32)
        cl.enqueue_barrier(queue)
    timeit(rand, ())

ns = [1000, 10000, 100000, 1000000]

for platform in cl.get_platforms():
    for device in platform.get_devices():
        print(device.name)
        if device.name=='Intel(R) Core(TM) i5-6267U CPU @ 2.90GHz':
            continue
        for n in ns:
            print(n)
            runcl(device, n)

print('numpy')
for n in ns:
    print(n)
    runnumpy(n)

Intel(R) Core(TM) i5-6267U CPU @ 2.90GHz
Intel(R) Iris(TM) Graphics 550
1000
0.00028855562210083007
0.0002804017066955566
0.00019820690155029298
0.00022507190704345703
0.00016308307647705078
0.0051787829399108885
0.00594069242477417
0.0003645467758178711
10000
0.0005650568008422852
0.0002498769760131836
0.00019583940505981445
0.0002519369125366211
0.0006443142890930175
0.0007089042663574219
0.00036574840545654296
0.00011978387832641602
100000
0.00031195402145385744
0.0005093812942504883
0.0010845232009887695
0.00046382427215576174
0.0004831075668334961
0.00169097900390625
0.0006782984733581542
0.0006832075119018555
1000000
0.0016989159584045411
0.0010665822029113769
0.0005350279808044434
0.001895301342010498
0.0029905056953430176
0.0026191210746765135
0.004685907363891601
0.0002631521224975586
numpy
1000
3.1757354736328126e-06
1.6372203826904297e-05
5.030632019042969e-06
1.0356903076171874e-05
2.916574478149414e-05
3.7877559661865235e-05
5.450248718261719e-06
3.052949905395508e-05
1000