# Mini Pytato 

In [1]:
import numpy as np
import numpy.linalg as la
import pymbolic.primitives as p
import loopy as lp
import pyopencl as cl
import pyopencl.array

# Create the OpenCL context. interactive=True is passed here, and the recorded
# output below shows the resulting platform-selection prompt.
ctx = cl.create_some_context(interactive=True)
# Command queue on that context; it is passed to the kernel when it is invoked.
queue = cl.CommandQueue(ctx)

Choose platform:
[0] <pyopencl.Platform 'Intel(R) OpenCL HD Graphics' at 0x4270220>
[1] <pyopencl.Platform 'Portable Computing Language' at 0x7fe6b28e08e8>


Choice [0]: 1


Set the environment variable PYOPENCL_CTX='1' to avoid being asked again.


Implement the `Array` base class, along with `Sum`, `Product`, and `Placeholder` subclasses:

In [14]:
#clear
class Array:
    """Base class for nodes of the array expression graph.

    Every node carries a fixed ``shape`` of ``(10, 10)`` and a ``dtype`` of
    ``np.float64``. The ``+`` and ``*`` operators perform no arithmetic; they
    return ``Sum`` / ``Product`` nodes that record the two operands.
    """

    def __init__(self):
        self.shape, self.dtype = (10, 10), np.float64

    def __mul__(self, other):
        """Return a ``Product`` node representing ``self * other``."""
        return Product(self, other)

    def __add__(self, other):
        """Return a ``Sum`` node representing ``self + other``."""
        return Sum(self, other)
        
class Sum(Array):
    """Expression node recording the sum of two operands ``a`` and ``b``."""

    # Name of the mapper method that a mapper's ``rec`` looks up for this node.
    mapper_method = "map_sum"

    def __init__(self, a, b):
        super().__init__()
        self.a, self.b = a, b
        
class Product(Array):
    """Expression node recording the product of two operands ``a`` and ``b``."""

    # Name of the mapper method that a mapper's ``rec`` looks up for this node.
    mapper_method = "map_product"

    def __init__(self, a, b):
        super().__init__()
        self.a, self.b = a, b
        
class Placeholder(Array):
    """Leaf node standing for a named input array.

    ``name`` is the identifier under which the array appears in generated
    expressions.
    """

    # Name of the mapper method that a mapper's ``rec`` looks up for this node.
    mapper_method = "map_placeholder"

    def __init__(self, name):
        super().__init__()
        self.name = name

Implement a `CodegenMapper`:

In [17]:
#clear
class CodegenMapper:
    """Turn an array expression graph into a scalar pymbolic expression.

    Each node names its handler through a ``mapper_method`` attribute;
    ``rec`` looks that method up on the mapper and calls it with the node.
    """

    def rec(self, ary):
        """Dispatch *ary* to the handler named by ``ary.mapper_method``."""
        handler = getattr(self, ary.mapper_method)
        return handler(ary)

    def map_placeholder(self, expr):
        """Return the variable named ``expr.name`` subscripted by ``i, j``."""
        indices = (p.Variable("i"), p.Variable("j"))
        return p.Variable(expr.name)[indices]

    def map_product(self, expr):
        """Map both operands (``a`` first, then ``b``) and multiply the results."""
        left = self.rec(expr.a)
        right = self.rec(expr.b)
        return left * right

    def map_sum(self, expr):
        """Map both operands (``a`` first, then ``b``) and add the results."""
        left = self.rec(expr.a)
        right = self.rec(expr.b)
        return left + right

Experiment with some expressions:

In [5]:
# Two named input arrays.
x, y = Placeholder("x"), Placeholder("y")

# The operators only build Sum/Product nodes here; nothing is evaluated.
expr = (x + x*y) * x

# Alternative to experiment with: every `expr*expr` line uses the previous
# expression as both operands of a new Product.
# expr = (x+y)
# expr = expr*expr
# expr = expr*expr
# expr = expr*expr
# expr = expr*expr
# expr = expr*expr

Generate code for these expressions:

In [18]:
#clear
# Map the expression graph with a fresh CodegenMapper and print the result.
print(CodegenMapper().rec(expr))

(x[i, j] + x[i, j]*y[i, j])*x[i, j]


Generate a loopy kernel for your expression (and print the resulting kernel):

In [7]:
#clear
# Build a loopy kernel consisting of a single assignment, then print it.
knl = lp.make_kernel(
    # Loop domain: inames i and j, each ranging over 0..9.
    "{[i,j]: 0<=i,j<10}",
    [lp.Assignment(
        # Left-hand side: lhs[i, j].
        p.Variable("lhs")[p.Variable("i"), p.Variable("j")], 
        # Right-hand side: the expression CodegenMapper produces from `expr`.
        CodegenMapper().rec(expr)
    )])
print(knl)

---------------------------------------------------------------------------
KERNEL: loopy_kernel
---------------------------------------------------------------------------
ARGUMENTS:
lhs: type: <auto/runtime>, shape: (10, 10), dim_tags: (N1:stride:10, N0:stride:1) out aspace: global
x: type: <auto/runtime>, shape: (10, 10), dim_tags: (N1:stride:10, N0:stride:1) in aspace: global
y: type: <auto/runtime>, shape: (10, 10), dim_tags: (N1:stride:10, N0:stride:1) in aspace: global
---------------------------------------------------------------------------
DOMAINS:
{ [i, j] : 0 <= i <= 9 and 0 <= j <= 9 }
---------------------------------------------------------------------------
INAME TAGS:
i: None
j: None
---------------------------------------------------------------------------
INSTRUCTIONS:
for i, j
    lhs[i, j] = (x[i, j] + x[i, j]*y[i, j])*x[i, j]  {id=insn}
end i, j
---------------------------------------------------------------------------


  tunit = make_function(*args, **kwargs)


In [8]:
# Random 10x10 inputs, matching the (10, 10) argument shapes in the kernel printout.
xval = np.random.randn(10, 10)
yval = np.random.randn(10, 10)

# Invoke the kernel on the queue; the one-element output tuple is unpacked into `res`.
evt, (res,) = knl(queue, x=xval, y=yval)

  evt, (res,) = knl(queue, x=xval, y=yval)


Check the result:

In [9]:
# Norm of the difference between the kernel output and the same formula evaluated with numpy.
print(la.norm(res- (xval+xval*yval)*xval))

3.818293849341007e-15


Look at the generated C code:

In [10]:
# Give loopy the dtypes of x and y taken from the numpy inputs.
# NOTE(review): presumably the dtype of lhs is then inferred from these — confirm.
knl = lp.add_and_infer_dtypes(knl, {"x": xval.dtype, "y": yval.dtype})

# Generate the device code and print it.
code = lp.generate_code_v2(knl).device_code()
print(code)

#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif

__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global double *__restrict__ lhs, __global double const *__restrict__ x, __global double const *__restrict__ y)
{
  for (int j = 0; j <= 9; ++j)
    for (int i = 0; i <= 9; ++i)
      lhs[10 * i + j] = (x[10 * i + j] + x[10 * i + j] * y[10 * i + j]) * x[10 * i + j];
}
