# Loopy: Controlling data layout

## Setup code

In [1]:
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
import loopy as lp

from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2

In [3]:
# Create an OpenCL context, asking interactively which platform/device to use
# (the prompt and the chosen answer appear in the cell output), then open a
# command queue on that context for the kernel launches later on.
ctx = cl.create_some_context(interactive=True)
queue = cl.CommandQueue(ctx)

Choose platform:
[0] <pyopencl.Platform 'Portable Computing Language' at 0x7fd0ffe3c6e8>
[1] <pyopencl.Platform 'Intel(R) OpenCL' at 0x33094e8>


Choice [0]: 0


Set the environment variable PYOPENCL_CTX='0' to avoid being asked again.


## A kernel on a structured array

In [4]:
# Loop domain: `el` runs over `nels` elements (a run-time parameter), `dof`
# over 14 entries and `comp` over 3 entries.
# Instruction: scale every entry of E by the per-element factor eps[el].
knl = lp.make_kernel(
    "{[el,dof, comp]: 0<=el<nels and 0<=dof<14 and 0<=comp < 3}",
    "D[el, dof, comp] = eps[el] * E[el, dof, comp]",
)

# Have loopy print the generated OpenCL source whenever the kernel is run.
knl = lp.set_options(knl, write_cl=True)

In [5]:
# Per-element coefficients: 500 standard-normal samples in a host NumPy array.
eps = np.random.standard_normal(500)
# Field data of shape (element, dof, component), created through pyopencl's
# random generator on `queue`.
E = cl.clrandom.rand(queue, (500, 14, 3), dtype=np.float64)

In [6]:
# Baseline run: work on a copy so that `knl` itself is left untouched.
mknl = knl.copy()
# Launch the kernel. Because `write_cl=True` was set on `knl`, the generated
# OpenCL code is printed as cell output. `nels` is not passed here --
# presumably loopy infers it from the shapes of the array arguments.
evt, _ = mknl(queue, eps=eps, E=E)

[36m#[39;49;00m[36mdefine lid(N) ((int) get_local_id(N))[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mdefine gid(N) ((int) get_group_id(N))[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mif __OPENCL_C_VERSION__ < 120[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mpragma OPENCL EXTENSION cl_khr_fp64: enable[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mendif[39;49;00m[36m[39;49;00m

__kernel [36mvoid[39;49;00m [32m__attribute__[39;49;00m ((reqd_work_group_size([34m1[39;49;00m, [34m1[39;49;00m, [34m1[39;49;00m))) loopy_kernel(__global [36mdouble[39;49;00m *__restrict__ D, __global [36mdouble[39;49;00m [34mconst[39;49;00m *__restrict__ E, __global [36mdouble[39;49;00m [34mconst[39;49;00m *__restrict__ eps, [36mint[39;49;00m [34mconst[39;49;00m nels)
{
  [34mfor[39;49;00m ([36mint[39;49;00m el = [34m0[39;49;00m; el <= -[34m1[39;49;00m + nels; ++el)
    [34mfor[39;49;00m ([36mint[39;49;00m dof = [34m0[39;49;00m; dof <= [34m13[39;49;00

## Changing the layout

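`E` and `D` are currently laid out as an array of structures (AoS): the three components of each entry are stored next to each other. What if I want a structure of arrays (SoA) instead, with each component in its own array?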

In [8]:
# Change the data format of E: a copy kernel whose output uses the "c,c,sep"
# axis tags. n2 (the length of the last axis) is fixed to 3, matching the
# three components of E.
copy_knl = lp.fix_parameters(lp.make_copy_kernel("c,c,sep"), n2=3)
evt, E_new = copy_knl(queue, input=E)

# Adapt the compute kernel to the new layout of E: tag its last axis as "sep"
# (the generated code then takes E_s0, E_s1, E_s2 as separate arguments),
# tag the `comp` loop "unr" and request the loop nesting order el, dof, comp.
mknl = lp.tag_array_axes(knl, "E", "c,c,sep")
mknl = lp.tag_inames(mknl, {"comp": "unr"})
mknl = lp.prioritize_loops(mknl, "el,dof,comp")

evt, _ = mknl(queue, eps=eps, E=E_new)

[36m#[39;49;00m[36mdefine lid(N) ((int) get_local_id(N))[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mdefine gid(N) ((int) get_group_id(N))[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mif __OPENCL_C_VERSION__ < 120[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mpragma OPENCL EXTENSION cl_khr_fp64: enable[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mendif[39;49;00m[36m[39;49;00m

__kernel [36mvoid[39;49;00m [32m__attribute__[39;49;00m ((reqd_work_group_size([34m1[39;49;00m, [34m1[39;49;00m, [34m1[39;49;00m))) loopy_kernel(__global [36mdouble[39;49;00m *__restrict__ D, __global [36mdouble[39;49;00m [34mconst[39;49;00m *__restrict__ E_s0, __global [36mdouble[39;49;00m [34mconst[39;49;00m *__restrict__ E_s1, __global [36mdouble[39;49;00m [34mconst[39;49;00m *__restrict__ E_s2, __global [36mdouble[39;49;00m [34mconst[39;49;00m *__restrict__ eps, [36mint[39;49;00m [34mconst[39;49;00m nels)
{
  [34mfor[39;49;00m ([36mint[39;49;00m el = 

You may also want to add padding to the arrays (live demo).

---

Grouped padding exists as well.