# Something Larger than a One-Liner

## Initialization

In [1]:
import numpy as np
import loopy as lp
import pyopencl as cl

lp.set_caching_enabled(False)

In [2]:
cl_ctx = cl.create_some_context(interactive=True)

Choose platform:
[0] <pyopencl.Platform 'Intel Gen OCL Driver' at 0x7fc419151840>
[1] <pyopencl.Platform 'Portable Computing Language' at 0x7fc4215f1010>
[2] <pyopencl.Platform 'Intel(R) OpenCL' at 0x393dc20>
Choice [0]:1
Set the environment variable PYOPENCL_CTX='1' to avoid being asked again.


## Finding "centers" near "targets"

In [3]:
knl = lp.make_kernel(
        "{[ictr,itgt,idim]: "
        "0<=itgt<ntargets "
        "and 0<=ictr<ncenters "
        "and 0<=idim<ambient_dim}",

        """
        for itgt
            for ictr
                <> dist_sq = sum(idim,
                        (tgt[idim,itgt] - center[idim,ictr])**2)
                <> in_disk = dist_sq < (radius[ictr]*1.05)**2
                <> matches = (
                        (in_disk
                            and qbx_forced_limit == 0)
                        or (in_disk
                                and qbx_forced_limit != 0
                                and qbx_forced_limit * center_side[ictr] > 0)
                        )

                <> post_dist_sq = if(matches, dist_sq, HUGE)
            end
            <> min_dist_sq, <> min_ictr = argmin(ictr, ictr, post_dist_sq)

            tgt_to_qbx_center[itgt] = if(min_dist_sq < HUGE, min_ictr, -1)
        end
        """)

In [4]:
print(knl)

---------------------------------------------------------------------------
KERNEL: loopy_kernel
---------------------------------------------------------------------------
ARGUMENTS:
HUGE: ValueArg, type: <runtime>
ambient_dim: ValueArg, type: <runtime>
center: GlobalArg, type: <runtime>, shape: (ambient_dim, ncenters), dim_tags: (N1:stride:ncenters, N0:stride:1)
center_side: GlobalArg, type: <runtime>, shape: (ncenters), dim_tags: (N0:stride:1)
ncenters: ValueArg, type: <runtime>
ntargets: ValueArg, type: <runtime>
qbx_forced_limit: ValueArg, type: <runtime>
radius: GlobalArg, type: <runtime>, shape: (ncenters), dim_tags: (N0:stride:1)
tgt: GlobalArg, type: <runtime>, shape: (ambient_dim, ntargets), dim_tags: (N1:stride:ntargets, N0:stride:1)
tgt_to_qbx_center: GlobalArg, type: <runtime>, shape: (ntargets), dim_tags: (N0:stride:1)
---------------------------------------------------------------------------
DOMAINS:
[ambient_dim, ncenters, ntargets] -> { [ictr, itgt, idim] : 0 <= ictr 

In [5]:
knl = lp.fix_parameters(knl, ambient_dim=2)
knl = lp.add_and_infer_dtypes(knl, {
        "tgt,center,radius,HUGE": np.float32, 
        "center_side,qbx_forced_limit": np.int32,
        })

In [6]:
lp.auto_test_vs_ref(knl, cl_ctx, knl, parameters={
        "HUGE": 1e20, "ncenters": 200, "ntargets": 300,
        "qbx_forced_limit": 1})

---------------------------------------------------------------------------
Kernel #0:
---------------------------------------------------------------------------
[36m#[39;49;00m[36mdefine lid(N) ((int) get_local_id(N))[39;49;00m[36m[39;49;00m
[36m#[39;49;00m[36mdefine gid(N) ((int) get_group_id(N))[39;49;00m[36m[39;49;00m
[34minline[39;49;00m [36mint[39;49;00m [32mloopy_argmin_int32_float32_op[39;49;00m(
    [36mint[39;49;00m op1, [36mfloat[39;49;00m index1,
    [36mint[39;49;00m op2, [36mfloat[39;49;00m index2,
    [36mfloat[39;49;00m *index_out)
{
    [34mif[39;49;00m (op2 <= op1)
    {
        *index_out = index2;
        [34mreturn[39;49;00m op2;
    }
    [34melse[39;49;00m
    {
        *index_out = index1;
        [34mreturn[39;49;00m op1;
    }
}

__kernel [36mvoid[39;49;00m [32m__attribute__[39;49;00m ((reqd_work_group_size([34m1[39;49;00m, [34m1[39;49;00m, [34m1[39;49;00m))) loopy_kernel([36mfloat[39;49;00m [34mconst[39;49;00

{'elapsed_event': 0.0010216545898629192,
 'elapsed_event_marker': 0.0007795927207325803,
 'elapsed_wall': 0.0007798988372087479,
 'ref_elapsed_event': 0.000474867,
 'ref_elapsed_wall': 0.20263218879699707,
 'timing_rounds': 512}