In [5]:
#Render matplotlib figures inline. Note that %pylab also populates the
#interactive namespace with numpy and matplotlib names.
%pylab inline

#Enable the pyopencl IPython integration (used for the %%cl_kernel cell magic)
%load_ext pyopencl.ipython_ext

#Import the packages we need
import numpy as np
import pyopencl as cl

#Make sure we get compiler output from OpenCL when kernels are built
import os
os.environ["PYOPENCL_COMPILER_OUTPUT"] = "1"


Populating the interactive namespace from numpy and matplotlib
The pyopencl.ipython_ext extension is already loaded. To reload it, use:
  %reload_ext pyopencl.ipython_ext


In [6]:
#Create an OpenCL context on whichever platform/device pyopencl selects
#NOTE(review): create_some_context may prompt interactively for a device
#unless one is preselected (e.g. via PYOPENCL_CTX) - confirm for batch runs
cl_ctx = cl.create_some_context()

#Create an OpenCL command queue on that context; kernel launches are submitted through it
cl_queue = cl.CommandQueue(cl_ctx)


In [11]:

%%cl_kernel 
// Advance the 2D linear wave equation by one explicit time step.
//
//   u2 : output, solution at the new time level (written only)
//   u1 : input, solution at the current time level
//   u0 : input, solution at the previous time level
//   c  : coefficient multiplying the discrete Laplacian (used as given, not squared)
//   dt : time step
//   dx : grid spacing along dimension 0 (index i)
//   dy : grid spacing along dimension 1 (index j)
//
// One work-item handles one cell; the grid size is taken from the global
// work size, and cell (i, j) is stored at linear index j*nx + i.
//
// Boundary cells receive the new value of their nearest interior cell.
// That value is recomputed here from u1/u0 instead of being read back from
// u2, because u2 is being written concurrently by other work-items in the
// same launch (reading it would be a data race), and because indexing a
// neighbour outside the grid would access memory out of bounds.
__kernel void linear_wave_2D(__global float *u2, __global const float *u1, __global const float *u0, float c, float dt, float dx, float dy) {

    // Position of this work-item and extent of the grid
    int i = get_global_id(0);
    int j = get_global_id(1);
    int nx = get_global_size(0);
    int ny = get_global_size(1);

    // The 5-point stencil needs at least one interior cell in each direction
    if (nx < 3 || ny < 3) {
        return;
    }

    // Stencil centre: the cell itself if interior, otherwise the nearest
    // interior cell (this also covers the four corners)
    int ii = min(max(i, 1), nx - 2);
    int jj = min(max(j, 1), ny - 2);

    // Neighbour indices around the stencil centre (always inside the grid)
    int center = jj*nx + ii;
    int north = (jj-1)*nx + ii;
    int south = (jj+1)*nx + ii;
    int east = jj*nx + (ii+1);
    int west = jj*nx + (ii-1);

    // Per-direction coefficients; the second one uses dy so that
    // non-square cells are handled correctly
    float rx = c * (dt*dt)/(dx*dx);
    float ry = c * (dt*dt)/(dy*dy);

    // Explicit central-difference update, stored at this work-item's own cell
    u2[j*nx + i] = 2.0f*u1[center] - u0[center]
                 + rx * (u1[west] - 2.0f*u1[center] + u1[east])
                 + ry * (u1[north] - 2.0f*u1[center] + u1[south]);
}


Build on <pyopencl.Device 'Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz' on 'Intel(R) OpenCL' at 0x2cf9058> succeeded, but said:

Compilation started
Compilation done
Linking started
Linking done
Device build started
Device build done
Kernel <linear_wave_2D> was successfully vectorized (4)
Done.


In [None]:
#Grid resolution: number of cells along x and y
nx, ny = 100, 100

#Coefficient, grid spacings and time step
c = 1.0
dx, dy = 1.0, 1.0
dt = 0.4 * min(dx*dx/(2.0*c), dy*dy/(2.0*c))

#Random float32 test fields of shape (ny, nx) used as the two starting states
u0 = np.random.rand(ny, nx).astype(np.float32)
u1 = np.random.rand(ny, nx).astype(np.float32)

mf = cl.mem_flags

#Device buffers initialised with a copy of the host arrays
u0_g = cl.Buffer(cl_ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=u0)
u1_g = cl.Buffer(cl_ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=u1)

#Uninitialised device buffer of the same size for the kernel output
u2_g = cl.Buffer(cl_ctx, mf.READ_WRITE, u0.nbytes)



#Number of time steps to simulate
nt = 500

#The scalar kernel arguments do not change between steps, so convert them
#to float32 once. Uses the explicit `np` import rather than the `numpy`
#name that only exists because %pylab injected it.
scalar_args = (np.float32(c), np.float32(dt), np.float32(dx), np.float32(dy))

for i in range(0, nt):
    #Execute one time step on the device, one work-item per cell
    linear_wave_2D(cl_queue, (nx,ny), None,
                   u2_g, u1_g, u0_g,
                   *scalar_args)

    #Rotate the buffers: old u1 becomes u0, the freshly computed u2 becomes
    #u1, and the old u0 buffer is recycled as the next output buffer
    u0_g, u1_g, u2_g = u1_g, u2_g, u0_g
