In [39]:
# prepared invocations and structures -----------------------------------------
from __future__ import print_function
from __future__ import absolute_import
import pycuda.driver as cuda
import pycuda.autoinit
import numpy
from pycuda.compiler import SourceModule

class DoubleOpStruct:
    """Host-side handle for one ``DoubleOperation`` record living in device memory.

    Each record is laid out as a 32-bit element count at offset 0, followed by
    padding up to offset 8, followed by a pointer-sized device address of the
    element data.
    """

    # Bytes per record: 8 for the count and its padding, plus one
    # pointer-sized unsigned integer.
    mem_size = 8 + numpy.uintp(0).nbytes

    def __init__(self, array, struct_arr_ptr):
        """Upload ``array`` and fill in the record located at ``struct_arr_ptr``.

        Parameters
        ----------
        array : numpy.ndarray
            Host data to copy to the device.
        struct_arr_ptr : int-convertible
            Device address at which this object's record starts.
        """
        self.data = cuda.to_device(array)
        self.shape = array.shape
        self.dtype = array.dtype

        record_base = int(struct_arr_ptr)
        count_field = numpy.int32(array.size)
        pointer_field = numpy.uintp(int(self.data))

        # The numpy scalars are wrapped in memoryview objects so that they are
        # handed to memcpy_htod as buffers.
        # Background: https://github.com/inducer/pycuda/pull/60
        cuda.memcpy_htod(record_base, memoryview(count_field))
        cuda.memcpy_htod(record_base + 8, memoryview(pointer_field))

    def __str__(self):
        """Return the string form of the data as currently stored on the device."""
        host_copy = cuda.from_device(self.data, self.shape, self.dtype)
        return str(host_copy)

# Reserve device memory for two back-to-back DoubleOpStruct records.
struct_arr = cuda.mem_alloc(DoubleOpStruct.mem_size * 2)
# Device address of the second record: one record size past the first.
do2_ptr = DoubleOpStruct.mem_size + int(struct_arr)

# Each constructor call uploads its array and writes one record.
array1 = DoubleOpStruct(numpy.array([1, 2, 3], numpy.float32), struct_arr)
array2 = DoubleOpStruct(numpy.array([0, 4], numpy.float32), do2_ptr)

print("original arrays")
print(array1, array2, sep="\n")

# Compile the CUDA source. The DoubleOperation struct mirrors the record that
# DoubleOpStruct writes from the host: the element count at offset 0, a padding
# int, and the data pointer at offset 8.
#
# In the kernel each block first advances `a` by blockIdx.x, so block i works
# on record i; its threads then walk that record's elements starting at
# threadIdx.x in steps of blockDim.x, doubling each one in place.
mod = SourceModule("""
    struct DoubleOperation {
        int datalen, __padding; // so 64-bit ptrs can be aligned
        float *ptr;
    };


    __global__ void double_array(DoubleOperation *a)
    {
        a = a + blockIdx.x;
        for (int idx = threadIdx.x; idx < a->datalen; idx += blockDim.x)
        {
            float *a_ptr = a->ptr;
            a_ptr[idx] *= 2;
        }
    }
    """)
func = mod.get_function("double_array")
# Two blocks of 32 threads: one block per record stored in struct_arr.
func(struct_arr, block=(32, 1, 1), grid=(2, 1))

# Host copies after the launch that covered both records.
print("doubled arrays")
print(array1, array2, sep="\n")

# Launch a single block on the second record only; its device address is
# passed as a pointer-sized integer.
func(numpy.uintp(do2_ptr), grid=(1, 1), block=(32, 1, 1))
print("doubled second only")
print(array1, array2, sep="\n")

# Same launch over both records, this time through the prepared-call interface.
# The argument format "P" declares a single pointer argument.
if cuda.get_version() >= (4, ):
    # Block shape is supplied with every prepared_call.
    func.prepare("P")
    block = (32, 1, 1)
    func.prepared_call((2, 1), block, struct_arr)
else:
    # Block shape is fixed once, at prepare time.
    func.prepare("P", block=(32, 1, 1))
    func.prepared_call((2, 1), struct_arr)


print("doubled again")
print(array1, array2, sep="\n")

# Prepared launch of a single block on the second record only. `func` was
# already prepared, and `block` was set, by the previous version check.
if cuda.get_version() >= (4, ):
    func.prepared_call((1, 1), block, do2_ptr)
else:
    func.prepared_call((1, 1), do2_ptr)


print("doubled second only again")
print(array1, array2, sep="\n")


original arrays
[ 1.  2.  3.]
[ 0.  4.]
doubled arrays
[ 2.  4.  6.]
[ 0.  8.]
doubled second only
[ 2.  4.  6.]
[  0.  16.]
doubled again
[  4.   8.  12.]
[  0.  32.]
doubled second only again
[  4.   8.  12.]
[  0.  64.]


In [40]:
# Byte width of numpy's pointer-sized unsigned integer; DoubleOpStruct.mem_size
# adds this to the 8 bytes reserved for the length field and padding.
numpy.uintp(0).nbytes

8

In [41]:
# Scratch float32 array inspected by the next few cells.
array = numpy.array([1, 2, 3], dtype=numpy.float32)

In [42]:
# Copy the array to the device; the returned allocation object is only
# displayed, not bound to a name.
cuda.to_device(array)

<pycuda._driver.DeviceAllocation at 0x7f33ee2e9d00>

In [43]:
# Element type of the scratch array.
array.dtype

dtype('float32')

In [44]:
# array.size is a plain Python int, which has no fixed byte width.
type(array.size)

int

In [45]:
# Wrapping the size in numpy.int32 gives a fixed-width scalar, which is what
# DoubleOpStruct copies into the record's length field.
type(numpy.int32(array.size))

numpy.int32

# Prepare data

In [89]:
import numpy as np
# Three zero-initialised records, each with the integer fields x, y and E.
# (An earlier variant of this cell used 'f8' for every field instead.)
data = np.zeros(
    3,
    dtype=[('x', 'i'), ('y', 'i'), ('E', 'i')],
)
print(data.dtype)

[('x', '<i4'), ('y', '<i4'), ('E', '<i4')]


In [90]:
# Display the freshly created records (every field is still zero).
data

array([(0, 0, 0), (0, 0, 0), (0, 0, 0)],
      dtype=[('x', '<i4'), ('y', '<i4'), ('E', '<i4')])

In [91]:
# Fill all three records in one assignment; each tuple is (x, y, E).
data[:] = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]

In [92]:
# Display the records after they have been filled in.
data

array([(1, 2, 3), (4, 5, 6), (7, 8, 9)],
      dtype=[('x', '<i4'), ('y', '<i4'), ('E', '<i4')])

In [93]:
# Wrap the structured array in a memoryview. Despite the name, this is a
# host-side buffer object, not a device pointer.
# NOTE(review): data_ptr is not used by any later cell visible here.
data_ptr = memoryview(data)

In [94]:
# Display the memoryview object itself.
data_ptr

<memory at 0x7f33f5c1d948>

# Read the structure

In [99]:
# Upload the structured array to the device. The returned allocation is not
# assigned to a name, so nothing later in this cell can refer to it.
cuda.to_device(data)
# Bytes per record: the combined size of the x, y and E fields.
data.dtype.itemsize

12

In [110]:
# prepared invocations and structures -----------------------------------------
from __future__ import print_function
from __future__ import absolute_import
import pycuda.driver as cuda
import pycuda.autoinit
import numpy
from pycuda.compiler import SourceModule

class Voxels:
    """Host-side handle for a structured array that has been copied to the device.

    Attributes
    ----------
    data
        Device allocation returned by ``cuda.to_device`` for the uploaded array.
    shape, dtype
        Shape and dtype of the host array, kept so the device copy can be read back.
    mem_size
        Size in bytes of one record (``array.dtype.itemsize``).
    """

    def __init__(self, array, struct_arr_ptr=None):
        """Upload ``array`` to the device and remember how to read it back.

        Parameters
        ----------
        array : numpy.ndarray
            Host array to copy to the device.
        struct_arr_ptr : optional
            Unused. Accepted only so that existing two-argument calls keep
            working; nothing is written to this address.
        """
        self.data = cuda.to_device(array)
        self.shape, self.dtype = array.shape, array.dtype
        self.mem_size = array.dtype.itemsize

    def __str__(self):
        """Return the string form of the array as currently stored on the device."""
        return str(cuda.from_device(self.data, self.shape, self.dtype))

# Bytes per record (the x, y and E fields of `data`).
mem_size = data.dtype.itemsize
# Voxels uploads `data` itself and ignores its second argument, so no separate
# device buffer is allocated here. The kernel must read the uploaded copy
# (array1.data); a freshly allocated buffer would hold uninitialised bytes.
array1 = Voxels(data, None)

print("original arrays")
print(array1)

# The device-side Voxels struct has the same three int fields, in the same
# order, as the records of `data`. Each block advances `v` by blockIdx.x and
# prints the record it lands on.
mod = SourceModule("""
    #include <stdio.h>

    struct Voxels {
        int x;
        int y;
        int E;
    };

    __global__ void double_array(Voxels *v)
    {
        v = v + blockIdx.x;
        printf("x: %d, y: %d, E: %d\\n", v->x, v->y, v->E);
    }
    """)
func = mod.get_function("double_array")
# One single-thread block per record: the kernel indexes by blockIdx.x, so the
# grid width (not the block width) selects which records are visited.
func(array1.data, block=(1, 1, 1), grid=(data.shape[0], 1))
# Wait for the kernel to finish so its printf output is flushed.
cuda.Context.synchronize()

# The kernel only prints; the labels below are kept as in the original cell.
print("doubled arrays")
print(array1)

original arrays
[(1, 2, 3) (4, 5, 6) (7, 8, 9)]
doubled arrays
[(1, 2, 3) (4, 5, 6) (7, 8, 9)]


In [107]:
# Display the host-side records.
# NOTE(review): this cell carries execution count In [107] although it sits
# after In [110], so the notebook was not run top to bottom.
data

array([(1, 2, 3), (4, 5, 6), (7, 8, 9)],
      dtype=[('x', '<i4'), ('y', '<i4'), ('E', '<i4')])

In [114]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule

# Minimal device-side printf example: every thread prints its x/y index
# within the block.
mod = SourceModule("""
    #include <stdio.h>

    __global__ void say_hi()
    {
      printf("I am %d.%d\\n", threadIdx.x, threadIdx.y);
    }
    """)

func = mod.get_function("say_hi")
# 4 x 4 threads per block; no grid is given, so PyCUDA's default grid is used.
func(block=(4,4,1))
# The launch returns before the kernel has finished; synchronise so the
# device printf buffer is flushed before this cell ends.
cuda.Context.synchronize()