In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function
from tabulate import tabulate
import pyopencl as cl
import numpy as np

## Seleccionar plataforma, dispositivo y crear contexto

In [2]:
# Indices of the OpenCL platform, and of the device inside that platform, to run on.
ID_P, ID_D = 1, 0

platforms = cl.get_platforms()
devices = [platforms[ID_P].get_devices()[ID_D]]

# Context restricted to the selected device and pinned to the selected platform.
context = cl.Context(
    devices=devices,
    properties=[(cl.context_properties.PLATFORM, platforms[ID_P])],
)

# Last expression of the cell: display the devices attached to the context.
context.get_info(cl.context_info.DEVICES)

[<pyopencl.Device 'pthread-AMD Ryzen Threadripper 1950X 16-Core Processor' on 'Portable Computing Language' at 0x560acbbcb390>]

## Creación de una cola de comandos

In [3]:
# Command queue on the context created earlier; no queue properties are requested.
queue = cl.CommandQueue(
    context,
    properties=None,
)

# Last expression of the cell: display the context the queue reports.
queue.get_info(cl.command_queue_info.CONTEXT)

<pyopencl.Context at 0x560acbb02a80 on <pyopencl.Device 'pthread-AMD Ryzen Threadripper 1950X 16-Core Processor' on 'Portable Computing Language' at 0x560acbbcb390>>

## Compilación del programa

https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/barrier.html

In [4]:
# Read the kernel source text from disk. The `with` block guarantees the file
# handle is closed even if read() raises, which the manual open/close did not.
with open("minmax.c", "r") as source_file:
    kernel = source_file.read()

# Create the program from the source text for this context and build it
# with no extra build options.
prg = cl.Program(context, kernel)
exe = prg.build(options=[])

## Imprimir alguna información útil

In [5]:
# First device attached to the context; all four properties below are read from it.
dev = context.get_info(cl.context_info.DEVICES)[0]

CU = dev.get_info(cl.device_info.MAX_COMPUTE_UNITS)
WGS = dev.get_info(cl.device_info.MAX_WORK_GROUP_SIZE)
WIS = dev.get_info(cl.device_info.MAX_WORK_ITEM_SIZES)

# LOCAL_MEM_SIZE is divided by 1024 and stored as a formatted 'N KB' string.
LMS = '{:.0f} KB'.format(dev.get_info(cl.device_info.LOCAL_MEM_SIZE) / 1024)

# Single-row summary table.
text = tabulate(
    [[CU, WGS, WIS, LMS]],
    headers=['Compute Units', 'MAX_WORK_GROUP_SIZE', 'MAX_WORK_ITEM_SIZES', 'LOCAL_MEM_SIZE'],
    tablefmt="fancy_grid",
)

print(text)

╒═════════════════╤═══════════════════════╤═══════════════════════╤══════════════════╕
│   Compute Units │   MAX_WORK_GROUP_SIZE │ MAX_WORK_ITEM_SIZES   │ LOCAL_MEM_SIZE   │
╞═════════════════╪═══════════════════════╪═══════════════════════╪══════════════════╡
│              32 │                  4096 │ [4096, 4096, 4096]    │ 4096 KB          │
╘═════════════════╧═══════════════════════╧═══════════════════════╧══════════════════╛


## Iniciar buffers de memoria

In [8]:
# Scalar uint32 zero kept on the host.
host_index = np.uint32(0)

# Input: 2**19 random samples from [0, 1), converted to float32.
host_data = np.random.random_sample(size=(1 << 19,)).astype(np.float32)

# Zero-initialised float32 array with two slots per 1024 input elements.
host_outputs = np.zeros(2 * (host_data.shape[0] // 1024), dtype=np.float32)

In [9]:
# All three device buffers are created READ_WRITE, each sized in bytes from
# the matching host object.
rw = cl.mem_flags.READ_WRITE

device_data = cl.Buffer(context, rw, size=host_data.nbytes)
device_temp = cl.Buffer(context, rw, size=host_outputs.nbytes)
device_index = cl.Buffer(context, rw, size=host_index.nbytes)

In [10]:
# Copy the host samples into the device input buffer.
cl.enqueue_copy(queue, device_data, host_data, wait_for=None)

# Write a zero pattern over 4 bytes at offset 0 of each auxiliary buffer.
# NOTE(review): device_temp is allocated with host_outputs.nbytes bytes, so only
# its first 4 bytes are filled here -- confirm the kernel does not read the rest
# before writing it.
cl.enqueue_fill_buffer(
    queue,
    device_temp,
    pattern=np.float32(0),
    size=4,
    offset=0,
)
cl.enqueue_fill_buffer(
    queue,
    device_index,
    pattern=np.uint32(0),
    size=4,
    offset=0,
)

<pyopencl._cl.Event at 0x7f4b800a8c50>

## Ejecutar el Kernel

In [11]:
# Enqueue the `minmax` kernel: the global size is the shape of host_data and the
# local size is [1024]; the three device buffers are the kernel arguments.
exe.minmax(
    queue,
    host_data.shape,
    [1024],
    device_data,
    device_temp,
    device_index,
)

<pyopencl._cl.Event at 0x7f4b800aff50>

In [12]:

# Host array that receives the contents of device_temp. Its shape and dtype are
# taken from host_outputs (the array device_temp was sized from) instead of a
# hard-coded 1024, so the copy stays size-consistent if the input size changes.
host_output = np.zeros(host_outputs.shape, dtype=host_outputs.dtype)
cl.enqueue_copy(queue, host_output, device_temp)

# Print the first two device results next to NumPy's max/min of the same data
# so the two lines can be compared by eye.
print(host_output[0], host_output[1])
print(host_data.max(), host_data.min())

0.9999998 9.828495e-08
0.9999998 9.828495e-08
