In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function
from timeit import default_timer as timer
import pyopencl as cl
import numpy as np

## Seleccionar plataforma y dispositivo a usar

In [4]:
ID_P = 0
ID_D = 2
platforms = cl.get_platforms()
devices = [platforms[ID_P].get_devices()[ID_D]]
list(devices)

[<pyopencl.Device 'GeForce RTX 2080' on 'NVIDIA CUDA' at 0x556189e92690>]

## Crear contexto y mostrar los dispositivos asignados 

In [3]:
context = cl.Context(devices=devices,properties=[(cl.context_properties.PLATFORM, platforms[0])])
context.get_info(cl.context_info.DEVICES)

[<pyopencl.Device 'GeForce RTX 2080' on 'NVIDIA CUDA' at 0x556189e92690>]

## Creación de una cola de comandos

In [5]:
queue = cl.CommandQueue(context, device=None, properties=None)
queue.get_info(cl.command_queue_info.CONTEXT)

<pyopencl.Context at 0x556189ef79c0 on <pyopencl.Device 'GeForce RTX 2080' on 'NVIDIA CUDA' at 0x556189e92690>>

## Creación del buffer en el host

Crear un buffer de 100 Megabytes

In [6]:
host_buffer = np.random.randint(255, size=100*(2**20),dtype=np.uint8)
print(host_buffer.nbytes/(2**20),'MB')

100.0 MB


## Creación del buffer en el contexto

https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clCreateBuffer.html<br>
https://documen.tician.de/pyopencl/runtime_memory.html#buffer<br><br>
Aqui se esta solicitando espacio de memoria en el dispositivo asociado al contexto.<br>
Será del tamaño del buffer en el host (100 MB)

In [7]:
device_buffer = cl.Buffer(context, cl.mem_flags.READ_WRITE, size=host_buffer.nbytes)
print(device_buffer)

<pyopencl._cl.Buffer object at 0x7f45ec2d5e90>


## Copia del host al dispositivo

https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clEnqueueCopyBuffer.html<br>
https://documen.tician.de/pyopencl/runtime_memory.html#pyopencl.enqueue_copy<br><br>

Aqui se transfiere lo que hay en el buffer en el host (RAM) al buffer en el dispositivo (VRAM).

In [8]:
cl.enqueue_copy(queue, device_buffer, host_buffer, is_blocking=True, wait_for=None)

<pyopencl._cl.NannyEvent at 0x7f45e0004110>

## Copia del dispositivo al host
Aqui se transfiere lo que hay en el buffer en el dispositivo (VRAM) a un nuevo buffer en el host (RAM)

In [9]:
host_buffer2 = np.zeros_like(host_buffer)
print(host_buffer2.sum()) #El buffer esta en 0's

cl.enqueue_copy(queue, host_buffer2, device_buffer, is_blocking=True, wait_for=None)

#ambos buffers deben sumar lo mismo
print(host_buffer.sum())
print(host_buffer2.sum())

#device_buffer.release()

0
13316828359
13316828359


# Copia en Creación

In [10]:
device_buffer2 = cl.Buffer(context, cl.mem_flags.READ_WRITE |  cl.mem_flags.COPY_HOST_PTR, hostbuf=host_buffer)

In [11]:
host_buffer2.fill(0)
print(host_buffer2.sum()) #El buffer esta en 0's

cl.enqueue_copy(queue, host_buffer2, device_buffer2, is_blocking=True, wait_for=None)

#ambos buffers deben sumar lo mismo
print(host_buffer.sum())
print(host_buffer2.sum())

0
13316828359
13316828359


In [12]:
speeds=np.zeros(10)
for i in range(speeds.shape[0]):
    start = timer()
    cl.enqueue_copy(queue, device_buffer, host_buffer, is_blocking=True, wait_for=None)
    end = timer()
    speeds[i]=1/(end - start)*100
    
print("{:.2f} MB/s".format(speeds.mean()))

9327.80 MB/s


In [13]:
speeds=np.zeros(10)
for i in range(speeds.shape[0]):
    start = timer()
    cl.enqueue_copy(queue, host_buffer, device_buffer, is_blocking=True, wait_for=None)
    end = timer()
    speeds[i]=1/(end - start)*100
    
print("{:.2f} MB/s".format(speeds.mean()))

4350.86 MB/s
