# PYCUDA: 
Needs a GPU and pycuda install

<table align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/dtrad/geoml_course/blob/master/pycudaexamples.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
</table>

Environment

In [2]:
!env 

SHELL=/bin/bash
SESSION_MANAGER=local/wynter:@/tmp/.ICE-unix/1746,unix/wynter:/tmp/.ICE-unix/1746
QT_ACCESSIBILITY=1
SNAP_REVISION=107
DATAPATH=/data/tmp/
COLORTERM=truecolor
XDG_CONFIG_DIRS=/etc/xdg/xdg-ubuntu:/etc/xdg
PYTHONUNBUFFERED=1
XDG_MENU_PREFIX=gnome-
GNOME_DESKTOP_SESSION_ID=this-is-deprecated
CONDA_EXE=/home/dtrad/anaconda3/bin/conda
_CE_M=
APPLICATION_INSIGHTS_NO_DIAGNOSTIC_CHANNEL=true
SNAP_REAL_HOME=/home/dtrad
RSFROOT=/home/dtrad/madagascar
SNAP_USER_COMMON=/home/dtrad/snap/code/common
LANGUAGE=en_CA:en
TEXMFHOME=/home/dtrad/segtex-0.9.3/texmf
GNOME_SHELL_SESSION_MODE=ubuntu
SSH_AUTH_SOCK=/run/user/1000/keyring/ssh
SNAP_INSTANCE_KEY=
ELECTRON_RUN_AS_NODE=1
XMODIFIERS=@im=ibus
DESKTOP_SESSION=ubuntu
SSH_AGENT_PID=1712
GDK_PIXBUF_MODULE_FILE=/home/dtrad/snap/code/common/.cache/gdk-pixbuf-loaders.cache
VSCODE_AMD_ENTRYPOINT=vs/workbench/api/node/extensionHostProcess
GTK_MODULES=gail:atk-bridge
PWD=/home/dtrad/disk2/pythonexamples/geoml/Lec03
GSETTINGS_SCHEMA_DIR=/snap/code

In [3]:
# Environment needs ipykernel to be installed outside the notebook (in a terminal)
# conda install -n pycuda ipykernel --update-deps --force-reinstall

In [15]:
# If not installed, for example in Collab, needs this 
#!pip install pycuda

In [16]:
!echo $HOME


/home/dtrad


In [17]:
!pip config list

In [18]:
import pycuda
import pycuda.driver as drv
drv.init()

In [19]:
print('CUDA device query (PyCUDA version) \n')
print('Detected {} CUDA Capable device(s) \n'.format(drv.Device.count()))

CUDA device query (PyCUDA version) 

Detected 2 CUDA Capable device(s) 



In [20]:
for i in range(drv.Device.count()):
    gpu_device = drv.Device(i)
    print(gpu_device)
    print( 'Device {}: {}'.format( i, gpu_device.name() ) )
    compute_capability = float( '%d.%d' % gpu_device.compute_capability() )
    print( '\t Compute Capability: {}'.format(compute_capability))
    print( '\t Total Memory: {} megabytes'.format(gpu_device.total_memory()//(1024**2)))
    
    

<pycuda._driver.Device object at 0x7fbf9a029940>
Device 0: NVIDIA GeForce RTX 2060 SUPER
	 Compute Capability: 7.5
	 Total Memory: 7982 megabytes
<pycuda._driver.Device object at 0x7fbf9a029b70>
Device 1: NVIDIA GeForce GTX 960
	 Compute Capability: 5.2
	 Total Memory: 1988 megabytes


In [21]:
import pycuda.autoinit
from pycuda import gpuarray
from time import time
from pycuda.elementwise import ElementwiseKernel 
import numpy as np  
    

In [23]:
host_data = np.float32( np.random.random(50000000) )

gpu_2x_ker = ElementwiseKernel(
"float *in, float *out",
"out[i] = 2*in[i];",
"gpu_2x_ker")

In [24]:
def speedcomparison():
    t1 = time()
    host_data_2x =  host_data * np.float32(2)
    t2 = time()
    print('total time to compute on CPU: %f' % (t2 - t1))
    device_data = gpuarray.to_gpu(host_data)
    # allocate memory for output
    device_data_2x = gpuarray.empty_like(device_data)
    t1 = time()
    gpu_2x_ker(device_data, device_data_2x)
    t2 = time()
    from_device = device_data_2x.get()
    print('total time to compute on GPU: %f' % (t2 - t1))
    print('Is the host computation the same as the GPU computation? : {}'.format(np.allclose(from_device, host_data_2x) ))
    

In [25]:
# First time GPU is slow because it needs to compile the code.
speedcomparison()

total time to compute on CPU: 0.064909
total time to compute on GPU: 0.077960
Is the host computation the same as the GPU computation? : True


In [26]:
speedcomparison()

total time to compute on CPU: 0.061355
total time to compute on GPU: 0.000088
Is the host computation the same as the GPU computation? : True
