### Passing CuPy arrays to Dask

In [1]:
import cupy
import dask.array as da
# Build a chunked Dask array whose blocks are CuPy random arrays.
# CuPy is selected through the RandomState= argument below.
rs = da.random.RandomState(RandomState=cupy.random.RandomState)
x = rs.normal(
    loc=10,
    scale=1,
    size=(500000, 500000),
    chunks=(10000, 10000),
)
# Array size in GB (bytes / 1e9); this prints 2000.0, i.e. 2 TB.
size_gb = x.nbytes / 1e9
print(size_gb)

2000.0


### Dask_cuda (Local CUDA Cluster)

In [None]:
# dask-cuda project page: https://github.com/rapidsai/dask-cuda
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

# Start a LocalCUDACluster with default arguments and attach a client to it.
# To restrict the cluster to particular GPUs instead, pass e.g.
# LocalCUDACluster(CUDA_VISIBLE_DEVICES=[0, 1, 2, 4]).
cluster = LocalCUDACluster()
client = Client(cluster)
client  # last expression of the cell: display the client

In [None]:
# Display the dashboard link of the current client.
client.dashboard_link

In [None]:
# CPU baseline, left commented out because it takes two hours on the CPU:
# (x + 1)[::2, ::2].sum().compute(scheduler='single-threaded')

In [None]:
# GPU run: add one to every element, keep every second row and column,
# then reduce to a single sum and compute it.
shifted = x + 1
every_other = shifted[::2, ::2]
y = every_other.sum().compute()
# Show which device holds the computed result.
y.device

In [None]:
# Free all blocks held by CuPy's default device memory pool, then do the
# same for the default pinned (host) memory pool.
# NOTE(review): presumably this only affects the pools of the process running
# this cell, not those of the Dask workers — confirm.
device_pool = cupy.get_default_memory_pool()
device_pool.free_all_blocks()
pinned_pool = cupy.get_default_pinned_memory_pool()
pinned_pool.free_all_blocks()

### Exploring dask_jobqueue (PBSCluster) - For Cluster Use

In [2]:
import time 
import cupy
import dask.array as da
import xarray as xr
from dask_jobqueue import PBSCluster
from dask.distributed import Client

# Settings for the PBS jobs that back the Dask workers: submitted to the
# 'casper' queue, each job asking PBS for one chunk with one GPU
# (resource_spec) and running a single one-core process with 16GB of memory.
pbs_options = dict(
    queue='casper',
    resource_spec='select=1:ngpus=1',
    processes=1,
    cores=1,
    memory='16GB',
)
cluster = PBSCluster(**pbs_options)

# Print the generated submission script so the PBS directives can be checked.
print(cluster.job_script())

# Scale the cluster to 2, then connect a client to it.
cluster.scale(2)
client = Client(cluster)

# When finished, the cluster can be shut down with:
# cluster.close()

  from distributed.utils import tmpfile


#!/usr/bin/env bash

#PBS -N dask-worker
#PBS -q casper
#PBS -A NTDD0005
#PBS -l select=1:ngpus=1
#PBS -l walltime=00:30:00

/glade/work/hkashgar/conda-envs/geocat/bin/python -m distributed.cli.dask_worker tcp://10.12.205.24:38030 --nthreads 1 --memory-limit 14.90GiB --name dummy-name --nanny --death-timeout 60 --interface ib0 --protocol tcp://



In [3]:
# Display the client summary: connection method, cluster type, dashboard URL,
# and the current worker / thread / memory totals.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.205.24:38030,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [4]:
# Random state whose blocks are generated by CuPy (selected via RandomState=).
rs = da.random.RandomState(RandomState=cupy.random.RandomState)

# 500000 x 500000 normal samples (mean 10, std 1) in 10000 x 10000 chunks.
array_shape = (500000, 500000)
chunk_shape = (10000, 10000)
x = rs.normal(10, 1, size=array_shape, chunks=chunk_shape)

In [6]:
# GPU run on the cluster: increment, take every second row and column,
# and sum everything down to one value.
incremented = x + 1
subsampled = incremented[::2, ::2]
total = subsampled.sum()
y = total.compute()
# Display the device of the result (the recorded output is <CUDA Device 0>).
y.device

<CUDA Device 0>