# Init

In [31]:
from dask.distributed import Client, LocalCluster

cluster = LocalCluster(processes=False, memory_limit="8GB")
client_cpu = Client(cluster)
client_cpu

Perhaps you already have a cluster running?
Hosting the HTTP server on port 42699 instead
  http_address["port"], self.http_server.port


0,1
Client  Scheduler: inproc://192.168.0.103/2387/1  Dashboard: http://192.168.0.103:42699/status,Cluster  Workers: 1  Cores: 4  Memory: 8.00 GB


In [1]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

# Create a Dask Cluster with one worker per GPU
cluster = LocalCUDACluster(memory_limit="2GB")
client_gpu = Client(cluster)
client_gpu

0,1
Client  Scheduler: tcp://127.0.0.1:36965  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 1  Cores: 1  Memory: 2.00 GB


In [2]:
import cupy as cp
import dask.array as da
import dask.config as dc
import numpy as np

# Device test

In [5]:
import cupy as cp

x = cp.array([1, 2, 3])
print(x.device)

<CUDA Device 0>


# CPU test

In [37]:
# generate chunked dask arrays of mamy numpy random arrays
rs = da.random.RandomState()
x = rs.normal(10, 1, size=(50000, 5000))
display(x)

dc.set(scheduler=client_cpu)
%time (x + 1)[::2, ::2].sum().compute()

Unnamed: 0,Array,Chunk
Bytes,2.00 GB,62.50 MB
Shape,"(50000, 5000)","(3125, 2500)"
Count,32 Tasks,32 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 2.00 GB 62.50 MB Shape (50000, 5000) (3125, 2500) Count 32 Tasks 32 Chunks Type float64 numpy.ndarray",5000  50000,

Unnamed: 0,Array,Chunk
Bytes,2.00 GB,62.50 MB
Shape,"(50000, 5000)","(3125, 2500)"
Count,32 Tasks,32 Chunks
Type,float64,numpy.ndarray


CPU times: user 17.7 s, sys: 1.49 s, total: 19.2 s
Wall time: 5.61 s


687493351.6954968

# GPU test

In [55]:
# generate chunked dask arrays of mamy cupy random arrays
rs = da.random.RandomState(
    RandomState=cp.random.RandomState
)  # <-- we specify cupy here
x = rs.normal(10, 1, size=(50000, 5000), dtype=cp.float32)
display(x)

dc.set(scheduler=client_gpu)
%time (x + 1)[::2, ::2].sum().compute()

Unnamed: 0,Array,Chunk
Bytes,1000.00 MB,125.00 MB
Shape,"(50000, 5000)","(6250, 5000)"
Count,8 Tasks,8 Chunks
Type,float32,cupy.ndarray
"Array Chunk Bytes 1000.00 MB 125.00 MB Shape (50000, 5000) (6250, 5000) Count 8 Tasks 8 Chunks Type float32 cupy.ndarray",5000  50000,

Unnamed: 0,Array,Chunk
Bytes,1000.00 MB,125.00 MB
Shape,"(50000, 5000)","(6250, 5000)"
Count,8 Tasks,8 Chunks
Type,float32,cupy.ndarray


CPU times: user 101 ms, sys: 16.7 ms, total: 117 ms
Wall time: 389 ms


array(6.8751514e+08, dtype=float32)

# GPU tests

In [3]:
dc.set(scheduler=client_gpu)

<dask.config.set at 0x7fb8849fbb90>

In [57]:
# generate chunked dask arrays of mamy cupy random arrays
rs = da.random.RandomState(
    seed=0, RandomState=cp.random.RandomState
)  # <-- we specify cupy here
x = rs.uniform(size=(5e4, 5e4), chunks=(4e3, 4e3), dtype=cp.float32)
display(x)
res = da.coarsen(cp.mean, x, {0: int(1e2), 1: int(1e2)})
display(res)

%time res.compute()

Unnamed: 0,Array,Chunk
Bytes,10.00 GB,64.00 MB
Shape,"(50000, 50000)","(4000, 4000)"
Count,169 Tasks,169 Chunks
Type,float32,cupy.ndarray
"Array Chunk Bytes 10.00 GB 64.00 MB Shape (50000, 50000) (4000, 4000) Count 169 Tasks 169 Chunks Type float32 cupy.ndarray",50000  50000,

Unnamed: 0,Array,Chunk
Bytes,10.00 GB,64.00 MB
Shape,"(50000, 50000)","(4000, 4000)"
Count,169 Tasks,169 Chunks
Type,float32,cupy.ndarray


Unnamed: 0,Array,Chunk
Bytes,1000.00 kB,6.40 kB
Shape,"(500, 500)","(40, 40)"
Count,338 Tasks,169 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1000.00 kB 6.40 kB Shape (500, 500) (40, 40) Count 338 Tasks 169 Chunks Type float32 numpy.ndarray",500  500,

Unnamed: 0,Array,Chunk
Bytes,1000.00 kB,6.40 kB
Shape,"(500, 500)","(40, 40)"
Count,338 Tasks,169 Chunks
Type,float32,numpy.ndarray


CPU times: user 1.62 s, sys: 156 ms, total: 1.78 s
Wall time: 6.38 s


array([[0.5048172 , 0.4990801 , 0.49782628, ..., 0.50250643, 0.50239426,
        0.49746788],
       [0.50026214, 0.4985503 , 0.49895966, ..., 0.49902743, 0.50055015,
        0.50050217],
       [0.49882555, 0.4994005 , 0.50236785, ..., 0.50004697, 0.49868837,
        0.49705607],
       ...,
       [0.50282395, 0.49974814, 0.5001139 , ..., 0.4999639 , 0.5014822 ,
        0.49912983],
       [0.4959562 , 0.4989585 , 0.49683473, ..., 0.49901348, 0.50812995,
        0.5001192 ],
       [0.5031596 , 0.5020208 , 0.49956128, ..., 0.5052707 , 0.4993606 ,
        0.49953824]], dtype=float32)

In [18]:
# generate chunked dask arrays of mamy cupy random arrays
rs = da.random.RandomState(
    seed=0, RandomState=cp.random.RandomState
)  # <-- we specify cupy here
x = rs.uniform(size=(5e2, 5e2, 1e4))  # , chunks=(5e3, 5e3, 1e2))
display(x)
res = x.mean(axis=2)
display(res)

%time res.compute()

Unnamed: 0,Array,Chunk
Bytes,20.00 GB,125.00 MB
Shape,"(500, 500, 10000)","(250, 250, 250)"
Count,160 Tasks,160 Chunks
Type,float64,cupy.ndarray
"Array Chunk Bytes 20.00 GB 125.00 MB Shape (500, 500, 10000) (250, 250, 250) Count 160 Tasks 160 Chunks Type float64 cupy.ndarray",10000  500  500,

Unnamed: 0,Array,Chunk
Bytes,20.00 GB,125.00 MB
Shape,"(500, 500, 10000)","(250, 250, 250)"
Count,160 Tasks,160 Chunks
Type,float64,cupy.ndarray


Unnamed: 0,Array,Chunk
Bytes,2.00 MB,500.00 kB
Shape,"(500, 500)","(250, 250)"
Count,376 Tasks,4 Chunks
Type,float64,cupy.ndarray
"Array Chunk Bytes 2.00 MB 500.00 kB Shape (500, 500) (250, 250) Count 376 Tasks 4 Chunks Type float64 cupy.ndarray",500  500,

Unnamed: 0,Array,Chunk
Bytes,2.00 MB,500.00 kB
Shape,"(500, 500)","(250, 250)"
Count,376 Tasks,4 Chunks
Type,float64,cupy.ndarray


KeyboardInterrupt: 

In [17]:
# generate chunked dask arrays of mamy cupy random arrays
rs = da.random.RandomState(
    RandomState=cp.random.RandomState
)  # <-- we specify cupy here
x = rs.uniform(size=(5e4, 5e4))  # , chunks=(5e3, 5e3, 1e2))
display(x)
x = x.reshape(5e2, 1e2, 1e2, 5e2).reshape(5e2, 1e4, 5e2).persist()
# display(x)
# res = x.mean(axis=1)
display(res)

# %time res.compute()

Unnamed: 0,Array,Chunk
Bytes,20.00 GB,78.12 MB
Shape,"(50000, 50000)","(3125, 3125)"
Count,256 Tasks,256 Chunks
Type,float64,cupy.ndarray
"Array Chunk Bytes 20.00 GB 78.12 MB Shape (50000, 50000) (3125, 3125) Count 256 Tasks 256 Chunks Type float64 cupy.ndarray",50000  50000,

Unnamed: 0,Array,Chunk
Bytes,20.00 GB,78.12 MB
Shape,"(50000, 50000)","(3125, 3125)"
Count,256 Tasks,256 Chunks
Type,float64,cupy.ndarray


Unnamed: 0,Array,Chunk
Bytes,2.00 MB,500.00 kB
Shape,"(500, 500)","(250, 250)"
Count,376 Tasks,4 Chunks
Type,float64,cupy.ndarray
"Array Chunk Bytes 2.00 MB 500.00 kB Shape (500, 500) (250, 250) Count 376 Tasks 4 Chunks Type float64 cupy.ndarray",500  500,

Unnamed: 0,Array,Chunk
Bytes,2.00 MB,500.00 kB
Shape,"(500, 500)","(250, 250)"
Count,376 Tasks,4 Chunks
Type,float64,cupy.ndarray


# Reshape test

In [57]:
a, b, c = 3, 3, 3
da.arange(8 * 2 * 2 * 2).reshape(8, 8).reshape(8, 2, 2, 2).reshape(4, 4, 4)

Unnamed: 0,Array,Chunk
Bytes,512 B,512 B
Shape,"(4, 4, 4)","(4, 4, 4)"
Count,4 Tasks,1 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 512 B 512 B Shape (4, 4, 4) (4, 4, 4) Count 4 Tasks 1 Chunks Type int64 numpy.ndarray",4  4  4,

Unnamed: 0,Array,Chunk
Bytes,512 B,512 B
Shape,"(4, 4, 4)","(4, 4, 4)"
Count,4 Tasks,1 Chunks
Type,int64,numpy.ndarray


# CUPy test

In [None]:
# generate chunked dask arrays of mamy cupy random arrays
x = cp.random.uniform(size=np.array([1e2, 1e2, 1e4], dtype=cp.int))
display(x)
# res = x.reshape(100, 5e3, 5e3)
res = x.mean(axis=2)
display(res)
# %time res.compute()

In [13]:
x0 = cp.random.uniform(size=int(1e5))
x1 = cp.random.uniform(size=int(1e4))
res = %timeit x0 < x1[:, None]

del x0, x1, res
cp._default_memory_pool.free_all_blocks()



KeyboardInterrupt: 

# Numpy tests

In [4]:
x0 = np.random.uniform(size=int(1e5))
x1 = np.random.uniform(size=int(1e4))
%time x0 < x1[:, None]

CPU times: user 614 ms, sys: 190 ms, total: 804 ms
Wall time: 756 ms


array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [ True,  True,  True, ..., False, False,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ..., False,  True,  True]])

In [16]:
rs = da.random.RandomState(
    RandomState=cp.random.RandomState
)  # <-- we specify cupy here
# rs = da.random.RandomState()
x0 = rs.uniform(size=(int(1e5), int(1e4)), chunks=(int(1e4), int(1e3)))
x1 = rs.uniform(size=(int(1e5), int(1e4)), chunks=(int(1e4), int(1e3)))
display(x0, x1)

a = da.maximum(x0, x1)
display(a)

%time a.mean().compute()

Unnamed: 0,Array,Chunk
Bytes,8.00 GB,80.00 MB
Shape,"(100000, 10000)","(10000, 1000)"
Count,100 Tasks,100 Chunks
Type,float64,cupy.ndarray
"Array Chunk Bytes 8.00 GB 80.00 MB Shape (100000, 10000) (10000, 1000) Count 100 Tasks 100 Chunks Type float64 cupy.ndarray",10000  100000,

Unnamed: 0,Array,Chunk
Bytes,8.00 GB,80.00 MB
Shape,"(100000, 10000)","(10000, 1000)"
Count,100 Tasks,100 Chunks
Type,float64,cupy.ndarray


Unnamed: 0,Array,Chunk
Bytes,8.00 GB,80.00 MB
Shape,"(100000, 10000)","(10000, 1000)"
Count,100 Tasks,100 Chunks
Type,float64,cupy.ndarray
"Array Chunk Bytes 8.00 GB 80.00 MB Shape (100000, 10000) (10000, 1000) Count 100 Tasks 100 Chunks Type float64 cupy.ndarray",10000  100000,

Unnamed: 0,Array,Chunk
Bytes,8.00 GB,80.00 MB
Shape,"(100000, 10000)","(10000, 1000)"
Count,100 Tasks,100 Chunks
Type,float64,cupy.ndarray


Unnamed: 0,Array,Chunk
Bytes,8.00 GB,80.00 MB
Shape,"(100000, 10000)","(10000, 1000)"
Count,300 Tasks,100 Chunks
Type,float64,cupy.ndarray
"Array Chunk Bytes 8.00 GB 80.00 MB Shape (100000, 10000) (10000, 1000) Count 300 Tasks 100 Chunks Type float64 cupy.ndarray",10000  100000,

Unnamed: 0,Array,Chunk
Bytes,8.00 GB,80.00 MB
Shape,"(100000, 10000)","(10000, 1000)"
Count,300 Tasks,100 Chunks
Type,float64,cupy.ndarray


CPU times: user 2.08 s, sys: 145 ms, total: 2.22 s
Wall time: 31.1 s


array(0.66667109)

In [9]:
rs = da.random.RandomState()
x0 = rs.uniform(size=(int(1e4), int(1e4)), chunks=(int(1e4), int(1e3)))
x1 = rs.uniform(size=(int(1e4), int(1e4)), chunks=(int(1e4), int(1e3)))
display(x0, x1)

a = x0 + x1
display(a)

%time a.mean().compute()

Unnamed: 0,Array,Chunk
Bytes,800.00 MB,80.00 MB
Shape,"(10000, 10000)","(10000, 1000)"
Count,10 Tasks,10 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 800.00 MB 80.00 MB Shape (10000, 10000) (10000, 1000) Count 10 Tasks 10 Chunks Type float64 numpy.ndarray",10000  10000,

Unnamed: 0,Array,Chunk
Bytes,800.00 MB,80.00 MB
Shape,"(10000, 10000)","(10000, 1000)"
Count,10 Tasks,10 Chunks
Type,float64,numpy.ndarray


Unnamed: 0,Array,Chunk
Bytes,800.00 MB,80.00 MB
Shape,"(10000, 10000)","(10000, 1000)"
Count,10 Tasks,10 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 800.00 MB 80.00 MB Shape (10000, 10000) (10000, 1000) Count 10 Tasks 10 Chunks Type float64 numpy.ndarray",10000  10000,

Unnamed: 0,Array,Chunk
Bytes,800.00 MB,80.00 MB
Shape,"(10000, 10000)","(10000, 1000)"
Count,10 Tasks,10 Chunks
Type,float64,numpy.ndarray


Unnamed: 0,Array,Chunk
Bytes,800.00 MB,80.00 MB
Shape,"(10000, 10000)","(10000, 1000)"
Count,30 Tasks,10 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 800.00 MB 80.00 MB Shape (10000, 10000) (10000, 1000) Count 30 Tasks 10 Chunks Type float64 numpy.ndarray",10000  10000,

Unnamed: 0,Array,Chunk
Bytes,800.00 MB,80.00 MB
Shape,"(10000, 10000)","(10000, 1000)"
Count,30 Tasks,10 Chunks
Type,float64,numpy.ndarray


CPU times: user 222 ms, sys: 30 ms, total: 252 ms
Wall time: 2.11 s


0.999995846866216

In [19]:
rs = da.random.RandomState()
x0 = rs.uniform(size=int(1e4), chunks=1e4)
x1 = rs.uniform(size=int(1e4), chunks=1e4)
display(x0, x1)

a = x0 < x1[:, None]
display(a)

# %time (x + 1)[::2, ::2].sum().compute(scheduler='threads')
%time a.mean().compute()

Unnamed: 0,Array,Chunk
Bytes,80.00 kB,80.00 kB
Shape,"(10000,)","(10000,)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 80.00 kB 80.00 kB Shape (10000,) (10000,) Count 1 Tasks 1 Chunks Type float64 numpy.ndarray",10000  1,

Unnamed: 0,Array,Chunk
Bytes,80.00 kB,80.00 kB
Shape,"(10000,)","(10000,)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray


Unnamed: 0,Array,Chunk
Bytes,80.00 kB,80.00 kB
Shape,"(10000,)","(10000,)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 80.00 kB 80.00 kB Shape (10000,) (10000,) Count 1 Tasks 1 Chunks Type float64 numpy.ndarray",10000  1,

Unnamed: 0,Array,Chunk
Bytes,80.00 kB,80.00 kB
Shape,"(10000,)","(10000,)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray


Unnamed: 0,Array,Chunk
Bytes,100.00 MB,100.00 MB
Shape,"(10000, 10000)","(10000, 10000)"
Count,4 Tasks,1 Chunks
Type,bool,numpy.ndarray
"Array Chunk Bytes 100.00 MB 100.00 MB Shape (10000, 10000) (10000, 10000) Count 4 Tasks 1 Chunks Type bool numpy.ndarray",10000  10000,

Unnamed: 0,Array,Chunk
Bytes,100.00 MB,100.00 MB
Shape,"(10000, 10000)","(10000, 10000)"
Count,4 Tasks,1 Chunks
Type,bool,numpy.ndarray


CPU times: user 46.1 ms, sys: 4.82 ms, total: 51 ms
Wall time: 237 ms


0.50351362

In [5]:
res = x.mean(axis=2)
display(res)
# %time res.compute()