In [1]:
# Import CuPy, cuDF, and Dask-cuDF to load and read the file(s)
import cupy as cp
import pandas as pd
import cudf
import dask_cudf

In [2]:
# Fix the seed of CuPy's global random generator so CuPy random draws are repeatable.
cp.random.seed(12)

In [17]:
# Load the housing CSV straight into GPU memory as a cuDF DataFrame.
# The stray `index=False` was dropped: it is a `to_csv` option, not a
# `read_csv` one, and had no effect on the parsed frame.
df = cudf.read_csv('model_data/housing.csv')

In [18]:
%time
df.head()

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 11.4 µs


Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0


In [19]:
%time
ddf = dask_cudf.from_cudf(df, npartitions=2)
ddf.compute()

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 10.7 µs


Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0
...,...,...,...,...
484,6.593,9.67,21.0,470400.0
485,6.120,9.08,21.0,432600.0
486,6.976,5.64,21.0,501900.0
487,6.794,6.48,21.0,462000.0


In [20]:
%time
ddf.compute().to_arrow()

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 11.2 µs


pyarrow.Table
RM: double
LSTAT: double
PTRATIO: double
MEDV: double
index: int64

In [22]:
# Write the Dask-cuDF frame out in Parquet format under model_data/housing.
ddf.to_parquet('model_data/housing')

In [23]:
# Re-load the Parquet output via a glob over model_data/housing; this rebinds
# `ddf` to the frame read from disk rather than the one built from `df`.
ddf = dask_cudf.read_parquet('model_data/housing/*')

In [24]:
# Bare expression: renders the frame's structure (dtypes and partition divisions), not its data.
ddf

Unnamed: 0_level_0,RM,LSTAT,PTRATIO,MEDV
npartitions=2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,float64,float64,float64,float64
245,...,...,...,...
488,...,...,...,...


In [25]:
import time

from dask.distributed import Client, wait
from dask_cuda import LocalCUDACluster

In [26]:
# Start a dask_cuda LocalCUDACluster and connect a distributed Client to it.
cluster = LocalCUDACluster()
client = Client(cluster)
# Bare expression: renders the client summary (scheduler address, dashboard link, workers).
client

0,1
Client  Scheduler: tcp://127.0.0.1:33419  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 1  Cores: 1  Memory: 16.62 GB


In [27]:
# Build a two-column integer frame on the GPU, split it into 5 Dask
# partitions, and derive a third column c = a + 5.
nrows = 10_000_000

df2 = cudf.DataFrame({col: cp.arange(nrows) for col in ('a', 'b')})
ddf2 = dask_cudf.from_cudf(df2, npartitions=5)
ddf2 = ddf2.assign(c=ddf2['a'] + 5)
ddf2

Unnamed: 0_level_0,a,b,c
npartitions=5,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,int64,int64,int64
2000000,...,...,...
...,...,...,...
8000000,...,...,...
9999999,...,...,...


In [28]:
# Snapshot GPU state (notably memory usage) before ddf2 is persisted.
!nvidia-smi

Mon Jan 25 19:09:44 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 207...  Off  | 00000000:01:00.0  On |                  N/A |
| N/A   60C    P0    30W /  N/A |    989MiB /  7982MiB |      6%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [29]:
# persist() asks Dask to compute the partitions now and keep the results held
# by the cluster; rebinding `ddf2` points later cells at the persisted collection.
# NOTE(review): the nvidia-smi readings around this cell show GPU memory use rising.
ddf2 = ddf2.persist()
ddf2

Unnamed: 0_level_0,a,b,c
npartitions=5,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,int64,int64,int64
2000000,...,...,...
...,...,...,...
8000000,...,...,...
9999999,...,...,...


In [30]:
# Snapshot GPU state again after persist(), to compare memory usage with the earlier reading.
!nvidia-smi

Mon Jan 25 19:10:20 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 207...  Off  | 00000000:01:00.0  On |                  N/A |
| N/A   61C    P0    30W /  N/A |   1813MiB /  7982MiB |      5%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

