In [28]:
import pandas as pd
import numpy as np
import cudf
import cupy as cp

## Problem 1: cudf dataframe index-based lookup much slower than on CPU

#### 1) Create the lookup table and an array of indices to look up in it

In [26]:
# Build the 8-neighbour lookup table for a ny x nx image: row p of the table holds the 1D (flattened) index of
# pixel p followed by the 1D indices of its 8 neighbours. The table is then mirrored as a pandas DataFrame and
# as a cudf DataFrame, and a random list of row indices is created to benchmark the lookups.

# List of relative 2D coordinates for 8-neighbour connectivity (9-element list). 1st one is the origin pixel.
coords_8nb = np.array([[0, 0], [-1, 0], [-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1]])
# Array of 2D coordinates for a 4096 x 4096 array. Matrix convention is kept. [rows, cols] = [y-axis, x-axis]
ny, nx = [4096, 4096]
coords_1d = np.arange(nx * ny)
coordy, coordx = np.unravel_index(coords_1d, [ny, nx])  # also possible by raveling a meshgrid() output
coords2d = np.array([coordy, coordx])  # shape (2, ny * nx)
# Create the array of 2D coordinates of the 8 neighbours associated with each pixel, shape (9, 2, ny * nx).
# pixel 0 has 8 neighbours + itself, pixel 1 has 8 neighbours + itself, etc...
coords2d_8nb = coords2d[np.newaxis, ...] + coords_8nb[..., np.newaxis]
# Handle off-edge coordinates by clipping to the edges, operation done in-place. Each axis is clipped against
# its own upper limit (rows against ny - 1, columns against nx - 1), so a non-square image is handled as well.
# The (2, 1) limits array broadcasts over the neighbour axis (first) and the pixel axis (last).
max_coords = np.array([ny - 1, nx - 1])[:, np.newaxis]
np.clip(coords2d_8nb, 0, max_coords, out=coords2d_8nb)
# Convert to 1D coordinates (row * nx + col), vectorized over the 9 neighbours - This is the lookup table.
# The (9, ny * nx) int32 array is transposed so that each row of the table corresponds to one pixel.
index_8nb = (coords2d_8nb[:, 0, :] * nx + coords2d_8nb[:, 1, :]).astype('int32', order='C').T
print('Table size: ', index_8nb.shape)
# Convert into a pandas dataframe
pd_df = pd.DataFrame(index_8nb)
# Convert to a cudf DataFrame.
# Going through pandas because creating it directly from a numpy array did not seem possible
# (yet it is possible with cudf Series).
gdf8nb = cudf.from_pandas(pd_df)
# Random array of indices to look into the table (this emulates our actual data). A locally seeded generator
# keeps the benchmark input identical from run to run without touching numpy's global random state.
# NOTE(review): the bounds 1 and 4095*4095 do not span the whole table (nx * ny rows) - confirm this is intended.
idx = np.random.RandomState(0).randint(low=1, high=4095*4095, size=10000)
print('lookup index list of size: ', len(idx))
# Convert to a cudf Series
sidx = cudf.Series(idx)

Table size:  (16777216, 9)
lookup index list of size:  10000


#### 2) Profiling cpu and gpu

In [11]:
# Baseline: time the row lookup on the CPU by indexing the numpy table with the numpy index array.
# Recorded run: 192 us +/- 10.1 us per loop.
%timeit coords = index_8nb[idx, :]

192 µs ± 10.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [18]:
# Time the same row lookup on the CPU through the pandas DataFrame, using .loc with the numpy index array.
%timeit coords = pd_df.loc[idx]

999 µs ± 23.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [22]:
# Time the lookup on the cudf DataFrame with .loc, passing the numpy array as the list of indices (~553 ms).
%timeit gcoords = gdf8nb.loc[idx]

553 ms ± 7.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
# Time the lookup on the cudf DataFrame with .loc, passing the cudf Series as the list of indices (~558 ms).
%timeit gcoords = gdf8nb.loc[sidx]

558 ms ± 14.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### cuDF DataFrames are several orders of magnitude slower than the CPU for index-based lookups, for a table of order 10^7 rows (here 16,777,216 rows × 9 columns) and a lookup list of 10^4 indices.

## Problem 2: Cupy isin() not memory optimized 
#### Also, installing the latest cudf version downgrades CuPy from 0.7.x to 0.6.x, a version where neither isin() nor in1d() exists.

In [29]:
# Draw the two random integer test arrays on the GPU with the same randint bounds:
# g1 gets 75000 elements and g2 gets 300000 elements (g1 is generated first, then g2).
g1, g2 = (
    cp.random.randint(1, high=4096*4096, size=n_elements)
    for n_elements in (75000, 300000)
)

In [32]:
# Membership test of g1 against g2 on the GPU. With the CuPy version pulled in by the cudf install,
# in1d() is missing, so the AttributeError is caught and displayed instead of aborting a full notebook run.
# When in1d() is missing, g3 is left undefined.
try:
    g3 = cp.in1d(g1, g2)
except AttributeError as err:
    print("{}: {}".format(type(err).__name__, err))

AttributeError: module 'cupy' has no attribute 'in1d'