In [None]:
import os
import pandas as pd
import numpy as np
import fitsio
from pathlib import Path, PurePath
import cudf

In [None]:
def create_lookup_8nb(nx, ny):
    """ Pre-compute the 8-connectivity lookup table of an (ny, nx) pixel grid.
    The table is meant to be built once and shared (e.g. across parallel workers).

    :param nx: number of pixels along the x-axis (columns)
    :param ny: number of pixels along the y-axis (rows)
    :return: int32 array of shape (nx * ny, 9). Row p holds the 1D (row-major) index of pixel p itself in
        column 0, followed by the 1D indices of its 8 neighbours. Neighbours that would fall off the grid are
        clipped to the nearest edge pixel, so rows of edge pixels contain repeated indices.
    """
    # Relative [row, col] offsets for 8-neighbour connectivity (9-element list). 1st one is the origin pixel.
    coords_8nb = np.array([[0, 0], [-1, 0], [-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1]])
    # 2D coordinates of every pixel. Matrix convention is kept: [rows, cols] = [y-axis, x-axis].
    coordy, coordx = np.unravel_index(np.arange(nx * ny), (ny, nx))
    # Neighbour coordinates of every pixel, each of shape (9, nx * ny).
    # Off-edge coordinates are clipped to the edges, each axis against its own extent, so that
    # non-square grids (nx != ny) never produce an index outside [0, nx * ny).
    nb_rows = np.clip(coordy[np.newaxis, :] + coords_8nb[:, 0, np.newaxis], 0, ny - 1)
    nb_cols = np.clip(coordx[np.newaxis, :] + coords_8nb[:, 1, np.newaxis], 0, nx - 1)
    # Convert to 1D (row-major) coordinates and transpose so that each row describes one pixel.
    lookup_coords = np.array(nb_rows * nx + nb_cols, dtype='int32', order='C').T
    return lookup_coords


def extract_coincidentals(spikes_list, idx, lookup_8nb=None):
    """ Select the spikes of one wavelength that have a spike of another wavelength in their 8-neighbourhood.

    :param spikes_list: list of 2D arrays, one per wavelength. Row 0 holds the 1D pixel coordinates of the spikes,
        the remaining rows hold the associated intensity values (two rows, judging by the 'int1'/'int2' columns
        built in process_group -- confirm against the file format).
    :param idx: index in spikes_list of the wavelength used as template.
    :param lookup_8nb: optional neighbour lookup table, as returned by create_lookup_8nb(). When omitted, the
        module-level `index_8nb` is used.
    :return: 2D array with one column per selected spike of the template wavelength. Rows are: the pixel
        coordinate, the intensity rows, then one flag per wavelength of spikes_list (non-zero when that wavelength
        has a spike in the 8-neighbourhood; always set for the template itself). A row filled with `idx` is
        inserted at row index 3, i.e. right after the intensities when there are two intensity rows.
    """
    if lookup_8nb is None:
        # Fall back on the table shared at module level.
        lookup_8nb = index_8nb
    # Spikes data at given wavelength index
    spikes_w = spikes_list[idx]
    # Associated neighbour coordinates, one row of 9 pixel indices per spike
    nb_pixels = lookup_8nb[spikes_w[0, :], :]
    # Sublist of spikes data that excludes the one serving as template
    spikes_sublist = spikes_list[:idx] + spikes_list[idx + 1:]
    # Coincidental cross-referencing: the neighbourhood of each template spike is compared with the exact spike
    # coordinates of every other wavelength. (A neighbourhood-vs-neighbourhood variant would use
    # lookup_8nb[spikes[0, :], :] as second argument of np.isin.)
    # The mask is pre-allocated so that its shape is (n_other, n_spikes) even when there is no other wavelength.
    mask_w_arr = np.zeros((len(spikes_sublist), spikes_w.shape[1]), dtype=bool)
    for row, spikes in enumerate(spikes_sublist):
        mask_w_arr[row] = np.isin(nb_pixels, spikes[0, :]).any(axis=1)
    # Keep the template spikes that coincide with at least one other wavelength
    select_pixels = mask_w_arr.any(axis=0)
    coords_w = spikes_w[0, select_pixels]
    # Re-insert a row for the template wavelength (always True) at its own position
    w_tables = np.insert(mask_w_arr[:, select_pixels], idx, True, axis=0)
    # Retrieve intensity values for the selected coordinates
    intensities = spikes_w[1:, select_pixels]
    arr_w = np.concatenate([coords_w[np.newaxis, ...], intensities, w_tables], axis=0)
    # Record which wavelength served as template
    arr_w = np.insert(arr_w, 3, idx, axis=0)

    return arr_w


def extract_coincidentals_gpu(spikes_list, idx):
    """ Placeholder for the GPU (RAPIDS/cuDF) version of extract_coincidentals().

    No GPU code exists yet: the computation is delegated to the CPU implementation, so both entry points
    return the same result and there is a single copy of the algorithm to maintain.

    :param spikes_list: list of 2D spikes arrays, one per wavelength (see extract_coincidentals).
    :param idx: index in spikes_list of the wavelength used as template.
    :return: same array as extract_coincidentals(spikes_list, idx).
    """
    # TODO: replace with a cuDF-based implementation once the prototype is conclusive.
    return extract_coincidentals(spikes_list, idx)


def process_group(group_n):
    """ Build the table of coincidental spikes for one group of files.

    The file paths of the group are looked up in the module-level `path_Series`; they are taken relative to the
    directory given by the SPIKESDATA environment variable and read with fitsio. extract_coincidentals() is then
    run with each of the 7 wavelength indices as template and the results are stacked.

    :param group_n: group number used to look up the file paths in `path_Series`.
    :return: pandas DataFrame with one row per selected spike and columns
        coords, int1, int2, wref, w0 ... w6, GroupNumber.
    """
    data_per_file = []
    for relative_path in path_Series.loc[group_n]:
        full_path = os.path.join(os.environ['SPIKESDATA'], relative_path)
        data_per_file.append(fitsio.read(full_path))

    # One result array per template wavelength, stacked along the spikes axis
    per_template = [extract_coincidentals(data_per_file, w) for w in range(7)]
    stacked = np.concatenate(per_template, axis=1)

    # Spikes become rows of the output table
    result_df = pd.DataFrame(
        stacked.T,
        columns=['coords', 'int1', 'int2', 'wref', 'w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6'],
    )
    result_df['GroupNumber'] = group_n
    return result_df


def process_group_gpu(group_n):
    """ GPU-path counterpart of process_group(): same table, built with extract_coincidentals_gpu().

    The file paths of the group are looked up in the module-level `path_Series`; they are taken relative to the
    directory given by the SPIKESDATA environment variable and read with fitsio.

    :param group_n: group number used to look up the file paths in `path_Series`.
    :return: pandas DataFrame with one row per selected spike and columns
        coords, int1, int2, wref, w0 ... w6, GroupNumber.
    """
    fpaths = path_Series.loc[group_n]
    spikes_list = [fitsio.read(os.path.join(os.environ['SPIKESDATA'], f)) for f in fpaths]
    # Use the GPU variant of the extraction (the CPU one was called here, which made this function
    # indistinguishable from process_group).
    group_data = np.concatenate([extract_coincidentals_gpu(spikes_list, i) for i in range(7)], axis=1)
    column_names = ['coords', 'int1', 'int2', 'wref', 'w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6']
    coincidental_spikes_df = pd.DataFrame(group_data.T, columns=column_names)
    coincidental_spikes_df['GroupNumber'] = group_n
    return coincidental_spikes_df

In [None]:
# Build the neighbour lookup table once, for a 4096 x 4096 pixel grid. It is kept as the module-level
# `index_8nb`, which extract_coincidentals() reads as a global.
index_8nb = create_lookup_8nb(4096, 4096)

In [None]:
# Load the 2010 spikes table (parquet) from the directory given by the SPIKESDATA environment variable.
spikes_df = pd.read_parquet(os.path.join(os.environ['SPIKESDATA'], 'spikes_df_2010.parquet'),
                                engine='pyarrow')

In [None]:
# Index the table by (GroupNumber, Time) so that path_Series.loc[group_n], as used in process_group(),
# returns the file paths of one group.
spikes_df2 = spikes_df.set_index(['GroupNumber', 'Time'])
path_Series = spikes_df2['Path']

In [None]:
# Daily, left-closed UTC intervals from 2010-06-13 up to (not including) 2010-06-16.
tintervals = pd.interval_range(start=pd.Timestamp('2010-06-13 00:00:00', tz='UTC'),
                                   end=pd.Timestamp('2010-06-16 00:00:00', tz='UTC'),
                                   freq='D', closed='left')

# Work on the first day only.
tint = tintervals[0]

In [None]:
# Unique group numbers whose 'Time' falls within [tint.left, tint.right).
# NOTE(review): the comparison presumes spikes_df['Time'] is timezone-aware (UTC), like tint -- confirm.
groups = spikes_df['GroupNumber'].loc[(spikes_df['Time'] >= tint.left) & (spikes_df['Time'] < tint.right)].unique()
groups

In [None]:
# Time the CPU reference implementation on the first group of the day.
# Note: names assigned inside a %timeit statement are not kept in the notebook namespace, so `group_df`
# is not available to later cells.
%timeit group_df = process_group(groups[0])

### Prototyping RAPIDS/CUDF-centric equivalent

In [None]:
# Interactive copy of the loading step of process_group(), for the first group: look up the file paths
# of the group and read each file (relative to the SPIKESDATA directory) with fitsio.
group_n = groups[0]
fpaths = path_Series.loc[group_n]
spikes_list = [fitsio.read(os.path.join(os.environ['SPIKESDATA'], f)) for f in fpaths]

In [None]:
# Time the file reading alone, to separate the I/O cost from the coincidence search.
# The `spikes_list` used by later cells is the one assigned in the previous cell, not this one.
%timeit spikes_list = [fitsio.read(os.path.join(os.environ['SPIKESDATA'], f)) for f in fpaths]

In [None]:
# Send the CCD 8-connectivity lookup table to a cuDF DataFrame, going through a temporary pandas DataFrame.
# A direct NumPy array -> cuDF DataFrame conversion would be more straightforward (cuDF Series has one).
# The frame keeps the default integer labels: row label = 1D pixel index, columns 0..8 = origin pixel + 8 neighbours.
pd_temp = pd.DataFrame(index_8nb)
cuindex_8nb = cudf.DataFrame.from_pandas(pd_temp)
pd_temp.head()

In [None]:
# Make a list of cuDF Series holding only the pixel coordinates (row 0) of each file: one Series per file,
# i.e. 7 Series for a complete group.
cuseries_ = [cudf.Series(spikes[0,:]) for spikes in spikes_list]
cuseries_[0].head()

In [None]:
# Index of the template wavelength (position in cuseries_). Start with the 1st one.
idx = 0

In [None]:
# Coordinates of the template wavelength, i.e. the one whose spikes are compared against the 6 others
# for 8-connectivity coincidences.
spikes_w = cuseries_[idx]
# Sublist of spikes coordinates that excludes the template (the template may need to be included too
# if same-wavelength 8-connectivity is wanted).
spikes_sublist = cuseries_[:idx] + cuseries_[idx + 1:]

In [None]:
# Associated neighbour coordinates, looked up on the GPU: the template coordinates are used as row labels
# of the lookup frame (row label = 1D pixel index), giving one row of 9 pixel indices per spike.
nb_pixels = cuindex_8nb.loc[spikes_w]
nb_pixels.head()

In [None]:
# Column 0 of the lookup is the origin pixel itself (offset [0, 0]), so nb_pixels_0 holds the template spike
# coordinates rather than their neighbours. Only this single column is used in the timings below.
nb_pixels_0 = nb_pixels[0]
print(len(nb_pixels_0))
nb_pixels_0.head()

In [None]:
# Coincidental spikes search on the GPU, restricted to one lookup column: time cuDF's isin() of nb_pixels_0
# against the coordinates of the second file. `mask_w01` is not kept after the timing.
%timeit mask_w01 = nb_pixels_0.isin(cuseries_[1])

In [None]:
# Same two inputs converted with to_array(), for the NumPy baseline timed in the next cell.
# NOTE(review): to_array() is presumed to return host-side NumPy arrays (they are passed to np.isin and
# expose .shape) -- confirm for the cuDF version in use.
arr0 = nb_pixels_0.to_array()
arr01 = cuseries_[1].to_array()
print(arr0.shape, arr01.shape)

In [None]:
# CPU baseline for the cuDF isin() timing above, on the same data. `np_mask` is not kept after the timing.
%timeit np_mask = np.isin(arr0, arr01)

In [None]:
# Synthetic benchmark data: two integer arrays drawn in [0, 4096), as NumPy arrays and as cuDF Series.
size1 = 1000
size2 = 1500
# Seeded generator so that a Restart & Run All times the same data on every run.
bench_rng = np.random.RandomState(42)
a = bench_rng.randint(0, high=4096, size=size1)
b = bench_rng.randint(0, high=4096, size=size2)
ca = cudf.Series(a)
cb = cudf.Series(b)

In [None]:
# CPU (NumPy) membership test on the synthetic arrays.
%timeit ab = np.isin(a, b)

In [None]:
# GPU (cuDF) membership test on the same synthetic data, to compare with the NumPy timing.
%timeit cab = ca.isin(cb)

### Trying with apply_rows()

In [None]:
def kernel(in0, in1, in2, in3, in4, in5, in6, in7, in8, out1, kwarg1):
    """Unfinished prototype of a row-wise kernel over nine input columns.

    NOTE(review): presumably meant for cuDF's apply_rows() (per the section title), with in0..in8 the nine
    columns of the neighbour lookup and out1 the output column -- confirm. The assignment in the loop has no
    right-hand side yet, so this cell does not compile as it stands; kwarg1 is not used.
    """
    for i, (nb0, nb1, nb2, nb3, nb4, nb5, nb6, nb7, nb8) in enumerate(zip(in0, in1, in2, in3, in4, in5, in6, in7, in8)):
        out1[i] = # TODO: right-hand side still to be written (original note: "huh?....")
    