In [None]:
import os
import pandas as pd
import numpy as np
import fitsio
from pathlib import Path, PurePath
import cupy as cp
from IPython.display import display
print(cp.__version__)

In [None]:
def create_lookup_8nb(nx, ny):
    """ Pre-compute the 8-connectivity lookup table. This will be shared across parallel workers.

    For every pixel of an image with ``ny`` rows and ``nx`` columns, the table holds the flattened
    (row-major) index of the pixel itself followed by the indices of its 8 neighbours. Neighbours
    that would fall outside the image are clipped onto the nearest edge pixel, independently on
    each axis, so the table is valid for non-square images too.

    :param nx: number of columns in image array (number of pixels on horizontal axis)
    :param ny: number of rows in image array (number of pixels on vertical axis)
    :return: int32 array of shape (nx * ny, 9). Row ``p`` lists the 1D index of pixel ``p`` (column 0)
        and of its 8 edge-clipped neighbours (columns 1-8). The array is the transpose of a
        C-ordered (9, nx * ny) array, i.e. it is not C-contiguous.
    """
    # Relative [row, col] offsets for 8-neighbour connectivity, origin pixel first.
    coords_8nb = np.array([[0, 0], [-1, 0], [-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1]])
    # 2D coordinates of every pixel. Matrix convention is kept: [rows, cols] = [y-axis, x-axis].
    coordy, coordx = np.unravel_index(np.arange(nx * ny), (ny, nx))
    # Row and column coordinates of the 9 positions attached to each pixel, shape (9, nx * ny) each.
    rows_8nb = coordy[np.newaxis, :] + coords_8nb[:, 0, np.newaxis]
    cols_8nb = coordx[np.newaxis, :] + coords_8nb[:, 1, np.newaxis]
    # Handle off-edge coordinates by clipping to the edges, in place. Rows are bounded by ny and
    # columns by nx; clipping both with nx - 1 would only be correct for a square detector.
    np.clip(rows_8nb, 0, ny - 1, out=rows_8nb)
    np.clip(cols_8nb, 0, nx - 1, out=cols_8nb)
    # Convert to 1D (row-major) coordinates and put pixels on the first axis.
    lookup_coords = (rows_8nb * nx + cols_8nb).astype('int32').T
    return lookup_coords

# Element-wise membership test compiled by CuPy.
# Inputs : num - how many entries of `y` to scan,
#          x   - the element being tested (one kernel instance per element of the `x` array),
#          y   - the array searched; declared `raw` so the body indexes it manually as y[t].
# Output : z   - boolean, true when `x` equals at least one of y[0] ... y[num-1].
# Each element of `x` costs `num` comparisons: the loop always runs to `num`.
# NOTE(review): `num` must not exceed the length of `y`, nothing in the kernel checks it.
kernel = cp.ElementwiseKernel('T num,  T x,  raw T y', 'bool z',
'''int t = 0; 
z = 0;
#pragma unroll
for(t = 0; t < num; t++) z = z || (x == y[t]);''',
'my_kernel')

def extract_coincidentals_GPU(kernel, spikes_list, idx):
    """Find the spikes of wavelength ``idx`` that coincide (8-connectivity) with spikes of any wavelength.

    Relies on two notebook-level variables that must exist before the call:
    ``cuarrays_`` (list of CuPy arrays with the spike coordinates of each wavelength) and
    ``cuindex_8nb`` (the device copy of the 8-neighbour lookup table).

    :param kernel: element-wise membership kernel, called as ``kernel(num, x, y)``
    :param spikes_list: list of host arrays, one per wavelength; columns of ``spikes_list[idx]``
        are selected with the coincidence mask
    :param idx: index of the wavelength whose spikes form the haystack
    :return: host array stacking, along axis 0, the selected columns of ``spikes_list[idx]``,
        a row filled with ``idx`` inserted at row position 3, and one connectivity row per wavelength.
        NOTE(review): inserting at row 3 presumably assumes ``spikes_list[idx]`` has 3 rows - confirm.
    """
    # Spikes coordinates at the requested wavelength.
    spikes = cuarrays_[idx]
    # Haystack: spikes coordinates at that wavelength together with the coordinates of their 8 nearest neighbours.
    haystack_pixels = cuindex_8nb[spikes, :]
    n_neighbours = haystack_pixels.shape[1]
    # Needle-in-haystack search against every wavelength. Output dimensions: [n wavelengths, nb of pixels].
    # The needle count given to the kernel is the length of the array actually being searched: a
    # fixed count taken from another wavelength would truncate the search or read past the end of `needles`.
    bool_H0_all = cp.array([cp.column_stack([kernel(len(needles), haystack_pixels[:, j], needles)
                                             for j in range(n_neighbours)]).any(axis=1)
                            for needles in cuarrays_])
    # Pixels that have at least one match in any wavelength.
    bool_H0 = bool_H0_all.any(axis=0)
    # Connectivity table restricted to those pixels.
    w_tables = bool_H0_all[:, bool_H0]
    # Account for same-wavelength connectivity (on a boolean table this sets the whole idx row to True).
    w_tables[idx, :] = w_tables[idx, :] + 1
    # Back to host
    w_tables_cpu = cp.asnumpy(w_tables)
    bool_H0_cpu = cp.asnumpy(bool_H0)
    coords_intensities = spikes_list[idx][:, bool_H0_cpu]
    arr_w = np.concatenate([coords_intensities, w_tables_cpu], axis=0)
    # Tag every selected spike with the index of the wavelength it came from.
    arr_w = np.insert(arr_w, 3, idx, axis=0)

    return arr_w


def process_group_GPU(kernel, spikes_list):
    """Run the needle-in-haystack coincidence search for every wavelength of one group.

    For each wavelength taken as haystack (its spikes plus their 8 neighbours, looked up in the
    notebook-level ``cuindex_8nb`` table), search for the spikes of every wavelength of the group.

    :param kernel: element-wise membership kernel, called as ``kernel(num, x, y)``
    :param spikes_list: list of host arrays, one per wavelength; row 0 of each holds the spike coordinates
    :return: tuple ``(bool_Hall_Wall, select_pixels_all)`` of two lists with one entry per wavelength:
        ``bool_Hall_Wall[k]`` is a boolean device array of shape (n wavelengths, n spikes of wavelength k)
        telling which spikes of wavelength k have a match in each wavelength;
        ``select_pixels_all[k]`` is its reduction over wavelengths (spikes with at least one match).
    """
    cuarrays_ = [cp.asarray(spikes[0, :]) for spikes in spikes_list]

    bool_Hall_Wall = []
    for haystack_spikes in cuarrays_:
        # Haystack: spikes coordinates at one wavelength with the coordinates of their 8 nearest neighbours.
        haystack = cuindex_8nb[haystack_spikes, :]
        haystack_flat = haystack.ravel()
        # One kernel launch per needle array over the flattened haystack, reshaped back to
        # [nb of pixels, 9] and reduced along the neighbour axis (axis 1). Indexing the 2D result
        # with a single integer would pick a pixel row, not a neighbour column.
        matches = [kernel(len(needles), haystack_flat, needles).reshape(haystack.shape).any(axis=1)
                   for needles in cuarrays_]
        # Dimensions: [n wavelengths, nb of pixels].
        bool_Hall_Wall.append(cp.stack(matches))

    select_pixels_all = [bool_H.any(axis=0) for bool_H in bool_Hall_Wall]

    return bool_Hall_Wall, select_pixels_all

### Load the dataframe of file paths and their timestamps.

In [None]:
# Table of spike files for 2010, indexed by group number; the data directory comes from $SPIKESDATA.
spikes_df = pd.read_parquet(
    os.path.join(os.environ['SPIKESDATA'], 'spikes_df_2010.parquet'),
    engine='pyarrow',
).set_index(['GroupNumber'])
# File path of every spike file, addressable by group number.
path_Series = spikes_df['Path']

In [None]:
# Consecutive one-day intervals, closed on the left, covering 2010-05-13 to 2010-05-16 (UTC).
tintervals = pd.interval_range(
    start=pd.Timestamp('2010-05-13 00:00:00', tz='UTC'),
    end=pd.Timestamp('2010-05-16 00:00:00', tz='UTC'),
    freq='D',
    closed='left',
)

### Get the file paths of the first group in that time interval. There are 7 files per group.

In [None]:
# Work on the first interval.
tint = tintervals[0]
# Group numbers of the files whose timestamp lies in [tint.left, tint.right).
groups = spikes_df[
    (spikes_df['Time'] >= tint.left) & (spikes_df['Time'] < tint.right)
].index.unique()
# First group of the interval and the paths of its files.
group_n = groups[0]
fpaths = path_Series[group_n]
fpaths

### Load these FITS files into RAM and send them to the GPU

In [None]:
# Timing from an earlier %%timeit run: 3.99 ms ± 257 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Read every file of the group from the $SPIKESDATA directory.
spikes_list = [
    fitsio.read(os.path.join(os.environ['SPIKESDATA'], f))
    for f in fpaths
]
# Print how many spikes each file holds.
for spikes in spikes_list:
    print(spikes.shape[1])
# Note: the number of coordinates differs from file to file. Each file is an independent
# measurement of "spikes" in one CCD <-> wavelength.

In [None]:
# To GPU: list of CuPy arrays, each holding only row 0 (the spike coordinates) of one loaded file.
# One array per file of the group, i.e. the spike coordinates measured in each wavelength (wav0, wav1, ... wav6).
cuarrays_ = [cp.asarray(file_spikes[0, :]) for file_spikes in spikes_list]

## Create lookup table and send to GPU

In [None]:
# 8-neighbour lookup table for a 4096 x 4096 pixel image: one row of 9 flattened indices per pixel.
index_8nb = create_lookup_8nb(4096, 4096)
# create_lookup_8nb returns a transposed view, which is not C-contiguous. Make a row-major copy so
# that the 9 entries belonging to one pixel are adjacent in memory.
index_8nb_cont = np.ascontiguousarray(index_8nb)
# Upload the contiguous copy; it was previously computed but the non-contiguous view was uploaded instead.
cuindex_8nb = cp.asarray(index_8nb_cont)

## Define the "needles and the haystack" 

In [None]:
# Position in cuarrays_ of the wavelength whose spikes are used to build the haystack.
idx = 0

In [None]:
# Reference spikes: coordinates at the wavelength selected by idx (index 0 is the first one).
spikes = cuarrays_[idx]
# Needles: spike coordinates of another wavelength, whose occurrences are searched for in the haystack.
needles = cuarrays_[1]
n_needles = len(needles)
# Haystack: each reference spike together with its 8 nearest neighbours, one row of 9 coordinates per spike.
# The search marks as True the haystack entries that are found among the needles.
haystack = cuindex_8nb[spikes, :]
# Preview of the haystack and of the needles.
display(haystack[:5, :])
display(needles[:20])
haystack.shape

In [None]:
# NOTE(review): this repeats, token for token, the `kernel` definition of the helper-functions cell
# near the top of the notebook; re-running it only rebinds the same name.
# Membership test: z is true when x equals at least one of y[0] ... y[num-1] (`y` is a raw,
# manually indexed argument).
kernel = cp.ElementwiseKernel('T num,  T x,  raw T y', 'bool z',
'''int t = 0; 
z = 0;
#pragma unroll
for(t = 0; t < num; t++) z = z || (x == y[t]);''',
'my_kernel')

In [None]:
# Experimental variant, not called by any cell shown here: instead of searching `y`, it compares
# `x` with y[i], the entry of `y` at the same flat index. `num` is accepted but never read.
# NOTE(review): it is given the same kernel name ('my_kernel') as `kernel`, and y[i] presumably
# reads past the end of `y` whenever `y` is shorter than `x` - confirm before using it.
kernel2 = cp.ElementwiseKernel('T num,  T x,  raw T y', 'bool z',
'z = x == y[i];',
'my_kernel')

In [None]:
# Variant A, one kernel launch per neighbour column: test each of the 9 haystack columns against
# the needles, then OR the 9 results so that each spike gets a single True/False.
# Un-timed run of the statement benchmarked in the %%timeit cell further down (presumably a warm-up).
searches = cp.column_stack([kernel(n_needles, haystack[:, j], needles) for j in range(9)]).any(axis=1)
# Block until the GPU has finished the queued work.
cp.cuda.runtime.deviceSynchronize()

In [None]:
# Variant B, a single kernel launch: search the flattened haystack, then restore the
# (spikes, 9 neighbours) shape and OR along the neighbour axis.
# Un-timed run of the statement benchmarked in the %%timeit cell further down (presumably a warm-up).
searches = kernel(n_needles, haystack.ravel(), needles)
searches2d = searches.reshape(haystack.shape).any(axis=1)
# Block until the GPU has finished the queued work.
cp.cuda.runtime.deviceSynchronize()

In [None]:
%%timeit
# Benchmark of variant A (one kernel launch per neighbour column).
searches = cp.column_stack([kernel(n_needles, haystack[:, j], needles) for j in range(9)]).any(axis=1)
# Synchronize inside the timed body so the measurement covers the GPU work, not only its launch.
cp.cuda.runtime.deviceSynchronize()

In [None]:
%%timeit
# Benchmark of variant B (single kernel launch over the flattened haystack).
searches = kernel(n_needles, haystack.ravel(), needles)
searches2d = searches.reshape(haystack.shape).any(axis=1)
# Synchronize inside the timed body so the measurement covers the GPU work, not only its launch.
cp.cuda.runtime.deviceSynchronize()

In [None]:
# Run the whole-group search once. temp1 receives the first returned list (bool_Hall_Wall) and
# temp2 the second (select_pixels_all), both still on the GPU.
temp1, temp2 = process_group_GPU(kernel, spikes_list)

In [None]:
# Same call as in the previous cell, followed by an explicit wait for the GPU to finish.
temp1, temp2 = process_group_GPU(kernel, spikes_list)
cp.cuda.runtime.deviceSynchronize()

In [None]:
%%time 
# Time only the device-to-host copies; temp1 and temp2 must already exist from a previous cell
# because the call that produces them is commented out here.
#temp1, temp2 = process_group_GPU(kernel, spikes_list)
temp1_cpu = [t.get() for t in temp1]
temp2_cpu = [t.get() for t in temp2]

In [None]:
# Keep both returned lists together as a single tuple.
temp = process_group_GPU(kernel, spikes_list)

In [None]:
# Wall-clock time of one whole-group search.
# NOTE(review): there is no device synchronization here, so GPU work may still be pending when the timer stops.
%time temp = process_group_GPU(kernel, spikes_list)

In [None]:
# Step-by-step version of extract_coincidentals_GPU for the wavelength selected by idx.
# Haystack: the spikes of that wavelength with the coordinates of their 8 nearest neighbours.
# It is built here because `haystack_pixels` is otherwise only a local variable of the function.
haystack_pixels = cuindex_8nb[cuarrays_[idx], :]
# output of needle - haystack search has dimensions: [7-wave, nb of pixels]. e.g [7, 8486]
# The needle count given to the kernel is the length of the array actually searched.
bool_H0_all = cp.array([cp.column_stack([kernel(len(needles), haystack_pixels[:, j], needles) for j in range(9)]).any(axis=1) for needles in cuarrays_])
# Pixels that have at least one match in any wavelength.
bool_H0 = bool_H0_all.any(axis=0)
# Connectivity table restricted to those pixels.
w_tables = bool_H0_all[:, bool_H0]
# Account for same-wavelength connectivity (on a boolean table this sets the whole idx row to True).
w_tables[idx, :] = w_tables[idx, :] + 1
# Back to host
w_tables_cpu = cp.asnumpy(w_tables)
bool_H0_cpu = cp.asnumpy(bool_H0)
coords_intensities = spikes_list[idx][:, bool_H0_cpu]
arr_w = np.concatenate([coords_intensities, w_tables_cpu], axis=0)
# Tag every selected spike with the index of the wavelength it came from (row inserted at position 3).
arr_w = np.insert(arr_w, 3, idx, axis=0)

In [None]:
# Benchmark the full extraction for the first wavelength (index 0), including the copies back to host.
%timeit arr_w = extract_coincidentals_GPU(kernel, spikes_list, 0)