In [17]:
import os
import pandas as pd
import numpy as np
import fitsio
from pathlib import Path, PurePath

In [58]:
def create_lookup_8nb(nx, ny):
    """ Pre-compute the 8-connectivity lookup table. This will be shared across parallel workers.

    For each pixel of an array with ``ny`` rows and ``nx`` columns, the table holds the flattened (row-major)
    index of the pixel itself followed by the flattened indices of its 8 neighbours. Neighbours that would fall
    off the array are clipped onto the nearest edge, so border pixels get repeated entries (possibly the pixel
    itself) instead of out-of-range indices.

    :param nx: number of columns (x-axis size).
    :param ny: number of rows (y-axis size).
    :return: int32 array of shape (nx * ny, 9); row p is [p, 8 neighbours of p] in the order of ``coords_8nb``.
    """
    # Relative [row, col] offsets for 8-neighbour connectivity (9-element list). 1st one is the origin pixel.
    coords_8nb = np.array([[0, 0], [-1, 0], [-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1]])
    # 2D coordinates of every pixel of the (ny, nx) array. Matrix convention is kept: [rows, cols] = [y-axis, x-axis]
    coords_1d = np.arange(nx * ny)
    coordy, coordx = np.unravel_index(coords_1d, (ny, nx))
    coords2d = np.array([coordy, coordx])
    # 2D coordinates of the origin pixel and its 8 neighbours, for every pixel: shape (9, 2, nx * ny).
    coords2d_8nb = coords2d[np.newaxis, ...] + coords_8nb[..., np.newaxis]
    # Handle off-edge coordinates by clipping to the edges, in place and per axis: rows are bounded by ny and
    # columns by nx. Clipping both axes to nx - 1 is only correct for a square array.
    rows_8nb = coords2d_8nb[:, 0, :]
    cols_8nb = coords2d_8nb[:, 1, :]
    np.clip(rows_8nb, 0, ny - 1, out=rows_8nb)
    np.clip(cols_8nb, 0, nx - 1, out=cols_8nb)
    # Convert to 1D (row-major) coordinates; transpose so that the first axis indexes the pixel.
    lookup_coords = np.asarray(rows_8nb * nx + cols_8nb, dtype='int32', order='C').T
    return lookup_coords


def extract_coincidentals(spikes_list, idx, lookup=None):
    """ Select the spikes of one wavelength that have a spike in their 8-neighbourhood, at any wavelength.

    A spike of wavelength ``idx`` is kept when, for at least one entry of ``spikes_list``, the pixel or one of
    its 8 neighbours is a spike there. For wavelength ``idx`` itself, only the neighbours are tested and never
    the pixel itself.

    :param spikes_list: list of 2D arrays, one per wavelength. Row 0 holds flattened pixel coordinates (used to
        index the lookup table, so presumably integers -- TODO confirm); the remaining rows are carried through
        unchanged.
    :param idx: index in ``spikes_list`` of the wavelength whose spikes are examined.
    :param lookup: optional 8-neighbour lookup table of shape (n_pixels, 9) with the pixel itself in column 0.
        Defaults to the module-level ``index_8nb``.
    :return: 2D array with one column per selected spike. With 3-row inputs the rows are: the 3 input rows,
        then ``idx``, then ``len(spikes_list) + 1`` flag rows.
        NOTE(review): the flag rows are the per-wavelength flags with a constant True row inserted at position
        ``idx``; the same-wavelength neighbour flag therefore sits at position ``idx + 1`` and the meaning of a
        given flag row depends on ``idx``. Confirm this layout is intended before labelling the rows.
    """
    if lookup is None:
        # Keep the historical behaviour: use the table stored in the module-level name.
        lookup = index_8nb
    # Spikes coordinates at given wavelength index
    spikes_w = spikes_list[idx]
    # Associated neighbour coordinates; column 0 is the pixel itself, columns 1-8 are its neighbours
    nb_pixels = lookup[spikes_w[0, :], :]
    centre = nb_pixels[:, :1]
    neighbours = nb_pixels[:, 1:]
    # Cross-referencing the spikes coordinates of all wavelengths (needles) with the neighbour array (haystack)
    flags = []
    for i, spikes in enumerate(spikes_list):
        if i == idx:
            # Same wavelength: test the neighbours only. A neighbour clipped onto the edge can be the pixel
            # itself; such entries are ignored so that a border spike does not match itself.
            hits = np.isin(neighbours, spikes[0, :]) & (neighbours != centre)
        else:
            hits = np.isin(nb_pixels, spikes[0, :])
        flags.append(hits.any(axis=1))
    isin_arr = np.array(flags)
    select_pixels = isin_arr.any(axis=0)
    isin_table = np.insert(isin_arr[:, select_pixels], idx, True, axis=0)
    # Get coordinates and intensity values for the selected coincidental pixels, alongside the "truth table".
    arr_w = np.concatenate([spikes_w[:, select_pixels], isin_table], axis=0)
    # Record the reference wavelength index as an extra row right after the first three rows.
    arr_w = np.insert(arr_w, 3, idx, axis=0)
    return arr_w


# Base directory of the spikes data, read from the SPIKESDATA environment variable (KeyError if it is not set).
SPIKESDATA = os.environ['SPIKESDATA']
# Column labels for the transposed output of extract_coincidentals(): 12 labels for its 12 rows.
# Labels must be unique: with 'w4' listed twice, df['w4'] selects two columns instead of one, so the second
# occurrence is renamed 'w4b'.
# NOTE(review): extract_coincidentals() emits 8 flag rows whose meaning shifts with 'wref', so the 'w*' labels
# are positional only -- confirm the intended flag layout before relying on them.
column_names = ['coords', 'int1', 'int2', 'wref', 'w1', 'w2', 'w3', 'w4', 'w4b', 'w5', 'w6', 'w7']

In [3]:
# Build the 8-neighbour lookup table for a 4096 x 4096 array. extract_coincidentals() reads this module-level name.
index_8nb = create_lookup_8nb(4096, 4096)

In [4]:
# Load the spikes table stored in spikes_df_2010.parquet under the data directory. The SPIKESDATA constant is
# reused here instead of reading the environment a second time, as done when the spikes files are read.
spikes_df = pd.read_parquet(os.path.join(SPIKESDATA, 'spikes_df_2010.parquet'), engine='pyarrow')

In [5]:
# Re-index the spikes table by (GroupNumber, Time) so that rows can be looked up per group.
spikes_df2 = spikes_df.set_index(['GroupNumber', 'Time'])
# The 'Path' column as a Series carrying the (GroupNumber, Time) index.
path_Series = spikes_df2['Path']

In [6]:
# Consecutive one-day UTC intervals covering 2010-05-13 up to 2010-05-16; each includes its start, not its end.
tintervals = pd.interval_range(
    start=pd.Timestamp('2010-05-13 00:00:00', tz='UTC'),
    end=pd.Timestamp('2010-05-16 00:00:00', tz='UTC'),
    freq='D',
    closed='left',
)

# Work on the first day only.
tint = tintervals[0]

In [60]:
# Distinct group numbers whose 'Time' falls inside the selected interval (start included, end excluded).
groups = spikes_df.loc[
    (spikes_df['Time'] >= tint.left) & (spikes_df['Time'] < tint.right),
    'GroupNumber',
].unique()
# First group of the interval.
group_n = groups[0]

In [69]:
# Paths of the spikes files of the first selected group (path_Series is indexed by GroupNumber, then Time).
fpaths = path_Series.loc[groups[0]]
# Read every file of the group with fitsio; the stored paths are joined onto SPIKESDATA.
spikes_list = [fitsio.read(PurePath(SPIKESDATA, f).as_posix()) for f in fpaths]
# Show the shape of each array that was read.
for spikes in spikes_list:
    print(spikes.shape)

(3, 8486)
(3, 30356)
(3, 36549)
(3, 7993)
(3, 13781)
(3, 26443)
(3, 27576)


In [48]:
# Single timed run of extract_coincidentals() on the first entry of spikes_list.
%time r = extract_coincidentals(spikes_list, 0)

CPU times: user 27 ms, sys: 3.18 ms, total: 30.2 ms
Wall time: 29.4 ms


In [49]:
# Repeated timing of the same call, to average out run-to-run noise.
%timeit r = extract_coincidentals(spikes_list, 0)

22.8 ms ± 250 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [53]:
%time group_data = np.concatenate([ extract_coincidentals(spikes_list, i) for i in range(7) ], axis=1)

CPU times: user 489 ms, sys: 0 ns, total: 489 ms
Wall time: 488 ms


In [68]:
%timeit group_data = np.concatenate([ extract_coincidentals(spikes_list, i) for i in range(7) ], axis=1)

454 ms ± 1.77 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [64]:
%%timeit
group_data = np.concatenate([ extract_coincidentals(spikes_list, i) for i in range(7) ], axis=1)
coincidental_spikes_df = pd.DataFrame(group_data.T, columns=column_names)
coincidental_spikes_df['GroupNumber'] = group_n

461 ms ± 1.32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
