In [44]:
import os
import pandas as pd
import numpy as np
import fitsio
import cudf
from numba import cuda
import cupy as cp

In [2]:
def extract_coincidentals(spikes_list, idx):
    """Select the spikes of one wavelength that coincide with spikes of the others.

    Uses the notebook-level ``index_8nb`` lookup table to get, for every spike,
    the 9 pixel indices made of the pixel itself and its 8 neighbours.

    Parameters
    ----------
    spikes_list : list of ndarray
        One spikes array per wavelength. Row 0 is used as the 1D pixel
        coordinate of each spike; the remaining rows are carried over as
        intensity values. Must be a list (it is sliced and concatenated).
    idx : int
        Position in ``spikes_list`` of the wavelength serving as template.

    Returns
    -------
    ndarray
        One column per selected template spike. Rows are: the coordinate,
        the intensity rows, and one flag row per wavelength (the template's
        own row is set to True at position ``idx``). A row filled with ``idx``
        is then inserted at row position 3, which sits right after the two
        intensity rows for the (3, N) arrays loaded in this notebook.
    """
    template = spikes_list[idx]
    # Neighbourhood (pixel + 8 neighbours) of every template spike.
    template_nb = index_8nb[template[0, :], :]
    # Every other wavelength, in their original order.
    others = spikes_list[:idx] + spikes_list[idx + 1:]

    # One boolean row per other wavelength: does the template neighbourhood
    # share at least one pixel with a neighbourhood of that wavelength?
    hit_rows = []
    for other in others:
        other_nb = index_8nb[other[0, :], :]
        hit_rows.append(np.isin(template_nb, other_nb).any(axis=1))
    hits = np.array(hit_rows)

    # Keep the template spikes that coincide with at least one other wavelength.
    keep = hits.any(axis=0)
    # Re-insert the template's own flag row so there is one row per wavelength.
    flags = np.insert(hits[:, keep], idx, True, axis=0)

    stacked = np.concatenate(
        [template[0, keep][np.newaxis, ...], template[1:, keep], flags],
        axis=0,
    )
    # Record which wavelength served as template.
    return np.insert(stacked, 3, idx, axis=0)

In [3]:
# Root directory of the spikes data, read from the SPIKESDATA environment variable
# (raises KeyError if the variable is not set).
data_dir = os.environ['SPIKESDATA']
# Spikes database, stored as a parquet file inside the data directory.
spikes_db = pd.read_parquet(os.path.join(data_dir, 'spikes_df_2010.parquet'), engine='pyarrow')
# Same table indexed by (GroupNumber, Time) so that one group can be selected with .loc[group_number].
spikes_db2 = spikes_db.set_index(['GroupNumber', 'Time'])

### Get the filepaths (typically 7) for a given group

In [4]:
################################################################################################
# Pre-compute the 8-connectivity lookup table. This will be shared across parallel workers.
################################################################################################
# List of relative 2D coordinates for 8-neighbour connectivity (9-element list). The 1st one is the origin pixel.
# Each offset is [d_row, d_col], matching the [y, x] ordering of coords2d below.
coords_8nb = np.array([[0, 0], [-1, 0], [-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1]])
# Array of 2D coordinates for a 4096 x 4096 array. Matrix convention is kept: [rows, cols] = [y-axis, x-axis].
ny, nx = [4096, 4096]
coords_1d = np.arange(nx * ny)
coordy, coordx = np.unravel_index(coords_1d, [ny, nx]) # also possible by raveling a meshgrid() output
# Shape (2, ny*nx): row 0 holds the y coordinates, row 1 the x coordinates.
coords2d = np.array([coordy, coordx])
# Create the array of 2D coordinates of the 8 neighbours associated with each pixel.
# Pixel 0 has 8 neighbours + itself, pixel 1 has 8 neighbours + itself, etc.
# Broadcasting (1, 2, ny*nx) + (9, 2, 1) gives shape (9, 2, ny*nx).
coords2d_8nb = coords2d[np.newaxis, ...] + coords_8nb[..., np.newaxis]
# Handle off-edge coordinates by clipping to the edges, operation done in-place. A square detector is assumed here
# (both axes are clipped to nx-1). Update to per-axis clipping if that ever changes for another instrument.
# Consequence of clipping: an off-edge neighbour is replaced by a pixel on the edge, so edge pixels have
# repeated entries in their neighbour list.
np.clip(coords2d_8nb, 0, nx-1, out=coords2d_8nb)
# Convert to 1D coordinates (index = y * nx + x). After the transpose, row p lists the 9 pixel indices
# (itself first, then its 8 neighbours) of pixel p.
index_8nb = np.array([coords2d_8nb[i, 0, :] * nx + coords2d_8nb[i, 1, :] for i in range(len(coords_8nb))],
                     dtype='int32', order='C').T
index_8nb.shape

(16777216, 9)

In [5]:
n_co_spikes = 2

# Read the spikes array of every file listed for the selected group.
group_n = 0
fpaths = spikes_db2.loc[group_n]['Path'].values
spikes_list = [fitsio.read(os.path.join(data_dir, f)) for f in fpaths]

# Show the shape of each spikes array, then the total number of spikes (2nd axis) over the group.
for spikes in spikes_list:
    print(spikes.shape)
nspikes = sum(arr.shape[1] for arr in spikes_list)
print('\ntotal spikes = ', nspikes)

(3, 8486)
(3, 30356)
(3, 36549)
(3, 7993)
(3, 13781)
(3, 26443)
(3, 27576)

total spikes =  151184


In [6]:
# Column labels for the transposed output of extract_coincidentals(): pixel coordinate, two intensity
# values, index of the template wavelength, then one coincidence flag per wavelength (w0..w6).
column_names = ['coords' , 'int1', 'int2', 'wref', 'w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6']

In [7]:
# Cross-reference every wavelength of the group against all the others. The number of wavelengths is
# taken from the data (a group "typically" has 7 files, but not necessarily) instead of being hard-coded.
n_waves = len(spikes_list)
group_data = np.concatenate([extract_coincidentals(spikes_list, i) for i in range(n_waves)], axis=1)
# Keep the 4 leading labels (coords, int1, int2, wref) and generate one flag label per wavelength, so the
# labels always match the number of rows produced by extract_coincidentals(). For 7 wavelengths this is
# identical to column_names.
wave_columns = ['w{}'.format(i) for i in range(n_waves)]
df0 = pd.DataFrame(group_data.T, columns=column_names[:4] + wave_columns)

In [12]:
# Preview the first rows of the coincidentals table and report how many rows it has.
print(df0.head())
print(len(df0))

   coords  int1  int2  wref  w0  w1  w2  w3  w4  w5  w6
0   18917   122    11     0   1   1   0   0   0   0   0
1   19192   124     7     0   1   0   0   1   0   0   0
2   23013    75    10     0   1   1   0   0   0   0   0
3   23287   157     9     0   1   0   0   1   0   0   0
4   27109    38     9     0   1   1   0   0   0   0   0
16339


### Design a method to extract unique coincidental events, lifting any ambiguity (conjugates, redundancies, ...)

In [23]:
# First rows of the spikes flagged in both the w0 and w1 columns.
df0.loc[(df0.w0==1) & (df0.w1==1)].head()

Unnamed: 0,coords,int1,int2,wref,w0,w1,w2,w3,w4,w5,w6
0,18917,122,11,0,1,1,0,0,0,0,0
2,23013,75,10,0,1,1,0,0,0,0,0
4,27109,38,9,0,1,1,0,0,0,0,0
6,31475,111,8,0,1,1,0,0,0,0,0
36,272628,240,8,0,1,1,0,0,0,0,0


In [41]:
# Benchmark (pandas): boolean selection on two flag columns, then extraction of the coords column.
%timeit coords = df0.loc[(df0.w0==1) & (df0.w1==1)].coords.values

885 µs ± 2.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [50]:
# NumPy view of the table; columns follow the order of column_names (4 -> w0, 5 -> w1).
arr0 = df0.values
arr0.shape

(16339, 11)

In [117]:
%%timeit
mask = (arr0[:, 4]==1) & (arr0[:, 5] == 1)
coords = arr0[mask, :]

72.6 µs ± 321 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [33]:
# 1D pixel coordinates of the spikes flagged in both w0 and w1. Selected explicitly here: variables
# assigned inside %timeit / %%timeit do not persist, so `coords` would otherwise be undefined when
# the notebook is run top to bottom on a fresh kernel.
coords = df0.loc[(df0.w0 == 1) & (df0.w1 == 1), 'coords'].values
print(len(coords))
# Convert the 1D pixel indices (index = y * nx + x) back to 2D coordinates.
coords_x = coords % nx
coords_y = coords // nx
# Column-vector versions, used to broadcast pairwise differences.
coords_xb = coords_x[:, np.newaxis]
coords_yb = coords_y[:, np.newaxis]

485


In [36]:
# Pairwise coordinate differences through broadcasting: element [i, j] is coordinate j minus coordinate i.
dx_broadc = coords_x - coords_xb
dy_broadc = coords_y - coords_yb
print(dx_broadc.shape)

(485, 485)


In [216]:
def get_2coincidentals(array, w1_idx, w2_idx, nx=4096):
    """Pair up neighbouring spikes that are flagged in two given columns.

    The function only reads its arguments: the rows used for the selection and
    for the returned records both come from ``array`` (no notebook-level
    table is accessed).

    Parameters
    ----------
    array : ndarray, shape (n_spikes, n_columns)
        Spikes table whose first four columns are, in order, 'coords'
        (1D pixel index), 'int1', 'int2' and 'wref'.
    w1_idx, w2_idx : int
        Indices of the two flag columns that must both be equal to 1 for a
        row to be considered.
    nx : int, optional
        Number of pixels per detector row, used to convert the 1D pixel index
        into (x, y). Defaults to 4096.

    Returns
    -------
    list of pandas.DataFrame
        One 2-row frame per pair of selected spikes lying less than 2 pixels
        apart (i.e. 8-connected), with columns 'coords', 'int1', 'int2',
        'wref' and a fresh 0/1 index. Each pair is reported once. The list is
        empty when no pair is found.
    """
    record_columns = ['coords', 'int1', 'int2', 'wref']

    np_mask = (array[:, w1_idx] == 1) & (array[:, w2_idx] == 1)
    # Row positions, in `array`, of the selected spikes.
    df_idx = np.nonzero(np_mask)[0]

    coords = array[np_mask, 0]
    coords_x = coords % nx
    coords_y = coords // nx
    # Pairwise differences through broadcasting, shape (n_selected, n_selected).
    dx_broadc = coords_x - coords_x[:, np.newaxis]
    dy_broadc = coords_y - coords_y[:, np.newaxis]
    # distance < 2  <=>  squared distance < 4; avoids the square root.
    select = (dx_broadc**2 + dy_broadc**2) < 4
    # Strict upper triangle: drops self-pairs and the mirrored (j, i) duplicates.
    r, c = np.nonzero(np.triu(select, k=1))

    return [pd.DataFrame(array[[i1, i2], :4], columns=record_columns)
            for i1, i2 in zip(df_idx[r], df_idx[c])]

In [210]:
# Example of the 4 columns kept in a coincidental record, for rows 0 and 1.
df0.loc[[0,1]][['coords', 'int1', 'int2', 'wref']].values

array([[18917,   122,    11,     0],
       [19192,   124,     7,     0]], dtype=int32)

In [231]:
# Pairs of neighbouring spikes flagged in both w0 (column 4) and w1 (column 5).
records = get_2coincidentals(arr0, 4, 5)
# index = pd.MultiIndex.from_product([list(range(len(records))), [0,1]], names=['coincidental_id', 'pixel_id'])
# Stack all pairs into one frame; the outer index level is the pair number, the inner one the row within the pair.
records2 = pd.concat(records, keys=list(range(len(records))))
records2.head(15)

Unnamed: 0,Unnamed: 1,coords,int1,int2,wref
0,0,18917,122,11,0
0,1,23013,75,10,0
1,0,23013,75,10,0
1,1,27109,38,9,0
2,0,23013,75,10,0
2,1,27108,11,0,1
3,0,27109,38,9,0
3,1,27108,11,0,1
4,0,272628,240,8,0
4,1,272629,32,10,0


In [244]:
# Distinct pair numbers (outer index level) present in records2.
records2.index.unique(level=0)

Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            466, 467, 468, 469, 470, 471, 472, 473, 474, 475],
           dtype='int64', length=476)

In [247]:
# Pair numbers whose rows do not all carry distinct 'wref' values, found with one grouped pass
# over the outer index level (groups are kept in order of first appearance).
wref_by_pair = records2.groupby(level=0, sort=False)['wref']
has_repeated_wref = wref_by_pair.nunique() < wref_by_pair.size()
non_unique_idx = has_repeated_wref.index[has_repeated_wref.values].tolist()
print(non_unique_idx)

[0, 1, 4, 9, 10, 11, 13, 14, 15, 16, 17, 19, 22, 23, 26, 30, 33, 35, 36, 39, 43, 44, 47, 49, 51, 52, 53, 65, 76, 77, 83, 92, 93, 95, 96, 99, 104, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 123, 124, 126, 134, 135, 136, 141, 143, 148, 149, 152, 155, 162, 168, 169, 170, 173, 177, 187, 190, 196, 199, 200, 202, 203, 207, 209, 212, 214, 215, 217, 218, 224, 225, 227, 232, 234, 238, 239, 240, 241, 242, 243, 244, 246, 247, 249, 250, 252, 254, 257, 258, 259, 260, 261, 263, 265, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 334, 336, 337, 338, 339, 340, 341, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380,

In [None]:
# Print every pair record in turn (one 2-row frame per pair, so the output can be very long).
for rec in records:
    print(rec)

In [181]:
# Row labels of df0 flagged in both w0 and w1. Defined explicitly here: in the rest of the notebook
# `df_idx` only exists as a local variable of get_2coincidentals(), so it is undefined at notebook
# scope on a fresh kernel.
df_idx = np.nonzero(((df0.w0 == 1) & (df0.w1 == 1)).values)[0]
df0.loc[df_idx]

Unnamed: 0,coords,int1,int2,wref,w0,w1,w2,w3,w4,w5,w6
0,18917,122,11,0,1,1,0,0,0,0,0
2,23013,75,10,0,1,1,0,0,0,0,0
4,27109,38,9,0,1,1,0,0,0,0,0
6,31475,111,8,0,1,1,0,0,0,0,0
36,272628,240,8,0,1,1,0,0,0,0,0
37,272629,32,10,0,1,1,0,0,0,0,0
42,345695,184,15,0,1,1,0,0,0,0,0
43,345696,168,15,0,1,1,0,0,0,0,0
44,349791,105,15,0,1,1,0,0,0,0,0
45,349792,98,14,0,1,1,0,0,0,0,0


In [138]:
# Pairwise Euclidean distance (in pixels) between the selected spikes. Computed in this cell because
# `dist_matrix` is not assigned by any other notebook-level cell.
dist_matrix = np.sqrt(dx_broadc**2 + dy_broadc**2)
# Two spikes are considered neighbours (8-connectivity) when they are less than 2 pixels apart.
select = dist_matrix < 2
print(select)

[[ True  True False ... False False False]
 [ True  True  True ... False False False]
 [False  True  True ... False False False]
 ...
 [False False False ...  True  True False]
 [False False False ...  True  True False]
 [False False False ... False False  True]]


In [141]:
# Keep the strict upper triangle only: removes the diagonal (self-pairs) and the mirrored duplicates.
select2 = np.triu(select, k=1)
print(select2)

[[False  True False ... False False False]
 [False False  True ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False  True False]
 [False False False ... False False False]
 [False False False ... False False False]]


In [191]:
# (row, column) positions of the neighbouring pairs kept in the upper triangle.
r,c = np.nonzero(select2)
# NOTE(review): in the visible cells `df_idx` is only assigned inside get_2coincidentals() --
# confirm it exists at notebook scope before running this cell.
print(df_idx[r[0]], df_idx[c[0]])

0 2


In [193]:
# One 2-row slice of df0 per neighbouring pair, built by walking the pair positions directly.
spikes = [df0.loc[[df_idx[row], df_idx[col]]] for row, col in zip(r, c)]

for pair in spikes:
    print(pair)

   coords  int1  int2  wref  w0  w1  w2  w3  w4  w5  w6
0   18917   122    11     0   1   1   0   0   0   0   0
2   23013    75    10     0   1   1   0   0   0   0   0
   coords  int1  int2  wref  w0  w1  w2  w3  w4  w5  w6
2   23013    75    10     0   1   1   0   0   0   0   0
4   27109    38     9     0   1   1   0   0   0   0   0
      coords  int1  int2  wref  w0  w1  w2  w3  w4  w5  w6
2      23013    75    10     0   1   1   0   0   0   0   0
3202   27108    11     0     1   1   1   0   0   0   0   0
      coords  int1  int2  wref  w0  w1  w2  w3  w4  w5  w6
4      27109    38     9     0   1   1   0   0   0   0   0
3202   27108    11     0     1   1   1   0   0   0   0   0
    coords  int1  int2  wref  w0  w1  w2  w3  w4  w5  w6
36  272628   240     8     0   1   1   0   0   0   0   0
37  272629    32    10     0   1   1   0   0   0   0   0
      coords  int1  int2  wref  w0  w1  w2  w3  w4  w5  w6
36    272628   240     8     0   1   1   0   0   0   0   0
1137  268532   111   

In [None]:
# Convert flat position 487 of the `select` matrix back to its (row, column) pair.
# NOTE(review): this rebinds `r` and `c`, which other cells use as arrays of pair positions.
r, c = np.unravel_index(487, select.shape)
print(r)

In [39]:
# cuDF copy of the coincidentals table, used for the GPU benchmarks.
cdf0 = cudf.DataFrame.from_pandas(df0)

In [78]:
# Benchmark (cuDF): boolean mask on two flag columns.
%timeit m = (cdf0.w0==1) & (cdf0.w1==1)

1.19 ms ± 2.36 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [89]:
# CuPy copy of the table values, built from the host-side NumPy array.
arr_cupy = cp.asarray(df0.values) # ~85 us
print(arr_cupy.shape)

(16339, 11)


In [70]:
# NumPy flag columns w0 (index 4) and w1 (index 5), extracted once for the mask benchmark.
a = arr0[:,4]
b = arr0[:,5]
print(len(a))

16339


In [71]:
# CuPy counterparts of the w0 (index 4) and w1 (index 5) flag columns.
c1 = arr_cupy[:,4]
c2 = arr_cupy[:,5]
print(len(c1))

16339


In [69]:
# Benchmark (NumPy): boolean mask on the two pre-extracted flag columns.
%timeit mask = (a==1) & (b==1)

26.1 µs ± 106 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [85]:
%timeit mask = (c1==1) & (c2==1)

40.7 µs ± 167 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [92]:
# Same mask computed outside of %timeit, to inspect its shape.
mask = (c1==1) & (c2==1)
mask.shape

(16339,)

In [114]:
%%timeit 
# Benchmark (CuPy): mask on the two flag columns, then selection of the coords column (index 0).
mask = ( arr_cupy[:,4]==1) & (arr_cupy[:,5]==1)
cp_coords = arr_cupy[:, 0][mask]

127 µs ± 539 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [125]:
# %%timeit
# (A stray `coincidental` identifier at the start of this cell raised a NameError; it has been removed.)
# GPU version of the pairwise distance computation between the spikes flagged in both w0 and w1.
mask = (arr_cupy[:,4]==1) & (arr_cupy[:,5]==1)
cp_coords = arr_cupy[:, 0][mask]
# 1D pixel index -> 2D coordinates on the 4096-pixel-wide detector.
cp_x = cp_coords % 4096
cp_y = cp_coords // 4096
# Column vectors, used to broadcast the pairwise differences.
cp_xb = cp_x[:, cp.newaxis]
cp_yb = cp_y[:, cp.newaxis]
cp_dx_broadc = cp_x - cp_xb
cp_dy_broadc = cp_y - cp_yb
cp_dist_matrix = cp.sqrt(cp_dx_broadc**2 + cp_dy_broadc**2)

In [9]:
# Step-by-step version of the start of extract_coincidentals(), using wavelength 0 as the template.
idx = 0
# Spikes coordinates at the given wavelength index
spikes_w = spikes_list[idx]
# Associated neighbour coordinates (pixel itself + 8 neighbours for each spike)
nb_pixels_w1 = index_8nb[spikes_w[0, :], :]

# Sublist of spikes data that excludes the one serving as template
spikes_sublist = spikes_list[:idx]+spikes_list[idx+1:]
# Coincidental cross-referencing: neighbour coordinates of the first remaining wavelength.
nb_pixels_w2 = index_8nb[spikes_sublist[0][0,:], :]
#m = np.isin(nb_pixels_w1, nb_pixels_w2).any(axis=1)

In [77]:
def get_coincidentals(nb_pixels1, pixels2):
    """Match two sets of spikes through the pixels shared by their neighbourhoods.

    Uses the notebook-level ``index_8nb`` lookup table to build the
    neighbourhood array of ``pixels2``.

    Parameters
    ----------
    nb_pixels1 : ndarray, 2D
        Neighbourhood pixel indices of the first set, one row per spike.
    pixels2 : ndarray
        1D pixel indices of the spikes of the second set.

    Returns
    -------
    tuple of ndarray
        ``(rows1, rows2)``: for every pixel index common to both neighbourhood
        arrays, the row (spike) holding it in the first and in the second
        array. ``np.intersect1d`` works on the flattened arrays and reports
        the first occurrence of each common value only.
    """
    nb_pixels2 = index_8nb[pixels2, :]
    _, flat_pos1, flat_pos2 = np.intersect1d(nb_pixels1, nb_pixels2, return_indices=True)
    # Flat positions -> row numbers; the column within the neighbourhood is not needed.
    rows1 = np.unravel_index(flat_pos1, nb_pixels1.shape)[0]
    rows2 = np.unravel_index(flat_pos2, nb_pixels2.shape)[0]
    return rows1, rows2

In [84]:
# Coincidentals for 6 pair-wise
p1r, p2r = get_coincidentals(nb_pixels_w1, spikes_list[1][0,:])

res_w1 = [get_coincidentals(nb_pixels_w1, spikes[0,:]) for spikes in spikes_list[1:]]
len(res_w1[0][0])

517

In [85]:
# Spikes data of the matched columns, for wavelength 0 and for wavelength 1 respectively.
data_w1 = spikes_list[0][:, res_w1[0][0]]
data_w2 = spikes_list[1][:, res_w1[0][1]]


In [86]:

# One row per matched pair: coordinate and the two intensity values, for each of the two wavelengths.
datadict = {
    'cw1': data_w1[0, :],
    'w1_int1': data_w1[1, :],
    'w1_int2': data_w1[2, :],
    'cw2': data_w2[0, :],
    'w2_int1': data_w2[1, :],
    'w2_int2': data_w2[2, :],
}

df = pd.DataFrame(datadict)
df.head()

Unnamed: 0,cw1,w1_int1,w1_int2,cw2,w2_int1,w2_int2
0,18917,122,11,27107,220,0
1,18917,122,11,27108,11,0
2,23013,75,10,27107,220,0
3,23013,75,10,27108,11,0
4,27109,38,9,27107,220,0
