In [2]:
import os
import pandas as pd
import numpy as np
import fitsio
import cudf

In [3]:
def extract_coincidentals(spikes_list, idx, nb_lookup=None):
    """Select the spikes of one wavelength that coincide with spikes of any other wavelength.

    A spike of the template wavelength is kept when its pixel neighbourhood (a row of the
    lookup table) shares at least one pixel with the neighbourhood of a spike from at least
    one of the other wavelengths.

    Parameters
    ----------
    spikes_list : list of ndarray
        One 2D array per wavelength. Row 0 holds the 1D pixel coordinates of the spikes,
        the remaining rows hold the associated intensity values.
    idx : int
        Position in `spikes_list` of the wavelength used as template. Negative positions
        count from the end, as with regular list indexing.
    nb_lookup : ndarray, optional
        Neighbourhood lookup table: row `p` lists the 1D coordinates of pixel `p` and of its
        neighbours. Defaults to the notebook-level `index_8nb` table.

    Returns
    -------
    ndarray
        One column per selected spike. Rows are, in order: the pixel coordinate, the
        intensity rows, then one flag row per wavelength of `spikes_list` telling whether the
        spike coincides with that wavelength. The flag of the template wavelength itself is
        always set. Flags end up as 0/1 when the spike data are integers.

    Raises
    ------
    IndexError
        If `idx` is outside the range of `spikes_list`.
    """
    if nb_lookup is None:
        # Fall back on the table pre-computed at notebook level.
        nb_lookup = index_8nb

    # Normalise negative positions and reject out-of-range ones. Without this, a negative
    # idx would make the slicing below build a wrong list of "other" wavelengths.
    idx = range(len(spikes_list))[idx]

    # Spikes data at the given wavelength index, and the neighbourhood of each spike
    spikes_w = spikes_list[idx]
    nb_pixels = nb_lookup[spikes_w[0, :], :]
    # Sublist of spikes data that excludes the one serving as template
    spikes_sublist = spikes_list[:idx] + spikes_list[idx + 1:]

    # Coincidental cross-referencing: one boolean row per other wavelength, one column per
    # template spike. Pre-allocating keeps the mask 2D and boolean whatever the list length.
    mask_w_arr = np.zeros((len(spikes_sublist), nb_pixels.shape[0]), dtype=bool)
    for row, spikes in enumerate(spikes_sublist):
        mask_w_arr[row] = np.isin(nb_pixels, nb_lookup[spikes[0, :], :]).any(axis=1)

    # Keep the spikes that coincide with at least one other wavelength
    select_pixels = mask_w_arr.any(axis=0)
    coords_w = spikes_w[0, select_pixels]
    # Re-insert the template wavelength (always flagged) at its own position
    w_tables = np.insert(mask_w_arr[:, select_pixels], idx, True, axis=0)
    # Retrieve intensity values for the selected coordinates
    intensities = spikes_w[1:, select_pixels]

    return np.concatenate([coords_w[np.newaxis, ...], intensities, w_tables], axis=0)

In [4]:
# Root directory of the spikes data, taken from the SPIKESDATA environment variable
# (a KeyError is raised if it is not set).
data_dir = os.environ['SPIKESDATA']
# Spikes table stored as parquet under the data directory.
spikes_db = pd.read_parquet(
    os.path.join(data_dir, 'spikes_df_2010.parquet'),
    engine='pyarrow',
)
# Same table indexed by (GroupNumber, Time) so that all entries of a group can be looked up at once.
spikes_db2 = spikes_db.set_index(keys=['GroupNumber', 'Time'])

### Get the filepaths (typically 7) for a given group

In [5]:
################################################################################################
# Pre-compute the 8-connectivity lookup table. This will be shared across parallel workers.
################################################################################################
# Relative 2D offsets [dy, dx] for 8-neighbour connectivity (9-element list). 1st one is the origin pixel.
coords_8nb = np.array([[0, 0], [-1, 0], [-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1]])
# Array of 2D coordinates for a 4096 x 4096 array. Matrix convention is kept. [rows, cols] = [y-axis, x-axis]
ny, nx = [4096, 4096]
coords_1d = np.arange(nx * ny)
coordy, coordx = np.unravel_index(coords_1d, (ny, nx))  # also possible by raveling a meshgrid() output
coords2d = np.array([coordy, coordx])
# Create the array of 2D coordinates of the 8 neighbours associated with each pixel, shape (9, 2, ny * nx):
# pixel 0 has 8 neighbours + itself, pixel 1 has 8 neighbours + itself, etc...
coords2d_8nb = coords2d[np.newaxis, ...] + coords_8nb[..., np.newaxis]
# Handle off-edge coordinates by clipping to the edges, in place. Each axis is clipped against its own
# size (y against ny - 1, x against nx - 1), so the table stays valid for a non-square detector.
max_yx = np.array([ny - 1, nx - 1])[:, np.newaxis]
np.clip(coords2d_8nb, 0, max_yx, out=coords2d_8nb)
# Convert to 1D coordinates (y * nx + x) for the 9 offsets at once. The largest index is ny * nx - 1, which
# fits in int32 for this detector. The transpose gives one row per pixel and one column per offset.
index_8nb = (coords2d_8nb[:, 0, :] * nx + coords2d_8nb[:, 1, :]).astype('int32').T
index_8nb.shape

(16777216, 9)

In [6]:
# NOTE(review): not used by the cells shown here; presumably the minimum number of
# coincidental spikes to retain - confirm against the rest of the notebook.
n_co_spikes = 2

# Group whose spike files are loaded.
group_n = 0
# Relative paths of the files of that group, as listed in the 'Path' column.
fpaths = spikes_db2.loc[group_n]['Path'].values
# One spikes array per file, read from the data directory.
spikes_list = [
    fitsio.read(os.path.join(data_dir, fname))
    for fname in fpaths
]
# Show the shape of each array as a quick sanity check.
for spikes in spikes_list:
    print(spikes.shape)

(3, 8486)
(3, 30356)
(3, 36549)
(3, 7993)
(3, 13781)
(3, 26443)
(3, 27576)


In [8]:
# Layout of the output table: the pixel coordinate, two intensity columns,
# then one flag column per wavelength (w1 ... w7).
column_names = ['coords', 'int1', 'int2'] + [f'w{k}' for k in range(1, 8)]
# Empty frame with that layout, displayed to check the header.
df = pd.DataFrame(columns=column_names)
df.head()

Unnamed: 0,coords,int1,int2,w1,w2,w3,w4,w5,w6,w7


In [9]:
# Pixel coordinates (row 0) of the spikes of every wavelength in the group. The number of wavelengths is
# taken from the data rather than hard-coded, since a group does not always hold 7 files.
pixels_ws = [spikes[0, :] for spikes in spikes_list]
# For each wavelength i, the coordinates of all the other wavelengths (i.e. everything but entry i).
spikes_pix = [pixels_ws[:i] + pixels_ws[i+1:] for i in range(len(pixels_ws))]

In [10]:
# Coincidental spikes with each wavelength of the group used in turn as the template, stacked column-wise.
# The number of wavelengths comes from the data instead of a hard-coded 7.
group_data = np.concatenate([extract_coincidentals(spikes_list, i) for i in range(len(spikes_list))], axis=1)
# Keep a single column per pixel coordinate: np.unique returns, for each distinct coordinate (sorted),
# the index of its first occurrence.
u, idx = np.unique(group_data[0, :], return_index=True)
group_data2 = group_data[:, idx]
# One row per unique coordinate. `column_names` must match the number of rows of group_data2
# (coordinate + intensities + one flag per wavelength), otherwise pandas raises a ValueError.
df = pd.DataFrame(group_data2.T, columns=column_names)
df.head(15)

Unnamed: 0,coords,int1,int2,w1,w2,w3,w4,w5,w6,w7
0,11175,64,-1,0,1,1,0,0,0,0
1,13785,167,0,0,1,0,0,0,0,1
2,14205,24,0,0,0,1,0,0,1,0
3,14206,1386,0,0,0,1,0,0,1,0
4,15124,233,0,0,1,0,0,0,1,0
5,15272,259,0,0,1,1,0,0,0,0
6,15273,280,0,0,1,1,0,0,0,0
7,17487,56,1,0,1,0,0,0,1,0
8,17489,56,-1,0,1,0,0,0,1,0
9,18917,122,11,1,1,0,0,0,0,0


In [62]:
# Work with the first wavelength as the template.
idx = 0
# Spikes data of the template wavelength
spikes_w = spikes_list[idx]
# Neighbourhood (1D pixel indices) of each template spike, one row per spike
nb_pixels_w1 = index_8nb[spikes_w[0]]

# Every other wavelength, in the original order, with the template left out
spikes_sublist = [spikes for pos, spikes in enumerate(spikes_list) if pos != idx]
# Neighbourhoods of the spikes of the first of these other wavelengths,
# i.e. the second operand of a pair-wise coincidental cross-referencing.
nb_pixels_w2 = index_8nb[spikes_sublist[0][0]]

In [77]:
def get_coincidentals(nb_pixels1, pixels2, nb_lookup=None):
    """Pair up the spikes of two wavelengths whose pixel neighbourhoods overlap.

    Parameters
    ----------
    nb_pixels1 : ndarray, shape (n1, k)
        Neighbourhood of each spike of the first wavelength: one row per spike, holding the
        1D coordinates of the spike pixel and of its neighbours.
    pixels2 : ndarray, shape (n2,)
        1D pixel coordinates of the spikes of the second wavelength.
    nb_lookup : ndarray, optional
        Neighbourhood lookup table: row `p` lists the 1D coordinates of pixel `p` and of its
        neighbours. Defaults to the notebook-level `index_8nb` table.

    Returns
    -------
    p1r, p2r : ndarray
        Spike (row) indices into `nb_pixels1` and into `pixels2`, one pair per pixel value
        present in both neighbourhood sets. `np.intersect1d` reports the first occurrence of
        each common value in each flattened input, so a given spike can appear in several pairs.
    """
    if nb_lookup is None:
        # Fall back on the table pre-computed at notebook level.
        nb_lookup = index_8nb

    nb_pixels2 = nb_lookup[pixels2, :]
    # Indices are relative to the flattened neighbourhood arrays; the common values themselves are not needed.
    _, flat1, flat2 = np.intersect1d(nb_pixels1, nb_pixels2, return_indices=True)
    # Only the row (which spike) of each flat index matters; the column (which neighbour) is dropped.
    p1r = np.unravel_index(flat1, nb_pixels1.shape)[0]
    p2r = np.unravel_index(flat2, nb_pixels2.shape)[0]
    return p1r, p2r

In [84]:
# Coincidentals for 6 pair-wise
p1r, p2r = get_coincidentals(nb_pixels_w1, spikes_list[1][0,:])

res_w1 = [get_coincidentals(nb_pixels_w1, spikes[0,:]) for spikes in spikes_list[1:]]
len(res_w1[0][0])

517

In [85]:
# Spike indices of the matches between the first two wavelengths:
# rows_w1 points into the first wavelength, rows_w2 into the second.
rows_w1, rows_w2 = res_w1[0]
# Coordinates and intensities of the matched spikes, one column per match.
data_w1 = spikes_list[0][:, rows_w1]
data_w2 = spikes_list[1][:, rows_w2]


In [86]:

# Side-by-side table of the matched spikes: for each wavelength, the pixel
# coordinate (row 0) followed by the two intensity rows (rows 1 and 2).
datadict = {
    'cw1': data_w1[0],
    'w1_int1': data_w1[1],
    'w1_int2': data_w1[2],
    'cw2': data_w2[0],
    'w2_int1': data_w2[1],
    'w2_int2': data_w2[2],
}

df = pd.DataFrame(data=datadict)
df.head()

Unnamed: 0,cw1,w1_int1,w1_int2,cw2,w2_int1,w2_int2
0,18917,122,11,27107,220,0
1,18917,122,11,27108,11,0
2,23013,75,10,27107,220,0
3,23013,75,10,27108,11,0
4,27109,38,9,27107,220,0
