In [2]:
import os
import pandas as pd
import numpy as np
import astropy.io.fits as fits
from pathlib import Path, PurePath

In [33]:
def create_lookup_8nb(nx, ny):
    """ Pre-compute the 8-connectivity lookup table. This will be shared across parallel workers.

    Pixels are numbered in row-major order (``index = row * nx + col``). Neighbours that would fall
    outside the array are clipped to the nearest edge, so an edge pixel lists itself (or an adjacent
    edge pixel) in place of its missing neighbours.

    :param nx: number of columns (x-axis size) of the pixel array.
    :param ny: number of rows (y-axis size) of the pixel array.
    :return: int32 array of shape (nx * ny, 9). Row ``p`` holds the 1D index of pixel ``p`` itself
        followed by the 1D indices of its 8 neighbours.
    """
    # Relative [row, col] offsets for 8-neighbour connectivity (9 entries). 1st one is the origin pixel.
    offsets_8nb = np.array([[0, 0], [-1, 0], [-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1]])
    # 2D coordinates of every pixel. Matrix convention is kept: [rows, cols] = [y-axis, x-axis].
    rows, cols = np.unravel_index(np.arange(nx * ny), (ny, nx))
    coords2d = np.array([rows, cols])
    # Shape (9, 2, nx * ny): for each offset, the [row, col] coordinates of that neighbour of every pixel.
    coords2d_8nb = coords2d[np.newaxis, ...] + offsets_8nb[..., np.newaxis]
    # Handle off-edge coordinates by clipping in place, with a separate upper bound per axis:
    # rows are limited to ny - 1 and columns to nx - 1, so non-square arrays are handled correctly.
    upper_bounds = np.array([ny - 1, nx - 1]).reshape(1, 2, 1)
    np.clip(coords2d_8nb, 0, upper_bounds, out=coords2d_8nb)
    # Convert to 1D (row-major) coordinates and put the pixel axis first.
    lookup_coords = (coords2d_8nb[:, 0, :] * nx + coords2d_8nb[:, 1, :]).astype('int32').T
    return lookup_coords


def extract_coincidentals(spikes_list, idx, lookup_8nb):
    """ Find the spikes of one entry of `spikes_list` that coincide with spikes of the other entries.

    A spike of the reference entry is kept when any pixel of its lookup row (``lookup_8nb[coord]``)
    appears among the coordinates (row 0) of at least one other entry.

    :param spikes_list: list of 2D arrays. Row 0 holds the 1D pixel coordinates; the remaining rows
        are carried through unchanged (assumed to be two intensity rows -- TODO confirm).
    :param idx: position in `spikes_list` of the entry used as reference.
    :param lookup_8nb: 2D integer array indexed by 1D pixel coordinate, giving the pixels to test.
    :return: 2D array with one column per retained spike. Rows are the reference rows (coordinates,
        then the remaining rows), a row filled with `idx` inserted at row index 3, and one 0/1 row
        per entry of `spikes_list` (the row at position `idx` is all ones).
    """
    reference = spikes_list[idx]
    # Pixels to look for, one lookup row per reference spike.
    neighbourhood = lookup_8nb[reference[0, :], :]
    # Every entry except the reference one.
    others = spikes_list[:idx] + spikes_list[idx + 1:]
    # hits[k, j] is True when reference spike j touches a spike of others[k].
    hits = np.array([np.isin(neighbourhood, other[0, :]).any(axis=1) for other in others])
    keep = hits.any(axis=0)
    # Put the reference entry back at its own position, flagged True for every retained spike.
    flags = np.insert(hits[:, keep], idx, True, axis=0)
    kept_coords = reference[0, keep]
    kept_values = reference[1:, keep]
    stacked = np.concatenate([kept_coords[np.newaxis, ...], kept_values, flags], axis=0)
    return np.insert(stacked, 3, idx, axis=0)


def process_group(df, group_n, lookup=None):
    """ Build the table of coincidental spikes for one group of files.

    :param df: DataFrame with a 'Path' column, indexed so that ``df['Path'].loc[group_n]`` yields the
        file paths of the group (relative to the SPIKESDATA directory).
    :param group_n: group number to process.
    :param lookup: neighbour lookup table passed on to `extract_coincidentals`. When omitted, the
        notebook-level `lookup_8nb` table is used.
    :return: DataFrame with one row per coincidental spike: 'GroupNumber', 'coords', 'int1', 'int2',
        'wref', the flags 'w0'..'w6', and 'overlaps' (sum of the flags).
    :raises KeyError: if the SPIKESDATA environment variable is not set.
    """
    if lookup is None:
        # The table is expensive to build, so reuse the one created once at notebook level.
        lookup = lookup_8nb
    wavelength_cols = ['w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6']
    fpaths = df['Path'].loc[group_n]
    spikes_list = [fits.getdata(os.path.join(os.environ['SPIKESDATA'], f)) for f in fpaths]
    # The lookup table must be forwarded: extract_coincidentals requires it as its third argument.
    group_data = np.concatenate(
        [extract_coincidentals(spikes_list, i, lookup) for i in range(len(wavelength_cols))], axis=1)
    column_names = ['coords', 'int1', 'int2', 'wref'] + wavelength_cols
    overlaps_df = pd.DataFrame(group_data.T, columns=column_names)
    overlaps_df.insert(0, 'GroupNumber', group_n)
    # Number of entries (including the reference one) in which each spike is seen.
    overlaps_df['overlaps'] = overlaps_df[wavelength_cols].sum(axis=1).astype(np.uint8)
    return overlaps_df


def process_interval(tinterv):
    """ Process every group whose 'Time' falls inside a time interval.

    Reads the notebook-level `spikes_df` table.

    :param tinterv: interval with `left` and `right` bounds; `left` is included, `right` is excluded.
    :return: list with one `process_group` result per group number found in the interval.
    """
    print('Processing time interval: ', tinterv)
    in_window = (spikes_df['Time'] >= tinterv.left) & (spikes_df['Time'] < tinterv.right)
    window_df = spikes_df.loc[in_window].set_index(['GroupNumber', 'Time'])
    # Distinct group numbers (first index level) present in the window.
    group_numbers = window_df.index.get_level_values(0).unique().values
    results = []
    for n in group_numbers:
        results.append(process_group(window_df, n))
    return results


def write_to_parquet(df, date, outputdir):
    """ Write a table to ``<outputdir>/<YYYY>/<MM>/df_coincidentals_<YYYY>_<MM>_<DD>.parquet``.

    The year/month directory is created if it does not exist yet.

    :param df: object exposing ``to_parquet(path, engine=...)`` (e.g. a pandas DataFrame).
    :param date: object with `year`, `month` and `day` attributes.
    :param outputdir: root output directory.
    :return: None
    """
    year, month, day = date.year, date.month, date.day
    target_dir = Path(outputdir) / f'{year}' / f'{month:02d}'
    target_dir.mkdir(parents=True, exist_ok=True)
    target_file = target_dir / f'df_coincidentals_{year}_{month:02d}_{day:02d}.parquet'
    df.to_parquet(target_file, engine='pyarrow')
    return None


# Root data directory, read from the SPIKESDATA environment variable (raises KeyError if it is unset).
outputdir = os.environ['SPIKESDATA']
# Load the spikes table; process_interval() reads it as the notebook-level `spikes_df`.
spikes_df = pd.read_parquet(Path(outputdir, 'spikes_df_2010.parquet'), engine='pyarrow')

In [11]:
# One day (UTC) split into hourly intervals that include their left bound and exclude their right bound.
tstart = pd.Timestamp('2010-07-12 00:00:00', tz='UTC')
tend = pd.Timestamp('2010-07-13 00:00:00', tz='UTC')
# The step is given as a Timedelta: the upper-case 'H' offset alias is deprecated in recent pandas.
tintervals = pd.interval_range(start=tstart,
                               end=tend,
                               freq=pd.Timedelta(hours=1),
                               closed='left')
# Create data for lookup from the child processes.
lookup_8nb = create_lookup_8nb(4096, 4096)

# First hourly interval, used by the exploratory cells below.
tinterv = tintervals[0]

In [14]:
# Same selection as process_interval(), run step by step for the interval `tinterv`.
print('Processing time interval: ', tinterv)
spikes_df2 = (
    spikes_df
    .loc[(spikes_df['Time'] >= tinterv.left) & (spikes_df['Time'] < tinterv.right)]
    .set_index(['GroupNumber', 'Time'])
)
# Distinct group numbers (first index level) found in the interval.
groups = spikes_df2.index.get_level_values(0).unique().values
groups

Processing time interval:  [2010-07-12, 2010-07-12 01:00:00)


array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [34]:
# Step-by-step version of process_group() for a single group, with the lookup table passed explicitly.
group_n = 0
# File paths of the selected group, relative to the SPIKESDATA directory.
fpaths = spikes_df2['Path'].loc[group_n]
spikes_list = [fits.getdata(Path(os.environ['SPIKESDATA'], f)) for f in fpaths]
# One block of coincidental spikes per reference index (7 of them), joined along the spike axis.
group_data = np.concatenate([extract_coincidentals(spikes_list, i, lookup_8nb) for i in range(7)], axis=1)

(7, 4459)
(7, 7754)
(7, 15155)
(7, 22242)
(7, 4938)
(7, 18902)
(7, 17931)


In [35]:
# Turn the (rows x spikes) array into a table with one row per spike.
column_names = ['coords', 'int1', 'int2', 'wref', 'w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6']
overlaps_df = pd.DataFrame(group_data.T, columns=column_names)
overlaps_df.insert(0, 'GroupNumber', group_n)
# 'overlaps' is the sum of the w0..w6 flags of each row.
overlaps_df['overlaps'] = overlaps_df[['w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6']].sum(axis=1).astype(np.uint8)

In [36]:
# Largest 'overlaps' value in the table, followed by a preview of the first rows.
print(overlaps_df['overlaps'].max())
overlaps_df.head()

4


Unnamed: 0,GroupNumber,coords,int1,int2,wref,w0,w1,w2,w3,w4,w5,w6,overlaps
0,0,10159,156,14,0,1,0,0,0,0,0,0,1
1,0,13241,221,4,0,1,0,0,0,0,0,0,1
2,0,14255,111,14,0,1,0,0,0,0,0,0,1
3,0,33736,191,4,0,1,0,0,0,0,0,0,1
4,0,52872,163,1,0,1,0,0,0,0,0,0,1


In [290]:
# NOTE(review): `group_df_list` is not assigned in any visible cell; presumably it is the return
# value of process_interval(...) -- confirm and add the defining cell so Restart & Run All works.
group_df_list[0].head()

Unnamed: 0,GroupNumber,coords,int1,int2,wref,w0,w1,w2,w3,w4,w5,w6,overlaps
0,1,149558,117,0,0,1,0,1,0,0,0,0,2
1,1,205896,165,0,0,1,0,0,0,0,0,1,2
2,1,209992,111,0,0,1,0,0,0,0,0,1,2
3,1,239694,151,1,0,1,1,0,0,0,0,0,2
4,1,348983,102,0,0,1,0,0,0,1,0,0,2


In [291]:
# Stack the per-group tables into a single table (each table's own row index is kept as-is).
group_df = pd.concat(group_df_list)

In [292]:
# Preview of the concatenated table.
group_df.head()

Unnamed: 0,GroupNumber,coords,int1,int2,wref,w0,w1,w2,w3,w4,w5,w6,overlaps
0,1,149558,117,0,0,1,0,1,0,0,0,0,2
1,1,205896,165,0,0,1,0,0,0,0,0,1,2
2,1,209992,111,0,0,1,0,0,0,0,0,1,2
3,1,239694,151,1,0,1,1,0,0,0,0,0,2
4,1,348983,102,0,0,1,0,0,0,1,0,0,2


In [293]:
# Write the concatenated table directly into the SPIKESDATA directory under a hard-coded file name.
group_df.to_parquet(Path(os.environ['SPIKESDATA'], 'df_coincidentals_2018_01_01_H00.parquet'))

In [296]:
# NOTE(review): `tint` is not assigned in any visible cell (the cells above define `tinterv`);
# confirm where it comes from so the notebook runs on a fresh kernel.
tint

Interval('2018-01-01', '2018-01-01 01:00:00', closed='left')

In [299]:
# Write the table under <SPIKESDATA>/<year>/<month>/, named after the left bound of the interval.
write_to_parquet(group_df, tint.left, os.environ['SPIKESDATA'])