In [1]:
from data.echogram import DataReaderZarr, get_data_readers
import numpy as np
from tqdm import tqdm
import dask

# Disable dask's 'array.slicing.split_large_chunks' option for this session.
dask.config.set({'array.slicing.split_large_chunks': False})

# Root directory that the survey paths below are relative to
survey_path = '/data/'

# Zarr stores of the surveys to process, one reader per store
surveys = ['2019/S2019847/ACOUSTIC/GRIDDED/S2019847_sv.zarr']
readers = [DataReaderZarr(f'{survey_path}{zarr_file}') for zarr_file in surveys]

# Or alternatively ...
# readers = get_data_readers()

# Patch extent in samples along each axis
patch_size = (256, 256)  # x, y

In [2]:
# Only the first reader is processed in this notebook.
survey = readers[0]

n_pings, n_range = survey.shape

half_patch_x = patch_size[0] // 2
half_patch_y = patch_size[1] // 2

# Patch centres, spaced one patch apart. Along x they span the whole survey
# (n_pings); along y they are limited to the first `patch_size[1] * 3` samples
# rather than the full n_range.
x_centres = np.arange(half_patch_x, n_pings - half_patch_x, patch_size[0])
y_centres = np.arange(half_patch_y, patch_size[1] * 3 - half_patch_y, patch_size[1])

# Every (x, y) combination, one centre per row.
xs, ys = np.meshgrid(x_centres, y_centres)
coordinate_list = np.column_stack((xs.ravel(), ys.ravel()))[:-2]  # Last two rows excluded

fish_categories = list(survey.fish_categories)

# One row of statistics per patch.
# x, y, mean, median, n_pixels_below_seabed, Nr of fish categories + ignore + background
# NOTE(review): the fill loop in the next cell writes columns 0-12 - confirm this width matches.
n_columns = 2 + 3 + len(survey.fish_categories) + 5
data = np.zeros((coordinate_list.shape[0], n_columns))

In [None]:
# Fill one row of `data` per patch centre in `coordinate_list`.
#
# Column layout written by this loop:
#    0, 1  patch centre (x, y)
#    2, 3  mean and median sv of the patch
#    4     number of pixels under the seabed
#    5-8   pixel counts: other, sandeel, possible sandeel, background
#    9-12  average sv:   other, sandeel, possible sandeel, background
#
# An average is left at its initial value whenever the class has no pixels in
# the patch.

# Patch geometry never changes, so compute it once outside the loop.
half_x = patch_size[0] // 2
half_y = patch_size[1] // 2
n_patch_pixels = patch_size[0] * patch_size[1]

for i, (x, y) in tqdm(enumerate(coordinate_list), total=len(coordinate_list)):
    data[i, 0] = x
    data[i, 1] = y

    # The patch is addressed by its upper-left corner; (x, y) is its centre.
    ping_start = x - half_x
    range_start = int(y - half_y)

    data_sv = survey.get_data_slice(idx_ping=ping_start, n_pings=patch_size[0],
                                    idx_range=range_start, n_range=patch_size[1],
                                    return_numpy=False, frequencies=[38000])
    mean_sv = data_sv.mean().values
    median_sv = data_sv.median(dim=['ping_time', 'range']).values

    data[i, 2] = mean_sv  # Mean sv value
    data[i, 3] = median_sv  # Median sv value

    seabed_mask = survey.get_seabed_mask(idx_ping=ping_start, n_pings=patch_size[0],
                                         idx_range=range_start, n_range=patch_size[1],
                                         return_numpy=False)
    n_pixels_below_seabed = seabed_mask.sum().values
    data[i, 4] = n_pixels_below_seabed  # Pixels under seabed

    labels = survey.annotation.annotation[:, (x - half_x):(x + half_x),
                                          int(y - half_y):int(y + half_y)]

    # Select each category's label layer once and reuse it below.
    other_labels = labels.sel(category=1)
    sandeel_labels = labels.sel(category=27)
    possandeel_labels = labels.sel(category=6009)

    # Number of Pixels
    data[i, 5] = other_labels.sum().values  # Other class
    data[i, 6] = sandeel_labels.sum().values  # Sandeel class
    data[i, 7] = possandeel_labels.sum().values  # Possible Sandeel class
    data[i, 8] = n_patch_pixels - (data[i, 5] + data[i, 6] + data[i, 7])  # Background class

    # Average Intensity Values
    if data[i, 5] != 0:
        data[i, 9] = ((data_sv * other_labels).sum() / data[i, 5]).values  # Average Other sv
    if data[i, 6] != 0:
        data[i, 10] = ((data_sv * sandeel_labels).sum() / data[i, 6]).values  # Average Sandeel sv
    if data[i, 7] != 0:
        data[i, 11] = ((data_sv * possandeel_labels).sum() / data[i, 7]).values  # Average Possible Sandeel sv

    if data[i, 8] == n_patch_pixels:
        # Nothing is labelled, so the background average is the patch average.
        data[i, 12] = mean_sv
    elif data[i, 8] != 0:
        # Combine the three conditions with `&`: np.logical_and takes only two
        # inputs and treats a third positional argument as `out`, which would
        # silently drop the "possible sandeel" condition from the mask.
        is_background = ((sandeel_labels.values != 1)
                         & (other_labels.values != 1)
                         & (possandeel_labels.values != 1))
        data[i, 12] = data_sv[0].values[is_background].sum() / data[i, 8]  # Average Background sv

  4%|▍         | 1186/31128 [36:43<14:11:34,  1.71s/it]

In [None]:
# Column labels for the per-patch statistics, in column order:
# patch centre and whole-patch statistics first, then one pixel count
# ('nop_*') and one average ('mean_*') per class.
_class_suffixes = ('other', 'sandeel', 'possandeel', 'background')
names = (
    ['x', 'y', 'mean_sv', 'median_sv', 'nop_below_seabed']
    + ['nop_' + suffix for suffix in _class_suffixes]
    + ['mean_' + suffix for suffix in _class_suffixes]
)
