10.01.23

# Select slices that experts will annotate

*IDEA*: select a total of 10 slices
- 3 where the model performs poorly
    - if it is obvious why -> discard
    - if it is interesting -> keep
- 7 selected "randomly" among the top k frames with the most pixels annotated

In [1]:
# reload imported modules automatically before running each cell, so that
# edits to the project's .py files are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
#import glob
import os
import imageio
import napari
import matplotlib.pyplot as plt
import math
import seaborn as sns
import pandas as pd

#from skimage.io import imsave

from in_out_tools import load_movies_ids, load_annotations_ids
from visualization_tools import get_discrete_cmap, get_labels_cmap, get_annotations_contour
from metrics_tools import compute_iou, empty_marginal_frames
from data_processing_tools import get_processed_result

## Set global parameters

In [3]:
# absolute path of the current working directory
# (os.path.abspath('') resolves the empty path against it)
BASEDIR = os.path.abspath('')
BASEDIR

'c:\\Users\\prisc\\Code\\sparks_project\\sparks'

In [4]:
# configure Napari cmap:
# - cmap is built from the 'gray' colormap with lut=16 and is used to display the movies
# - labels_cmap is used to display the labels layers (predictions and annotations)
cmap = get_discrete_cmap(name='gray', lut=16)
labels_cmap = get_labels_cmap()

# frames ignored by loss fct during training
# (passed as n_frames to empty_marginal_frames when computing the statistics)
ignore_frames_loss = 6

In [5]:
# event types for which statistics are computed
classes = ['sparks', 'puffs', 'waves']

# value used for each class in the annotation masks
# ('ignore_rois' marks the regions that are masked out when computing the IoU)
class_to_nb = {'sparks': 1,
               'puffs': 3,
               'waves': 2,
               'ignore_rois': 4
              }

# IDs of the movies to analyse; the longer list is kept for reference and
# only the two movies below are currently processed
#movie_ids = ["05","10","15","20","25","32","34","40","45"]
movie_ids = ["05","34"]

In [6]:
# dataset folder, relative to the working directory; passed as data_folder
# to the functions loading movies and annotations
dataset_dir = os.path.join("..", "data", "sparks_dataset")

## Load movies, annotations & predictions

In [7]:
# Config model to load:
# training_name and epoch are both part of the name of the prediction files
training_name = 'TEMP_new_annotated_peaks_physio'
epoch = 100000

# folder containing the prediction files (relative to the working directory)
preds_dir = os.path.join("trainings_validation", training_name)

In [8]:
### Load movies
# xs: dict {movie_id: movie}
xs = load_movies_ids(data_folder=dataset_dir,
                     ids=movie_ids,
                     names_available=True,
                     movie_names="video")

### Load annotations
# ys: dict {movie_id: annotation mask}
ys = load_annotations_ids(data_folder=dataset_dir,
                          ids=movie_ids,
                          mask_names="class_label")

### Load predictions
def _prediction_filenames(event_type):
    """Return a dict {movie_id: path of the .tif file with the predictions of `event_type`}."""
    return {movie_id: os.path.join(preds_dir,
                                   f"{training_name}_{epoch}_{movie_id}_{event_type}.tif")
            for movie_id in movie_ids}


def _load_volumes(filenames):
    """Read every file of a dict {movie_id: path} and return a dict {movie_id: numpy array}."""
    return {movie_id: np.asarray(imageio.volread(filename))
            for movie_id, filename in filenames.items()}


sparks_filenames = _prediction_filenames("sparks")
puffs_filenames = _prediction_filenames("puffs")
waves_filenames = _prediction_filenames("waves")

sparks = _load_volumes(sparks_filenames)
puffs = _load_volumes(puffs_filenames)
waves = _load_volumes(waves_filenames)

TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'


### Compute class weights

The class weights are used to rank frames by their number of annotated pixels in a weighted way, so that pixels of rarer classes count more.

In [44]:
# define function to compute class weights
def compute_class_weights(dataset, w0=1, w1=1, w2=1, w3=1):
    '''
    Compute one weight per class, inversely proportional to the number of
    pixels of that class in the dataset.

    Adapted from 'compute_class_weights' in training_inference_tools.py
    (takes numpy arrays instead of a SparkDataset instance).

    dataset: list of numpy arrays representing annotation masks with values
             between 0 and 4 (only pixels with values 0, 1, 2 and 3 are
             counted)
    w0, w1, w2, w3: multiplicative factors applied to the weights of
                    classes 0, 1, 2 and 3

    Returns the list [weight_0, weight_1, weight_2, weight_3], where
    weight_i = w_i * total / (4 * count_i), count_i is the number of pixels
    with value i and total is the sum of the four counts. A class without
    any pixel gets weight 0.
    '''
    scale_factors = (w0, w1, w2, w3)
    n_classes = len(scale_factors)

    # number of pixels of each class, accumulated over all the masks
    pixel_counts = [
        sum(np.count_nonzero(mask == label) for mask in dataset)
        for label in range(n_classes)
    ]
    total = sum(pixel_counts)

    return [
        factor * total / (n_classes * count) if count != 0 else 0
        for factor, count in zip(scale_factors, pixel_counts)
    ]

In [51]:
# compute class weights over all the annotation masks; they are returned
# in the order of the mask values: 0, then 1 (sparks), 2 (waves), 3 (puffs)
class_weights = compute_class_weights(dataset=list(ys.values()))
w_bg, w_sparks, w_waves, w_puffs = class_weights

# weight associated with each event type
w_dict = dict(sparks=w_sparks, puffs=w_puffs, waves=w_waves)

In [52]:
# display the weight of each event type
w_dict

{'sparks': 166.89727978145254,
 'puffs': 52.30136655692318,
 'waves': 4.645747287208633}

### Process predictions to obtain segmentation masks with small objects removed

In [9]:
# parameters necessary to process predictions

# physiological params (for spark peaks results)
pixel_size = 0.2 # 1 pixel = 0.2 um x 0.2 um
time_frame = 6.8 # 1 frame = 6.8 ms
min_dist_xy = round(1.8 / pixel_size) # min distance in space (in pixels)
min_dist_t = round(20 / time_frame) # min distance in time (in frames)

# spark instances detection parameters:
# conn_mask is a boolean mask made of a disk of radius ceil(min_dist_xy / 2)
# repeated over min_dist_t frames
radius = math.ceil(min_dist_xy / 2)
y, x = np.ogrid[-radius : radius + 1, -radius : radius + 1]
disk = x**2 + y**2 <= radius**2
conn_mask = np.stack([disk] * min_dist_t, axis=0)

# connectivity for event instances detection
connectivity = 26
sigma = 3

# minimal sizes passed to get_processed_result
# TODO: use better parameters !!!
spark_min_width = 3
spark_min_t = 3
puff_min_t = 5
wave_min_width = round(15 / pixel_size)

# maximal gap between two predicted puffs or waves that belong together
max_gap = 2  # i.e., 2 empty frames

In [10]:
# arguments of get_processed_result that are the same for every movie
processing_params = dict(
    conn_mask=conn_mask,
    connectivity=connectivity,
    max_gap=max_gap,
    sigma=sigma,
    wave_min_width=wave_min_width,
    puff_min_t=puff_min_t,
    spark_min_t=spark_min_t,
    spark_min_width=spark_min_width,
)

# only the second output of get_processed_result is kept for each movie
preds_segmentation = {}
for movie_id in movie_ids:
    _, movie_segmentation, _ = get_processed_result(
        sparks=sparks[movie_id],
        puffs=puffs[movie_id],
        waves=waves[movie_id],
        xs=xs[movie_id],
        **processing_params,
    )
    preds_segmentation[movie_id] = movie_segmentation

	Events detection threshold: 0.7783508
	Number of sparks detected by nonmaxima suppression: 148
	Events detection threshold: 0.7502694
	Number of sparks detected by nonmaxima suppression: 47


## Compute statistics

In [58]:
# configure dataframe
cols = ['movie ID', 'frame ID',
        'sparks IoU', 'puffs IoU', 'waves IoU', 'average IoU',
        'sparks pixels', 'puffs pixels', 'waves pixels',
        'sparks weighted pixels', 'puffs weighted pixels',
        'waves weighted pixels', 'weighted sum pixels']

# one dict per frame; the dataframe is built only once at the end, so that
# the rows get a unique index and the data is not copied at every iteration
frames_stats = []

# iterate over movies and frames to compute statistics
for movie_id in movie_ids:
    # remove frames ignored by loss function
    sample_ys = empty_marginal_frames(video=ys[movie_id],
                                      n_frames=ignore_frames_loss)

    # predicted segmentation, as a dict with keys 'sparks', 'puffs' and 'waves'
    sample_preds = {
        event_type: empty_marginal_frames(
            video=preds_segmentation[movie_id][event_type],
            n_frames=ignore_frames_loss
        )
        for event_type in classes
    }

    # mask ignored during training
    ignore_mask = sample_ys==class_to_nb['ignore_rois']

    for frame_id in range(len(sample_ys)):
        # statistics of the current frame (one row of the dataframe)
        frame_dict = {'movie ID': movie_id,
                      'frame ID': frame_id,
                      'average IoU' : 0,
                      'weighted sum pixels' : 0}

        for event_type in classes:
            # get binary masks corresponding to event type
            ys_frame_class = sample_ys[frame_id]==class_to_nb[event_type]
            preds_frame_class = sample_preds[event_type][frame_id]

            # compute Intersection over Union score
            iou = compute_iou(ys_roi=ys_frame_class,
                              preds_roi=preds_frame_class,
                              ignore_mask=ignore_mask[frame_id])
            frame_dict[event_type+' IoU'] = iou
            frame_dict['average IoU'] += iou

            # count number of (WEIGHTED) annotated pixels
            nonzero_pixels = np.count_nonzero(ys_frame_class)
            weighted_pixels = w_dict[event_type]*nonzero_pixels
            frame_dict[event_type+' pixels'] = nonzero_pixels
            frame_dict[event_type+' weighted pixels'] = weighted_pixels
            frame_dict['weighted sum pixels'] += weighted_pixels

        frame_dict['average IoU'] /= len(classes)

        frames_stats.append(frame_dict)

# create df from the list of frame dicts (columns ordered as in cols)
frames_stats_df = pd.DataFrame(frames_stats, columns=cols)

In [59]:
# display the statistics of all frames (one row per movie and frame)
frames_stats_df

Unnamed: 0,movie ID,frame ID,sparks IoU,puffs IoU,waves IoU,average IoU,sparks pixels,puffs pixels,waves pixels,sparks weighted pixels,puffs weighted pixels,waves weighted pixels,weighted sum pixels
0,05,0,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0
0,05,1,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0
0,05,2,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0
0,05,3,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0
0,05,4,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,34,899,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0
0,34,900,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0
0,34,901,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0
0,34,902,1.0,1.0,1.0,1.0,0,0,0,0.0,0.0,0.0,0.0


### Discard empty frames (if both ys and preds are empty)

Idea: if a frame has no annotated pixels and its average IoU is 1, then it has no predicted pixels either, so it is empty in both annotations and predictions and can be discarded.

In [61]:
# a frame is considered empty if it has no annotated pixels and an average IoU of 1
is_empty_frame = ((frames_stats_df['average IoU'] == 1)
                  & (frames_stats_df['weighted sum pixels'] == 0))

# keep all the other frames
frames_stats_df = frames_stats_df.loc[~is_empty_frame]

In [62]:
# display the statistics of the frames that were kept
frames_stats_df

Unnamed: 0,movie ID,frame ID,sparks IoU,puffs IoU,waves IoU,average IoU,sparks pixels,puffs pixels,waves pixels,sparks weighted pixels,puffs weighted pixels,waves weighted pixels,weighted sum pixels
0,05,6,0.163488,0.0,0.000000,0.054496,68,0,0,11349.015025,0.0,0.000000,11349.015025
0,05,7,0.215539,0.0,0.000000,0.071846,109,0,0,18191.803496,0.0,0.000000,18191.803496
0,05,8,0.235012,0.0,0.000000,0.078337,120,0,0,20027.673574,0.0,0.000000,20027.673574
0,05,9,0.246341,0.0,1.000000,0.415447,115,0,0,19193.187175,0.0,0.000000,19193.187175
0,05,10,0.270270,0.0,1.000000,0.423423,129,0,0,21529.749092,0.0,0.000000,21529.749092
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,34,893,0.386233,0.0,0.335000,0.240411,202,0,1914,33713.250516,0.0,8891.960308,42605.210824
0,34,894,0.395349,0.0,0.321839,0.239063,204,0,2046,34047.045075,0.0,9505.198950,43552.244025
0,34,895,0.375486,0.0,0.314423,0.229970,196,0,2190,32711.866837,0.0,10174.186559,42886.053396
0,34,896,0.059347,0.0,0.304218,0.121188,20,0,2369,3337.945596,0.0,11005.775323,14343.720919


## Select frames according to some criteria and visualize them in Napari

### Get frames where waves IoU is worst, and annotated waves aren't empty

In [66]:
# Keep only the frames where at least one pixel is annotated as wave
has_waves = frames_stats_df['waves pixels'] != 0
waves_df = frames_stats_df.loc[has_waves]

In [67]:
# Show the frames with waves, from worst to best waves IoU
waves_df.sort_values(by='waves IoU')

Unnamed: 0,movie ID,frame ID,sparks IoU,puffs IoU,waves IoU,average IoU,sparks pixels,puffs pixels,waves pixels,sparks weighted pixels,puffs weighted pixels,waves weighted pixels,weighted sum pixels
0,34,685,0.77957,0.463661,0.000000,0.414410,150,1283,3504,25034.591967,67102.653293,16278.698494,108415.943754
0,34,401,1.00000,0.000000,0.000000,0.333333,0,0,2525,0.000000,0.000000,11730.511900,11730.511900
0,34,400,0.00000,0.000000,0.000000,0.000000,0,0,2586,0.000000,0.000000,12013.902485,12013.902485
0,34,399,0.00000,0.000000,0.000000,0.000000,0,0,2735,0.000000,0.000000,12706.118831,12706.118831
0,34,398,0.00000,0.000000,0.000000,0.000000,0,0,3008,0.000000,0.000000,13974.407840,13974.407840
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,34,350,0.00000,0.000000,0.824809,0.274936,0,0,4953,0.000000,0.000000,23010.386314,23010.386314
0,34,354,0.00000,0.000000,0.828208,0.276069,0,0,5132,0.000000,0.000000,23841.975078,23841.975078
0,34,351,0.00000,0.000000,0.829453,0.276484,0,0,5016,0.000000,0.000000,23303.068393,23303.068393
0,34,355,0.00000,0.000000,0.830052,0.276684,0,0,5078,0.000000,0.000000,23591.104724,23591.104724


### Get frames where puffs IoU is worst, and annotated puffs aren't empty

In [68]:
# Keep only the frames where at least one pixel is annotated as puff
has_puffs = frames_stats_df['puffs pixels'] != 0
puffs_df = frames_stats_df.loc[has_puffs]

In [69]:
# Show the frames with puffs, from worst to best puffs IoU
puffs_df.sort_values(by='puffs IoU')

Unnamed: 0,movie ID,frame ID,sparks IoU,puffs IoU,waves IoU,average IoU,sparks pixels,puffs pixels,waves pixels,sparks weighted pixels,puffs weighted pixels,waves weighted pixels,weighted sum pixels
0,05,13,0.177817,0.000000,1.000000,0.392606,105,47,0,17524.214377,2458.164228,0.000000,19982.378605
0,34,175,0.000000,0.000000,0.741840,0.247280,0,194,3584,0.000000,10146.465112,16650.358277,26796.823389
0,34,176,0.000000,0.000000,0.730743,0.243581,0,196,3545,0.000000,10251.067845,16469.174133,26720.241978
0,34,177,0.000000,0.000000,0.721687,0.240562,0,187,3459,0.000000,9780.355546,16069.639866,25849.995413
0,34,178,0.000000,0.000000,0.716907,0.238969,0,174,3366,0.000000,9100.437781,15637.585369,24738.023150
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,34,26,1.000000,0.855316,1.000000,0.951772,0,1130,0,0.000000,59100.544209,0.000000,59100.544209
0,34,37,1.000000,0.858053,1.000000,0.952684,0,978,0,0.000000,51150.736493,0.000000,51150.736493
0,34,34,1.000000,0.858407,1.000000,0.952802,0,1019,0,0.000000,53295.092522,0.000000,53295.092522
0,34,35,1.000000,0.861333,1.000000,0.953778,0,1015,0,0.000000,53085.887055,0.000000,53085.887055


### Get frames where sparks IoU is worst, and annotated sparks aren't empty

In [70]:
# Keep only the frames where at least one pixel is annotated as spark
has_sparks = frames_stats_df['sparks pixels'] != 0
sparks_df = frames_stats_df.loc[has_sparks]

In [71]:
# Show the frames with sparks, from worst to best sparks IoU
sparks_df.sort_values(by='sparks IoU')

Unnamed: 0,movie ID,frame ID,sparks IoU,puffs IoU,waves IoU,average IoU,sparks pixels,puffs pixels,waves pixels,sparks weighted pixels,puffs weighted pixels,waves weighted pixels,weighted sum pixels
0,34,476,0.000000,0.000000,0.400128,0.133376,21,0,5974,3504.842875,0.000000,27753.694294,31258.537169
0,34,595,0.000000,0.000000,0.000000,0.000000,68,0,4432,11349.015025,0.000000,20589.951977,31938.967002
0,34,596,0.000000,0.000000,0.000000,0.000000,62,0,4615,10347.631346,0.000000,21440.123730,31787.755077
0,34,597,0.000000,0.000000,0.000000,0.000000,63,0,4730,10514.528626,0.000000,21974.384668,32488.913295
0,34,598,0.000000,0.000000,0.000000,0.000000,54,0,4816,9012.453108,0.000000,22373.918935,31386.372043
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,34,104,0.797710,0.000000,0.333659,0.377123,225,0,341,37551.887951,0.000000,1584.199825,39136.087776
0,34,98,0.804545,1.000000,0.171622,0.658722,177,0,127,29540.818521,0.000000,590.009905,30130.828427
0,34,99,0.813333,1.000000,0.218950,0.677428,183,0,171,30542.202200,0.000000,794.422786,31336.624986
0,34,145,0.814286,1.000000,0.809667,0.874651,115,0,2711,19193.187175,0.000000,12594.620896,31787.808070


### For each movie, get frames with most weighted annotated pixels

In [73]:
# for each movie, show the 5 frames with the largest weighted sum of annotated pixels
for movie_id in movie_ids:
    is_current_movie = frames_stats_df['movie ID'] == movie_id
    most_annotated_df = frames_stats_df.loc[is_current_movie]

    ranked_df = most_annotated_df.sort_values(by='weighted sum pixels', ascending=False)
    display(ranked_df.head(5))

Unnamed: 0,movie ID,frame ID,sparks IoU,puffs IoU,waves IoU,average IoU,sparks pixels,puffs pixels,waves pixels,sparks weighted pixels,puffs weighted pixels,waves weighted pixels,weighted sum pixels
0,5,100,0.268463,0.131303,1.0,0.466589,287,391,0,47899.519297,20449.834324,0.0,68349.353621
0,5,66,0.247492,0.080046,1.0,0.442513,371,101,0,61918.890799,5282.438022,0.0,67201.328821
0,5,69,0.204163,0.09405,1.0,0.432737,344,175,0,57412.664245,9152.739147,0.0,66565.403392
0,5,67,0.222028,0.07892,1.0,0.433649,354,136,0,59081.637043,7112.985852,0.0,66194.622894
0,5,101,0.269857,0.130793,1.0,0.466883,265,404,0,44227.779142,21129.752089,0.0,65357.531231


Unnamed: 0,movie ID,frame ID,sparks IoU,puffs IoU,waves IoU,average IoU,sparks pixels,puffs pixels,waves pixels,sparks weighted pixels,puffs weighted pixels,waves weighted pixels,weighted sum pixels
0,34,682,0.742718,0.459356,0.0,0.400691,165,1297,3987,27538.051164,67834.872424,18522.594434,113895.518022
0,34,683,0.73913,0.46086,0.0,0.399997,165,1300,3844,27538.051164,67991.776524,17858.252572,113388.08026
0,34,681,0.729064,0.460177,0.0,0.396414,160,1294,4018,26703.564765,67677.968325,18666.6126,113048.14569
0,34,680,0.706806,0.46271,0.0,0.389839,146,1289,4064,24367.002848,67416.461492,18880.316975,110663.781315
0,34,684,0.765306,0.461954,0.0,0.409087,155,1292,3663,25869.078366,67573.365592,17017.372313,110459.816271


### Visualize sample movie with Napari

In [76]:
# movie to visualize (its data must have been loaded, i.e. it must be in movie_ids)
movie_id = '34'

# get predicted segmentation with values between 0 and 3:
# each class mask is multiplied by its class value and added to a single volume
# with the same shape and dtype as the annotations
# NOTE(review): since the masks are summed, this assumes that the class masks are
# binary and do not overlap (otherwise the values would add up) - TODO confirm
preds = np.zeros_like(ys[movie_id])
for event_type in classes:
    preds += class_to_nb[event_type] * preds_segmentation[movie_id][event_type]

# get contours of annotated mask, for visualization
ys_contours = get_annotations_contour(annotations=ys[movie_id], contour_val=2)

In [77]:

# open a Napari window showing the movie with both segmentations on top
viewer = napari.Viewer()

# original movie, displayed with the discrete gray colormap
viewer.add_image(xs[movie_id], name='original movie', colormap=('colors', cmap))

# segmentation predicted by the model
viewer.add_labels(preds, name='predicted segmentation', opacity=0.5, color=labels_cmap)

# contours of the annotated events
viewer.add_labels(ys_contours, name='annotated segmentation', opacity=0.8, color=labels_cmap)

<Labels layer 'annotated segmentation' at 0x183d7b7e520>