# Filter evaluation images
## 11/11/22

author: E. Orenstein (eorenstein@mbari.org)

Filter SPC data for model evaluation based on up/down current deployment and frame numbers. 

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import glob
from shutil import copy
from utils.symlink_tools import symlink_from_list

Preliminaries

In [2]:
path_to_symlink = r'D:\spc_storage\model_evaluation\2022-11-11_updownAuto\unseen_data'  # where to symlink
path_to_lookup = r'D:\model_eval_101122\lookups'  # where the look up tables are
increment = 50000  # the number of frames 
min_thresh = 70  # minimum major axis threshold
max_thresh = 1000000  # maximum major axis threshold

# {looktable_basename: first_frame}
runs = {
    'processed-2021-07-27-03-05-34.041094512-low_mag_cam': 215726,
    'processed-2021-07-26-11-10-34.051148696-low_mag_cam': 661,
    'processed-2021-07-26-03-05-33.041805816-low_mag_cam': 98515,
    'processed-2021-07-25-11-10-36.057611408-low_mag_cam': 12246,
    'processed-2021-08-10-09-30-25.044459792-low_mag_cam': 161990,
    'processed-2021-08-11-03-05-24.042119200-low_mag_cam': 122840,
    'processed-2021-08-09-09-30-27.040390608-low_mag_cam': 119731,    
    'processed-2021-08-10-03-05-25.043638328-low_mag_cam': 44755,
}

Loop over the runs and symlink for model runs.

In [3]:
for kk in runs.keys():

    print(kk)
    # read the look up table
    tmp = pd.read_csv(os.path.join(path_to_lookup, f'{kk}.csv'), index_col=[0])

    # get the slice observed by the human annotator
    tmp = tmp[tmp['frame_number'].between(runs[kk], runs[kk]+50000)]

    print('number of frames in time interval: ', tmp.shape[0])

    # filter by major axis length
    tmp = tmp[tmp['major_axis_length'].between(min_thresh, max_thresh)]
    print('number of frames after size filter: ', tmp.shape[0])
    
    # symlink the images 
    outpath = os.path.join(path_to_symlink, kk)
    if not os.path.exists(outpath):
        os.mkdir(outpath)

    symlink_from_list(tmp['abspath_rawcolor'].to_list(), outpath)

processed-2021-07-27-03-05-34.041094512-low_mag_cam
number of frames in time interval:  24923
number of frames after size filter:  1620


symlinking...: 100%|██████████| 1620/1620 [00:00<00:00, 4791.93it/s]


processed-2021-07-26-11-10-34.051148696-low_mag_cam
number of frames in time interval:  15079
number of frames after size filter:  776


symlinking...: 100%|██████████| 776/776 [00:00<00:00, 5554.24it/s]

processed-2021-07-26-03-05-33.041805816-low_mag_cam





number of frames in time interval:  16606
number of frames after size filter:  909


symlinking...: 100%|██████████| 909/909 [00:00<00:00, 4105.73it/s]


processed-2021-07-25-11-10-36.057611408-low_mag_cam
number of frames in time interval:  18742
number of frames after size filter:  999


symlinking...: 100%|██████████| 999/999 [00:00<00:00, 5376.78it/s]


processed-2021-08-10-09-30-25.044459792-low_mag_cam
number of frames in time interval:  17430
number of frames after size filter:  1501


symlinking...: 100%|██████████| 1501/1501 [00:00<00:00, 4616.52it/s]


processed-2021-08-11-03-05-24.042119200-low_mag_cam
number of frames in time interval:  8960
number of frames after size filter:  1058


symlinking...: 100%|██████████| 1058/1058 [00:00<00:00, 4299.76it/s]


processed-2021-08-09-09-30-27.040390608-low_mag_cam
number of frames in time interval:  18854
number of frames after size filter:  1773


symlinking...: 100%|██████████| 1773/1773 [00:00<00:00, 5049.87it/s]


processed-2021-08-10-03-05-25.043638328-low_mag_cam
number of frames in time interval:  13220
number of frames after size filter:  1168


symlinking...: 100%|██████████| 1168/1168 [00:00<00:00, 4618.83it/s]


Save all the images into a single directory for manual processing.

In [4]:
sv_path = r'D:\model_eval_101122\manual_ground_truth_131122'

for kk in runs.keys():

    print(kk)
    # read the look up table
    tmp = pd.read_csv(os.path.join(path_to_lookup, f'{kk}.csv'), index_col=[0])

    # get the slice observed by the human annotator
    tmp = tmp[tmp['frame_number'].between(runs[kk], runs[kk]+50000)]

    print('number of frames in time interval: ', tmp.shape[0])

    # filter by major axis length
    tmp = tmp[tmp['major_axis_length'].between(min_thresh, max_thresh)]
    print('number of frames after size filter: ', tmp.shape[0])
    
    # symlink the images 
    outpath = os.path.join(sv_path, kk)
    if not os.path.exists(outpath):
        os.mkdir(outpath)

    for item in tmp['abspath_rawcolor'].to_list():
        copy(item, os.path.join(outpath, os.path.basename(item)))

processed-2021-07-27-03-05-34.041094512-low_mag_cam
number of frames in time interval:  24923
number of frames after size filter:  1620
processed-2021-07-26-11-10-34.051148696-low_mag_cam
number of frames in time interval:  15079
number of frames after size filter:  776
processed-2021-07-26-03-05-33.041805816-low_mag_cam
number of frames in time interval:  16606
number of frames after size filter:  909
processed-2021-07-25-11-10-36.057611408-low_mag_cam
number of frames in time interval:  18742
number of frames after size filter:  999
processed-2021-08-10-09-30-25.044459792-low_mag_cam
number of frames in time interval:  17430
number of frames after size filter:  1501
processed-2021-08-11-03-05-24.042119200-low_mag_cam
number of frames in time interval:  8960
number of frames after size filter:  1058
processed-2021-08-09-09-30-27.040390608-low_mag_cam
number of frames in time interval:  18854
number of frames after size filter:  1773
processed-2021-08-10-03-05-25.043638328-low_mag_cam


In [6]:
for kk in runs.keys():

    for line in concepts:
        concept_dir = os.path.join(sv_path, kk, line)

        os.mkdir(concept_dir)

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'D:\\model_eval_101122\\manual_ground_truth_131122\\processed-2021-07-27-03-05-34.041094512-low_mag_cam\\chaetoceros'

In [None]:
concept_dir

'D:\\model_eval_101122\\manual_ground_truth_131122\\processed-2021-08-10-03-05-25.043638328-low_mag_cam\\Tunicates'

In [None]:
concepts

['chaetoceros',
 'copepoda',
 'Diatoms_round_chains',
 'Diatoms_single_cells',
 'Diatoms_spirals',
 'Diatoms_straight_chains',
 'Marine_snow_POC',
 'mollusca',
 'Plankton_all_other_taxa_combined',
 'Protozoans',
 'Tunicates']