In [1]:
from collections import defaultdict
import sys

sys.path.append('..')

from tqdm import tqdm

from pixel_accuracy import read_annotations_gt, convert_pixel_images, SUBSTRATE_LIST, SUBSTRATE_TO_IDX

ModuleNotFoundError: No module named 'cv2'

In [3]:
!pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.5.1.48-cp38-cp38-macosx_10_13_x86_64.whl (40.3 MB)
[K     |████████████████████████████████| 40.3 MB 1.9 MB/s eta 0:00:01     |████████████████▉               | 21.1 MB 142 kB/s eta 0:02:15     |██████████████████▊             | 23.5 MB 6.4 MB/s eta 0:00:03     |███████████████████             | 23.9 MB 6.4 MB/s eta 0:00:03     |████████████████████████▋       | 31.0 MB 386 kB/s eta 0:00:25     |█████████████████████████▋      | 32.2 MB 386 kB/s eta 0:00:21     |█████████████████████████▊      | 32.4 MB 386 kB/s eta 0:00:21     |█████████████████████████████   | 36.5 MB 386 kB/s eta 0:00:10     |█████████████████████████████▎  | 36.8 MB 386 kB/s eta 0:00:09     |█████████████████████████████▋  | 37.2 MB 1.9 MB/s eta 0:00:02
Installing collected packages: opencv-python
Successfully installed opencv-python-4.5.1.48
You should consider upgrading via the '/Users/campea/python3.8/bin/python -m pip install --upgrade pip' command.[0m


## 1. Pixel distribution for test data

In [2]:
# Read the task-2 ground-truth annotations from the 2021-02-04 CSV.
# NOTE(review): the section heading and the variable name say "test", but the
# path points at a `training_set_*` file -- confirm which split this really is.
gt_test = read_annotations_gt(file='../../yam/plugins/training_set_2021_02_04/training_set_task_2_2021_02_04.csv', task=2)

In [3]:
# Build one label image per annotated picture; the result is indexed by a
# string key (presumably the image name -- verify against convert_pixel_images).
pixel_images = convert_pixel_images(gt_test)

100%|██████████| 879/879 [00:20<00:00, 42.28it/s]


In [4]:
# Spot-check the dimensions of a single entry.
pixel_images['2018_0714_112608_061'].shape

(4032, 3024)

In [5]:
def calculate_distribution(pixel_images):
    """Aggregate per-substrate pixel totals and per-image presence counts.

    Parameters
    ----------
    pixel_images : mapping
        Mapping whose values are label images. Each image must support the
        elementwise comparison ``image == idx``, ``.sum()`` on the result and
        a ``.size`` attribute (presumably NumPy arrays produced by
        ``convert_pixel_images`` -- TODO confirm).

    Returns
    -------
    pixel_distribution : defaultdict(int)
        Total number of pixels for every name in ``SUBSTRATE_TO_IDX``, plus a
        ``'background'`` entry counting the pixels that matched none of the
        substrate indices.
    number_objects : defaultdict(int)
        For every substrate name, the number of images containing at least
        one pixel of that substrate. Despite the name, this is a per-image
        presence count, not a count of individual objects.
    """
    pixel_distribution = defaultdict(int)
    number_objects = defaultdict(int)

    for image in tqdm(pixel_images.values()):
        non_background = 0
        for name, idx in SUBSTRATE_TO_IDX.items():
            pixels_substrate = (image == idx).sum()
            pixel_distribution[name] += pixels_substrate
            non_background += pixels_substrate
            number_objects[name] += int(pixels_substrate > 0)
        # Use the image's own element count instead of a hard-coded 4032*3024,
        # so images of any other resolution get a correct background total.
        pixel_distribution['background'] += image.size - non_background

    return pixel_distribution, number_objects

In [6]:
# Aggregate over every image. This is slow: the recorded run needed 5 min 44 s
# for 879 images.
pixel_distribution, number_objects = calculate_distribution(pixel_images)

100%|██████████| 879/879 [05:44<00:00,  2.55it/s]


In [8]:
# Grand totals used as denominators for the two distributions.
s_pixels = sum(pixel_distribution.values())
s_objects = sum(number_objects.values())

# Express every entry as a fraction of its dictionary's grand total.
normalised_pixels = dict(
    (key, value / s_pixels) for key, value in pixel_distribution.items()
)
normalised_objects = dict(
    (key, value / s_objects) for key, value in number_objects.items()
)

In [9]:
# Print each class's share of all pixels as a percentage with two decimals.
for key, value in normalised_pixels.items():
    print(f'{key} {100*value:.2f}%')

c_algae_macro_or_leaves 1.04%
c_fire_coral_millepora 0.02%
c_hard_coral_boulder 4.12%
c_hard_coral_branching 3.49%
c_hard_coral_encrusting 1.17%
c_hard_coral_foliose 0.19%
c_hard_coral_mushroom 0.06%
c_hard_coral_submassive 1.71%
c_hard_coral_table 0.64%
c_soft_coral 5.61%
c_soft_coral_gorgonian 0.14%
c_sponge 1.55%
c_sponge_barrel 0.41%
background 79.86%


In [10]:
# Print each class's share of the summed per-class presence counts. The shares
# add up to 100% across classes, so a value is NOT "the fraction of images that
# contain this class".
for key, value in normalised_objects.items():
    print(f'{key} {100*value:.2f}%')

c_algae_macro_or_leaves 4.46%
c_fire_coral_millepora 0.27%
c_hard_coral_boulder 18.02%
c_hard_coral_branching 14.24%
c_hard_coral_encrusting 13.10%
c_hard_coral_foliose 3.17%
c_hard_coral_mushroom 3.76%
c_hard_coral_submassive 5.95%
c_hard_coral_table 2.68%
c_soft_coral 13.44%
c_soft_coral_gorgonian 1.85%
c_sponge 14.17%
c_sponge_barrel 4.88%


In [None]:
# Display the unformatted pixel fractions (values in [0, 1], not percentages).
normalised_pixels

## 2. Pixel distribution per location

In [None]:
# Ground-truth CSVs, one per location category, analysed in turn below.
list_of_files = [
    '../data/imageCLEFcoral2020_GT_geographicallyDistinct.csv',
    '../data/imageCLEFcoral2020_GT_geographicallySimilar.csv',
    '../data/imageCLEFcoral2020_GT_sameLocation.csv',
    '../data/imageCLEFcoral2020_GT_similarLocation.csv'
]
for file in list_of_files:
    # Header line: the file's base name without its 4-character '.csv' suffix.
    print(file.rsplit('/', 1)[-1][:-4])

    # Same pipeline as for the first data set: read, convert, aggregate.
    gt_test = read_annotations_gt(file=file, task=2)
    pixel_images = convert_pixel_images(gt_test)
    pixel_distribution, number_objects = calculate_distribution(pixel_images)

    # Denominators for the two distributions.
    s_pixels = sum(pixel_distribution.values())
    s_objects = sum(number_objects.values())

    normalised_pixels = dict(
        (key, value / s_pixels) for key, value in pixel_distribution.items()
    )
    normalised_objects = dict(
        (key, value / s_objects) for key, value in number_objects.items()
    )

    # Report both distributions as percentages with two decimals.
    print('\nPixel distribution')
    for key, value in normalised_pixels.items():
        print(f'{key} {100*value:.2f}%')
    print('\nNumber of objects distribution')
    for key, value in normalised_objects.items():
        print(f'{key} {100*value:.2f}%')

## 3. Pixel distribution for train data

In [None]:
# Read the task-2 training annotations.
# NOTE(review): the result is stored in `gt_test` although it is train data,
# and it overwrites whatever an earlier cell left in that name.
gt_test = read_annotations_gt(file='../data/annotations_train_task_2.csv', task=2)

In [None]:
# Build the per-image label images for the annotations loaded in the previous cell.
pixel_images = convert_pixel_images(gt_test)

In [None]:
# Aggregate pixel totals and per-image presence counts for the current images.
pixel_distribution, number_objects = calculate_distribution(pixel_images)

In [None]:
# Grand totals used as denominators for the two distributions.
s_pixels = sum(pixel_distribution.values())
s_objects = sum(number_objects.values())

# Express every entry as a fraction of its dictionary's grand total.
normalised_pixels = dict(
    (key, value / s_pixels) for key, value in pixel_distribution.items()
)
normalised_objects = dict(
    (key, value / s_objects) for key, value in number_objects.items()
)

# Report both distributions as percentages with two decimals.
print('\nPixel distribution')
for key, value in normalised_pixels.items():
    print(f'{key} {100*value:.2f}%')
print('\nNumber of objects distribution')
for key, value in normalised_objects.items():
    print(f'{key} {100*value:.2f}%')

## 4. Same as before, but without percentages and including unannotated pixels

In [None]:
# Read the corrected task-2 training annotations (the result is again stored
# in `gt_test`, replacing the previous contents).
gt_test = read_annotations_gt(file='../data/annotations_train_task_2_corrected.csv', task=2)

# Build the per-image label images for those annotations.
pixel_images = convert_pixel_images(gt_test)

In [None]:
# Aggregate pixel totals and per-image presence counts for the corrected set.
pixel_distribution, number_objects = calculate_distribution(pixel_images)

In [None]:
# Display the raw (un-normalised) pixel totals, including 'background'.
pixel_distribution

In [None]:
# Print the absolute pixel count of each class, one "name count" pair per line.
for key, value in pixel_distribution.items():
    print(f'{key} {value}')

In [None]:
# Ground-truth CSVs, one per location category; raw pixel counts are printed
# for each of them below.
list_of_files = [
    '../../yam/plugins/imageCLEFcoral2020_GT_geographicallyDistinct.csv',
    '../../yam/plugins/imageCLEFcoral2020_GT_geographicallySimilar.csv',
    '../../yam/plugins/imageCLEFcoral2020_GT_sameLocation.csv',
    '../../yam/plugins/imageCLEFcoral2020_GT_similarLocation.csv'
]
for file in list_of_files:
    # Header: base name without the 4-character '.csv' suffix, then a blank line.
    print(file.rsplit('/', 1)[-1][:-4] + '\n')

    # Read, convert and aggregate exactly as in the cells above.
    gt_test = read_annotations_gt(file=file, task=2)
    pixel_images = convert_pixel_images(gt_test)
    pixel_distribution, number_objects = calculate_distribution(pixel_images)

    # Absolute pixel count per class, un-normalised.
    for key, value in pixel_distribution.items():
        print(f'{key} {value}')

    # Visual separator between files.
    print('\n')