# Prepare data
## 15/3/21

Compute the percentage of with-egg pixels in each ROI and save train/val/test lists that include this information. The notebook assumes a VOC directory structure and writes each line as:

>`image-id %with-egg[float]`



In [1]:
import os
import glob
import sys
import numpy as np
import cv2
import matplotlib.pyplot as plt

Specify locations, files, etc.

In [2]:
# Directory holding the per-image segmentation masks (<image-id>.png).
maskdir = '/home/eorenstein/VOCCopepodEgg/SegmentationMask'
# Directory holding the image-set lists, and the list to process in this run.
imgsetdir = '/home/eorenstein/VOCCopepodEgg/ImageSets/Main/SplitByProfile-230221/'
imgset = 'egg_test.txt'

# Output directory; the new list is written here under the same file name.
savedir = '/home/eorenstein/VOCCopepodEgg/ImageSets/Main/DeepReg-EggOnly-150321'

# makedirs(exist_ok=True) is safe to re-run, creates missing parent
# directories, and avoids the check-then-create race of exists() + mkdir().
os.makedirs(savedir, exist_ok=True)

Get the desired list of img-ids.

In [3]:
# Read the image-set list: one image id per line.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(os.path.join(imgsetdir, imgset), 'r') as ff:
    # Strip surrounding whitespace/newlines and skip blank lines so that an
    # empty trailing line does not become an empty image id.
    imgs = [line.strip() for line in ff if line.strip()]

Define a function to get the percent with-egg pixels from the masks.

In [4]:
def get_pct(imgpath):
    """
    return the ratio of egg pixels to copepod pixels in a mask
    assumes:
        - pixel value 50 == copepod pixel
        - pixel value 100 == egg pixel

    The result is num_egg / num_cope rounded to 4 decimals, i.e. a fraction
    relative to the copepod-labelled pixels rather than a 0-100 percentage.
    If the mask contains no copepod pixels the division follows numpy float
    semantics and yields nan (no egg pixels either) or inf.

    :param imgpath: absolute path to segmentation mask
    :return pct: egg-to-copepod pixel ratio [float]
    :raises FileNotFoundError: if imgpath is not an existing file
    :raises OSError: if the file exists but cannot be decoded as an image
    """

    # cv2.imread does not raise on a bad path; it returns None, which the
    # comparisons below would silently turn into a nan ratio. Fail loudly.
    if not os.path.isfile(imgpath):
        raise FileNotFoundError(f'segmentation mask not found: {imgpath}')

    # Load as a single-channel (grayscale) image so labels compare directly.
    mask = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise OSError(f'could not read segmentation mask: {imgpath}')

    # Count pixels per label; dtype=float keeps the division in floating point.
    num_cope = np.sum(mask == 50, dtype=float)
    num_egg = np.sum(mask == 100, dtype=float)

    return np.around(num_egg / num_cope, decimals=4)

Iterate over the specified list and compute the value for each image's mask.

In [5]:
# Pair every image id with the value computed from its segmentation mask.
tmp = []
for imgid in imgs:
    img = os.path.join(maskdir, imgid + '.png')
    pct = get_pct(img)
    tmp.append((imgid, pct))

# print one out to make sure it looks reasonable
# (entry 10 when available; fall back to the last entry for short lists so
# this sanity check cannot raise IndexError and stop the notebook)
if tmp:
    print(tmp[min(10, len(tmp) - 1)])

('65574434', 0.785)


Save out to the specified directory

In [6]:
# Write the new list next to the other image sets, reusing the input file name.
savepath = os.path.join(savedir, imgset)

# One "<image-id> <value>" record per line; the context manager closes the file.
with open(savepath, 'w') as ff:
    ff.writelines(f'{entry[0]} {entry[1]}\n' for entry in tmp)