In [None]:
import shutil
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter, ImageOps, ImageDraw
import PIL
import glob
import pandas as pd
import numpy as np
import os
import random
from src import generate_labels

## Generating labels from masks or json files
**YOLOv*** MUST have label data in a text (.txt) file with the same name as the img. Each label is in the following format, where x and y are the anchor (center) point of the object.
* \<object-class-id> \<x> \<y> \<width> \<height>

<ins>Instructions for .png Mask Labels:</ins>

When dealing with .png masks, each label is a mask image with the same dimensions as the original image. We use OpenCV to turn these masks into labels. I wrote a class called generate_data, which is currently stored in generate_data.py (this notebook imports it as `generate_labels` from `src`). Its `lbl_masks_to_img_lbls` method is used to do this.

### LABEL GEN - JSON files with bounding boxes

In [None]:
## For images with corresponding JSON labels that contain bounding boxes (top-left (x, y), w, h).

## Some 2019 and all 2020 JSON files contain only masks; others have JSON bboxes. Some are COCO JSON.


def sorted_glob(pattern):
    """Return the paths matching ``pattern`` in a deterministic order.

    ``glob.glob`` makes no guarantee about the order of its results, while the
    image and label lists built below are parallel lists (presumably paired by
    position inside ``generate_labels`` -- TODO confirm). Sorting on the path
    with its extension stripped puts ``<stem>.png`` and ``<stem>.json`` at the
    same position of their respective lists.
    """
    return sorted(glob.glob(pattern), key=lambda p: os.path.splitext(p)[0])


img_pths2019 = sorted_glob(r"Z:\__Organized_Directories_InProgress\Annotated_Images\fish\finalized_RGB\2019*\Original_Labeled_Images_RGB\*.png")
json_lbls2019 = sorted_glob(r"Z:\__Organized_Directories_InProgress\Annotated_Images\fish\finalized_RGB\2019*\Labeled_JSON_Files\*.json")
## Boolean index of the 2019 images that have a JSON file with the same base name
## (base name = everything before the first "." of the file name).
bn = lambda f: str(os.path.basename(f)).split(".")[0]
img_pths2019_w_json_idx = pd.Series(img_pths2019).apply(bn).isin(pd.Series(json_lbls2019).apply(bn))
img_pths2019j = list(pd.Series(img_pths2019)[img_pths2019_w_json_idx])
assert len(img_pths2019j) == len(json_lbls2019)
print(len(img_pths2019j))

img_pths2020 = sorted_glob(r"Z:\__Organized_Directories_InProgress\Annotated_Images\fish\finalized_RGB\2020*\Original_Labeled_Images_RGB\*.png")
json_lbls2020 = sorted_glob(r"Z:\__Organized_Directories_InProgress\Annotated_Images\fish\finalized_RGB\2020*\Labeled_JSON_Files\*.json")
assert len(img_pths2020) == len(json_lbls2020)
print(len(img_pths2020))

## 2021 all have bboxes in the json files
img_pths2021_huron = sorted_glob(r"Z:\2021_MA\LakeHuron\_LABALED\*\Original_Labeled_Images_RGB\*.png")
json_lbls2021_huron = sorted_glob(r"Z:\2021_MA\LakeHuron\_LABALED\*\json_files\*.json")
assert len(img_pths2021_huron) == len(json_lbls2021_huron)
print(len(img_pths2021_huron))

## Parallel lists: same year order on both sides, each part sorted by path stem.
img_pths = img_pths2019j + img_pths2020 + img_pths2021_huron
jsn_pths = json_lbls2019 + json_lbls2020 + json_lbls2021_huron
## Counts recorded in this cell (presumably printed by an earlier run):
##   598    <- 2019 images that have a JSON file
##   26497  <- 2020
##   6239   <- 2021 Lake Huron
# generate_labels(img_pth_lst=img_pths, jsn_pth_lst=jsn_pths).json_to_img_lbls(save_path = 'datasets_2019-2021_all/labels_from_json')

### LABEL GEN - .PNG MASKS

In [None]:
## Generating ratio labels from BINARY png masks

## glob.glob returns matches in arbitrary order, while the image and mask lists
## below are parallel lists (presumably paired by position in generate_labels --
## TODO confirm). Sorting both on the path without its extension keeps an image
## and its same-named mask at the same index.
no_ext = lambda p: os.path.splitext(p)[0]

## 2019 mask overlays
## NOTE: this cell rebinds img_pths2019 / img_pths2020 / img_pths, which the JSON cell also assigns.
img_pths2019 = sorted(glob.glob(r"Z:\__Organized_Directories_InProgress\Annotated_Images\fish\finalized_RGB\2019*\Original_Labeled_Images_RGB\*.png"), key=no_ext)
msk_lbls2019 = sorted(glob.glob(r"Z:\__Organized_Directories_InProgress\Annotated_Images\fish\finalized_RGB\2019*\Fish_Binary_Masks\*.png"), key=no_ext)
# ## finding the boolean idx of images with corresponding json files
# bn = lambda f: str(os.path.basename(f)).split(".")[0]
# img_pths2019_w_msk_idx = pd.Series(img_pths2019).apply(bn).isin(pd.Series(json_lbls2019).apply(bn))
# img_pths2019 = list(pd.Series(img_pths2019)[img_pths2019_w_msk_idx])
assert len(img_pths2019) == len(msk_lbls2019)
print(len(img_pths2019))

img_pths2020 = sorted(glob.glob(r"Z:\__Organized_Directories_InProgress\Annotated_Images\fish\finalized_RGB\2020*\Original_Labeled_Images_RGB\*.png"), key=no_ext)
msk_lbls2020 = sorted(glob.glob(r"Z:\__Organized_Directories_InProgress\Annotated_Images\fish\finalized_RGB\2020*\Fish_Binary_Masks\*.png"), key=no_ext)
assert len(img_pths2020) == len(msk_lbls2020)
print(len(img_pths2020))

## Parallel lists: 2019 entries first, then 2020, each part sorted by path stem.
img_pths = img_pths2019 + img_pths2020
msk_pths = msk_lbls2019 + msk_lbls2020
## Counts recorded in this cell (presumably printed by an earlier run):
##   2147   <- 2019
##   26497  <- 2020
# generate_labels(img_pth_lst  = img_pths,
#                 lbl_msk_pths = msk_pths).lbl_masks_to_img_lbls(save_path = 'tst_lbls', color=None)

In [2]:
## move labels from json and labels from mask to the same folder but make sure the better of the two are kept
# filepath_json = r"D:\ageglio-1\gobyfinder_yolov8\datasets_2019-2021_all\labels_from_json"
# filepath_mask = r"D:\ageglio-1\gobyfinder_yolov8\datasets_2019-2021_all\labels_from_masks"
# filepath_all = r"D:\ageglio-1\gobyfinder_yolov8\datasets_2019-2021_all\labels"


# labels_in_json = os.listdir(r"D:\ageglio-1\gobyfinder_yolov8\datasets_2019-2021_all\labels_from_json")
# labels_in_mask = os.listdir(r"D:\ageglio-1\gobyfinder_yolov8\datasets_2019-2021_all\labels_from_masks")

def combine_labels(filepath_json, filepath_mask, filepath_all, labels_in_json, labels_in_mask):
    """Merge JSON-derived and mask-derived label files into a single folder.

    Files that exist in only one of the two sources are copied as they are.
    For a file name present in both sources, the larger file (by size in
    bytes) is kept; on a tie the JSON-derived file wins. The rationale given
    for preferring JSON is that those labels include individual boxes for
    overlapping fish.

    The size of each group (mask-only, JSON-only, common) is printed before
    that group is copied.

    Parameters
    ----------
    filepath_json : str
        Folder holding the label files generated from JSON annotations.
    filepath_mask : str
        Folder holding the label files generated from masks.
    filepath_all : str
        Destination folder. It is created (including parents) when missing.
    labels_in_json : iterable of str
        File names (not full paths) available in ``filepath_json``.
    labels_in_mask : iterable of str
        File names (not full paths) available in ``filepath_mask``.

    Raises
    ------
    FileExistsError
        If ``filepath_all`` exists but is not a directory.
    """
    # shutil.copy only copies *into* dst when dst is an existing directory;
    # otherwise dst is taken as the target file name and every label would
    # overwrite that one file. Make sure the destination folder exists first.
    os.makedirs(filepath_all, exist_ok=True)

    json_names = set(labels_in_json)
    mask_names = set(labels_in_mask)
    labels_common = list(mask_names & json_names)
    labels_mask_not_json = list(mask_names - json_names)
    labels_json_not_mask = list(json_names - mask_names)

    print(len(labels_mask_not_json))
    for label in labels_mask_not_json:
        shutil.copy(os.path.join(filepath_mask, label), filepath_all)

    print(len(labels_json_not_mask))
    for label in labels_json_not_mask:
        shutil.copy(os.path.join(filepath_json, label), filepath_all)

    print(len(labels_common))
    for label in labels_common:
        file_json = os.path.join(filepath_json, label)
        file_mask = os.path.join(filepath_mask, label)
        # Keep the larger file; ">=" makes the JSON label win when sizes are equal.
        if os.path.getsize(file_json) >= os.path.getsize(file_mask):
            shutil.copy(file_json, filepath_all)
        else:
            shutil.copy(file_mask, filepath_all)

## label masks to contours

In [None]:
# i = 1
# df = generate_labels(img_pth_lst  = img_pths, lbl_msk_pths = msk_pths).lbl_masks_to_contours(save_path = 'tst_lbls2')