In [1]:
import os
import shutil as sh
import numpy as np


In [2]:
# Root directory of the dataset; the Images, Labels, Metadata and files_lists
# paths used further down are built by joining onto this value.
dataset_dir = "../data/Dataset/"

In [3]:
# Expert identifiers. NOTE(review): these look like the per-annotator directory
# names (Ptg* / Stud*) under the Agreement folders; the list is not referenced
# by any other cell visible here, so confirm it is still needed.
experts = ['Ptg1', 'Ptg2', 'Ptg3', 'Stud1', 'Stud2', 'Stud3', 'Stud4']

In [4]:
# Sanity check: is one specific image file present at the expected relative path?
os.path.isfile("../data/Dataset/Images/Agreement/Explicit/Ptg1/1060.png")

True

In [5]:
def check_file(path, extention):
    """
    Tell whether `path` is an existing regular file with the given extension.

    Parameters
    ----------
    path : str
        path to test
    extention : str
        file extension without the leading dot, e.g. "png"

    Returns
    -------
    bool
        True only if `path` ends with "." + extention and is a file on disk
    """
    wanted_suffix = "." + extention
    if not path.endswith(wanted_suffix):
        return False
    return os.path.isfile(path)

In [6]:
def strip_images_dir_from_path(initial_path, images_dir):
    """
    Drop the leading `images_dir` components from `initial_path`.

    Both paths are normalised first. The function does not verify that
    `initial_path` really starts with `images_dir`; it simply removes as many
    leading path components as `images_dir` has.

    Parameters
    ----------
    initial_path : str
        path located under `images_dir`
    images_dir : str
        directory prefix to remove

    Returns
    -------
    str
        the remaining relative part of the path, or an empty string when
        nothing is left (e.g. `initial_path` is `images_dir` itself)
    """
    sep = os.path.sep
    prefix_len = len(os.path.normpath(images_dir).split(sep))
    remainder = os.path.normpath(initial_path).split(sep)[prefix_len:]
    # os.path.join() needs at least one argument; without this guard the
    # function raised TypeError when no components were left.
    if not remainder:
        return ""
    return os.path.join(*remainder)

def generate_new_path(initial_path, images_dir, lists_dir):
    """
    Re-root a path from `images_dir` onto `lists_dir`.

    The part of `initial_path` that follows `images_dir` is kept and joined
    onto `lists_dir`.
    """
    return os.path.join(
        lists_dir,
        strip_images_dir_from_path(initial_path, images_dir),
    )

In [7]:
def generate_lists(source_dir, target_dir, filename, extention):
    """
    Mirror the directory tree of `source_dir` under `target_dir` and write
    one list file per directory that contains matching files.

    Every sub-directory found while walking `source_dir` is processed (the
    root itself is not, because only the `dirs` entries of the walk are
    visited). For each one, the mirrored directory is created if missing and,
    when at least one file with the requested extension exists, a file named
    "<filename>.txt" is written there. It holds the sorted paths of those
    files, one per line, relative to the mirrored directory.

    Parameters
    ----------
    source_dir : str
        directory tree to scan
    target_dir : str
        root under which the mirrored tree and the list files are created
    filename : str
        base name (without extension) of the list file
    extention : str
        extension of the files to list, without the leading dot
    """
    for parent, subdirs, _ in os.walk(source_dir, topdown=False):
        for subdir in subdirs:
            current_dir = os.path.join(parent, subdir)
            mirrored_dir = generate_new_path(current_dir, source_dir, target_dir)
            if not os.path.exists(mirrored_dir):
                os.makedirs(mirrored_dir, exist_ok=True)

            # Collect matching files as paths relative to the mirrored directory.
            files_paths = []
            for entry in os.listdir(current_dir):
                candidate = os.path.join(current_dir, entry)
                if check_file(candidate, extention):
                    files_paths.append(os.path.relpath(candidate, start=mirrored_dir))

            if files_paths:
                list_file_path = os.path.join(mirrored_dir, ".".join([filename, "txt"]))
                with open(list_file_path, "w+") as list_file:
                    list_file.write("\n".join(sorted(str(p) for p in files_paths)))

            # Progress report: mirrored directory and number of listed files.
            print(mirrored_dir, len(files_paths))


In [8]:
def labels_float_to_int(labels_dir, extention):
    """
    Round every numeric value in the label files to the nearest integer, in place.

    All sub-directories found while walking `labels_dir` are visited (files
    lying directly in `labels_dir` itself are not touched, because only the
    `dirs` entries of the walk are processed). Each file with the requested
    extension is read, every whitespace-separated value on every line is
    rounded with `np.round`, and the file is overwritten with the result.

    Blank lines are dropped and any run of whitespace is accepted as a
    separator, so files with a trailing newline or trailing spaces do not
    raise `ValueError`.

    Parameters
    ----------
    labels_dir : str
        root directory of the label files
    extention : str
        extension of the label files, without the leading dot
    """
    for root, dirs, _ in os.walk(labels_dir, topdown=False):
        for name in dirs:
            dir_path = os.path.join(root, name)

            for filename in os.listdir(dir_path):
                file_path = os.path.join(dir_path, filename)
                if not check_file(file_path, extention):
                    continue

                new_lines = []
                with open(file_path, "r") as file:
                    for line in file:
                        # split() with no argument collapses repeated
                        # whitespace and yields [] for a blank line.
                        fields = line.split()
                        if not fields:
                            continue
                        new_lines.append(
                            " ".join(str(int(np.round(float(x)))) for x in fields)
                        )

                # The file is fully read and closed before it is rewritten.
                with open(file_path, "w") as file:
                    file.write("\n".join(new_lines))
                    
            

In [9]:
import cv2
import albumentations as A
from endoanalysis.datasets import extract_images_and_labels_paths
from endoanalysis.datasets import load_keypoints

In [10]:
def load_keypoints(file_path, x_size, y_size):
    """
    Load keypoints from a specific file as tuples

    Each non-blank line of the file must hold three whitespace-separated
    numbers: x, y and the object class. Values are rounded to the nearest
    integer.

    Parameters
    ----------
    file_path : str
        path to the file with keypoints
    x_size : int
        maximum x_value. If keypoint coordinate is greater or equal
        to x_size, it will be set to be x_size - 1
    y_size : int
        the same as x_size, but for y

    Returns
    -------
    keypoints : list of tuples
        list of keypoint tuples in format (x, y, obj_class)

    Raises
    ------
    ValueError
        if a non-blank line does not contain exactly three numeric values

    Note
    ----
    This function serves as helper for the pointdet.utils.dataset.PointsDataset class
    and probably should be moved there
    """

    keypoints = []

    with open(file_path, "r") as labels_file:
        for line in labels_file:
            # split() tolerates trailing/repeated whitespace; blank lines give [].
            fields = line.split()
            if not fields:
                continue
            x_center, y_center, obj_class = (
                int(np.round(float(value))) for value in fields
            )
            # Clamp to the last valid pixel index. The previous code subtracted
            # (x_size - 1) from x and only handled y == y_size exactly.
            x_center = min(x_center, x_size - 1)
            y_center = min(y_center, y_size - 1)
            keypoints.append((x_center, y_center, obj_class))

    return keypoints

In [11]:
def resize_dataset(images_list,labels_list, target_size=(256,256)):
    """
    Resize every image of a dataset in place and rescale its keypoint labels.

    The image files and the label files are OVERWRITTEN on disk.

    Parameters
    ----------
    images_list : str or list of str
        path (or list of paths) to the file(s) listing the images
    labels_list : str or list of str
        path (or list of paths) to the file(s) listing the labels, given in
        the same order as `images_list`
    target_size : tuple of int
        (height, width) of the resized images

    Raises
    ------
    FileNotFoundError
        if an image listed in the dataset cannot be read by OpenCV
    """

    # remove_invisible=False keeps every keypoint in the output, so the
    # transformed keypoints stay aligned one-to-one with `classes` below.
    transform = A.Compose(
        [A.Resize(height=target_size[0], width=target_size[1])],
        keypoint_params=A.KeypointParams(format="xy", remove_invisible=False),
    )

    if not isinstance(images_list, list):
        images_list = [images_list]
        labels_list = [labels_list]

    images_paths = []
    labels_paths = []
    for images_list_path, labels_list_path in zip(images_list, labels_list):
        images_paths_current, labels_paths_current = extract_images_and_labels_paths(images_list_path, labels_list_path)
        images_paths += images_paths_current
        labels_paths += labels_paths_current

    for image_path, labels_path in zip(images_paths, labels_paths):
        print(image_path)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        keypoints = load_keypoints(labels_path, image.shape[1], image.shape[0])

        if keypoints:
            keypoints = np.array(keypoints)
            coords = keypoints[:, 0:2]
            classes = keypoints[:, 2]
        else:
            # Reset both, otherwise `classes` is undefined for a first image
            # without keypoints or left over from the previous image.
            coords = []
            classes = []

        transformed = transform(image=image, keypoints=coords)

        cv2.imwrite(image_path, transformed["image"])

        labels_lines = [
            " ".join([str(int(y)) for y in label] + [str(class_id)]) + " \n"
            for label, class_id in zip(transformed["keypoints"], classes)
        ]

        # Opening with "w" truncates the file, so no separate removal is needed.
        with open(labels_path, "w") as labels_file:
            labels_file.writelines(labels_lines)

In [12]:
# Rebuild the files_lists directory from scratch, then generate the list files
# for images, labels and metadata.
lists_dir = os.path.join(dataset_dir, "files_lists")
if os.path.exists(lists_dir):
    sh.rmtree(lists_dir)
images_dir = os.path.join(dataset_dir, "Images")
labels_dir = os.path.join(dataset_dir, "Labels")
metadata_dir = os.path.join(dataset_dir, "Metadata")
os.mkdir(lists_dir)

generate_lists(images_dir, lists_dir, "images", extention="png")
generate_lists(labels_dir, lists_dir, "labels", extention="txt")
# NOTE(review): this call previously scanned labels_dir, leaving metadata_dir
# unused; it now scans metadata_dir. Confirm the .json files live under
# Dataset/Metadata.
generate_lists(metadata_dir, lists_dir, "metadata", extention="json")


../data/Dataset/files_lists/Bulk/Students 1212
../data/Dataset/files_lists/Bulk/Pathologists 178
../data/Dataset/files_lists/Agreement/Hidden/Incomplete/Stud3 12
../data/Dataset/files_lists/Agreement/Hidden/Incomplete/Ptg2 4
../data/Dataset/files_lists/Agreement/Hidden/Incomplete/Ptg3 12
../data/Dataset/files_lists/Agreement/Hidden/Incomplete/Stud2 15
../data/Dataset/files_lists/Agreement/Hidden/Incomplete/Stud4 10
../data/Dataset/files_lists/Agreement/Hidden/Incomplete/Ptg1 6
../data/Dataset/files_lists/Agreement/Hidden/Incomplete/Stud1 10
../data/Dataset/files_lists/Agreement/Hidden/Complete/Stud3 20
../data/Dataset/files_lists/Agreement/Hidden/Complete/Ptg3 20
../data/Dataset/files_lists/Agreement/Hidden/Complete/Stud2 20
../data/Dataset/files_lists/Agreement/Hidden/Complete/Stud4 20
../data/Dataset/files_lists/Agreement/Hidden/Complete/Stud1 20
../data/Dataset/files_lists/Agreement/Hidden/Incomplete 0
../data/Dataset/files_lists/Agreement/Hidden/Complete 0
../data/Dataset/files_lis

In [13]:
# Collect the images/labels list files written by generate_lists and resize
# the whole dataset in place.
images_lists = []
labels_lists = []

for root, _, files in os.walk(lists_dir, topdown=False):
    has_images = "images.txt" in files
    has_labels = "labels.txt" in files
    if has_images and has_labels:
        # Append both lists together so that index i of images_lists always
        # refers to the same directory as index i of labels_lists.
        images_lists.append(os.path.join(root, "images.txt"))
        labels_lists.append(os.path.join(root, "labels.txt"))
    elif has_images or has_labels:
        # A directory with only one of the two lists would shift the pairing.
        print("Skipping directory with unpaired list file:", root)

resize_dataset(images_lists, labels_lists)

../data/Dataset/Images/Bulk/Students/100.png
../data/Dataset/Images/Bulk/Students/1014.png
../data/Dataset/Images/Bulk/Students/1028.png
../data/Dataset/Images/Bulk/Students/1029.png
../data/Dataset/Images/Bulk/Students/1030.png
../data/Dataset/Images/Bulk/Students/1032.png
../data/Dataset/Images/Bulk/Students/1041.png
../data/Dataset/Images/Bulk/Students/1042.png
../data/Dataset/Images/Bulk/Students/1048.png
../data/Dataset/Images/Bulk/Students/1058.png
../data/Dataset/Images/Bulk/Students/1061.png
../data/Dataset/Images/Bulk/Students/1081.png
../data/Dataset/Images/Bulk/Students/1094.png
../data/Dataset/Images/Bulk/Students/1097.png
../data/Dataset/Images/Bulk/Students/1120.png
../data/Dataset/Images/Bulk/Students/1122.png
../data/Dataset/Images/Bulk/Students/1126.png
../data/Dataset/Images/Bulk/Students/1127.png
../data/Dataset/Images/Bulk/Students/1128.png
../data/Dataset/Images/Bulk/Students/1133.png
../data/Dataset/Images/Bulk/Students/1138.png
../data/Dataset/Images/Bulk/Student