In [18]:
from os import path, makedirs
import os

"""
For the given path, get the List of all files in the directory tree 
https://thispointer.com/python-how-to-get-list-of-files-in-directory-and-sub-directories/
"""


def GetFileList(dirName, endings=[".jpg", ".jpeg", ".png", ".mp4"]):
    # create a list of file and sub directories
    # names in the given directory
    listOfFile = os.listdir(dirName)
    allFiles = list()
    # Make sure all file endings start with a '.'

    for i, ending in enumerate(endings):
        if ending[0] != ".":
            endings[i] = "." + ending
    # Iterate over all the entries
    for entry in listOfFile:
        # Create full path
        fullPath = os.path.join(dirName, entry)
        # If entry is a directory then get the list of files in this directory
        if os.path.isdir(fullPath):
            allFiles = allFiles + GetFileList(fullPath, endings)
        else:
            for ending in endings:
                if entry.endswith(ending):
                    allFiles.append(fullPath)
    return allFiles


def ChangeToOtherMachine(filelist, repo="TrainYourOwnYOLO", remote_machine=""):
    """
    Takes a list of file_names located in a repo and changes it to the local machines file names. File must be executed from withing the repository
    Example:
    '/home/ubuntu/TrainYourOwnYOLO/Data/Street_View_Images/vulnerable/test.jpg'
    Get's converted to
    
    'C:/Users/Anton/TrainYourOwnYOLO/Data/Street_View_Images/vulnerable/test.jpg'
    """
    filelist = [x.replace("\\", "/") for x in filelist]
    if repo[-1] == "/":
        repo = repo[:-1]
    if remote_machine:
        prefix = remote_machine.replace("\\", "/")
    else:
        prefix = ((os.path.dirname(os.path.abspath(__file__)).split(repo))[0]).replace(
            "\\", "/"
        )
    new_list = []

    for file in filelist:
        suffix = (file.split(repo))[1]
        if suffix[0] == "/":
            suffix = suffix[1:]
        new_list.append(os.path.join(prefix, repo + "/", suffix).replace("\\", "/"))
    return new_list

In [30]:
from os import path, makedirs
import pandas as pd
import numpy as np
import re
import os
from PIL import Image


def convert_vott_csv_to_yolo(
    vott_df,
    labeldict,
    path="",
    target_name="data_train.txt",
    abs_path=False,
):

    # Encode labels according to labeldict if code's don't exist
    label_names = [
        "Standing",
        "Walking" ]
    if not "code" in vott_df.columns:
        vott_df["code"] = vott_df["label"].apply(lambda x: labeldict[x])
    # Round float to ints
    for col in vott_df[["xmin", "ymin", "xmax", "ymax"]]:
        vott_df[col] = (vott_df[col]).apply(lambda x: round(x))

    # Create Yolo Text file
    last_image = ""
    txt_file = ""

    for index, row in vott_df.iterrows():
        if not last_image == row["image"]:
            if abs_path:
                txt_file += "\n" + row["image_path"] + " "
            else:
                txt_file += "\n" + os.path.join(path, row["image"]) + " "
            txt_file += ",".join(
                [
                    str(x)
                    for x in (row[["xmin", "ymin", "xmax", "ymax", "code"]].tolist())
                ]
            )
        else:
            txt_file += " "
            txt_file += ",".join(
                [
                    str(x)
                    for x in (row[["xmin", "ymin", "xmax", "ymax", "code"]].tolist())
                ]
            )
        last_image = row["image"]
    file = open(target_name, "w")
    file.write(txt_file[1:])
    file.close()
    return True


def csv_from_xml(directory, path_name=""):
    # First get all images and xml files from path and its subfolders
    label_names = [
        "Standing",
        "Walking" ]
    image_paths = GetFileList(directory, ".jpg")
    xml_paths = GetFileList(directory, ".xml")
    result_df = pd.DataFrame()
    if not len(image_paths) == len(xml_paths):
        print("number of annotations doesnt match number of images")
        return False
    for image in image_paths:
        target_filename = os.path.join(path_name, image) if path_name else image
        source_filename = os.path.join(directory, image)
        y_size, x_size, _ = np.array(Image.open(source_filename)).shape
        source_xml = image.replace(".jpg", ".xml")
        txt = open(source_xml, "r").read()
        y_vals = re.findall(r"(?:x>\n)(.*)(?:\n</)", txt)
        ymin_vals = y_vals[::2]
        ymax_vals = y_vals[1::2]
        x_vals = re.findall(r"(?:y>\n)(.*)(?:\n</)", txt)
        xmin_vals = x_vals[::2]
        xmax_vals = x_vals[1::2]
        label_vals = re.findall(r"(?:label>\n)(.*)(?:\n</)", txt)
        label_name_vals = re.findall(r"(?:labelname>\n)(.*)(?:\n</)", txt)
        df = pd.DataFrame()
        df["xmin"] = xmin_vals
        df["xmin"] = df["xmin"].astype(float) * x_size
        df["ymin"] = ymin_vals
        df["ymin"] = df["ymin"].astype(float) * y_size
        df["xmax"] = xmax_vals
        df["xmax"] = df["xmax"].astype(float) * x_size
        df["ymax"] = ymax_vals
        df["ymax"] = df["ymax"].astype(float) * y_size
        df["label"] = label_name_vals
        df["code"] = label_vals
        df["image_path"] = target_filename
        df["image"] = os.path.basename(target_filename)
        result_df = result_df.append(df)
    #     Bring image column first
    cols = list(df.columns)
    cols = [cols[-1]] + cols[:-1]
    result_df = result_df[cols]
    return result_df


def crop_and_save(
    image_df,
    target_path,
    target_file,
    one=True,
    label_dict={0: "house"},
    postfix="cropped",
):
    """Takes a vott_csv file with image names, labels and crop_boxes
    and crops the images accordingly
    
    Input csv file format:
    
    image   xmin ymin xmax ymax label
    im.jpg  0    10   100  500  house
    
    Parameters
    ----------
    df : pd.Dataframe 
        The input dataframe with file_names, bounding box info
        and label
    source_path : str
        Path of source images
    target_path : str, optional
        Path to save cropped images
    one : boolean, optional
        if True, only the most central house will be returned
    Returns
    -------
    True if completed succesfully
    """
    if not path.isdir(target_path):
        makedirs(target_path)

    previous_name = ""
    counter = 0
    image_df.dropna(inplace=True)
    image_df["image_path"] = ChangeToOtherMachine(image_df["image_path"].values)

    def find_rel_position(row):
        current_name = row["image_path"]
        x_size, _ = Image.open(current_name).size
        x_centrality = abs((row["xmin"] + row["xmax"]) / 2 / x_size - 0.5)
        return x_centrality

    if one:
        centrality = []
        for index, row in image_df.iterrows():
            centrality.append(find_rel_position(row))
        image_df["x_centrality"] = pd.Series(centrality)
        image_df.sort_values(["image", "x_centrality"], inplace=True)
        image_df.drop_duplicates(subset="image", keep="first", inplace=True)
    new_paths = []
    for index, row in image_df.iterrows():
        current_name = row["image_path"]
        if current_name == previous_name:
            counter += 1
        else:
            counter = 0
        imageObject = Image.open(current_name)
        cropped = imageObject.crop((row["xmin"], row["ymin"], row["xmax"], row["ymax"]))
        label = row["label"]
        if type(label) == int:
            label = label_dict[label]
        image_name_cropped = (
            "_".join([row["image"][:-4], postfix, label, str(counter)]) + ".jpg"
        )
        new_path = os.path.join(target_path, image_name_cropped)
        cropped.save(new_path)
        new_paths.append(new_path.replace("\\", "/"))
        previous_name = current_name
    pd.DataFrame(new_paths, columns=["image_path"]).to_csv(target_file)
    return True


if __name__ == "__main__":
    # Prepare the houses dataset for YOLO
    label_names = [
        "Standing",
        "Walking" ]
    num_list=[1,2]
    labeldict = dict(zip(label_names, num_list))
    multi_df = pd.read_csv(
        "C:\\Users\\Desktop\\Dataset-SWImages\\vott-csv-export"
    )
    multi_df.drop_duplicates(subset=None, keep="first", inplace=True)
    convert_vott_csv_to_yolo(
        multi_df,
        labeldict,
        path="C:\\Users\\Desktop\\Dataset-SWImages",
        target_name="data_train.txt",
    )

    path="C:\\Users\\Desktop\\Dataset-SWImages"
    label_names = [
        "Standing",
        "Walking" ,]
    convert_vott_csv_to_yolo(
        csv_from_xml(path, "C:\\Users\\Desktop\\Dataset-SWImages"), labeldict
    )

FileNotFoundError: [Errno 2] File b'C:\\Users\\Desktop\\Dataset-SWImages\\vott-csv-export' does not exist: b'C:\\Users\\Desktop\\Dataset-SWImages\\vott-csv-export'

In [5]:
from PIL import Image
from os import path, makedirs
import os
import re
import pandas as pd
import sys
import argparse


sys.path.append(os.path.join("C:\\Users\\pmdna\\Desktop\\Dataset-SWImages", "Utils"))
from Convert_Format import convert_vott_csv_to_yolo

Data_Folder = os.path.join(get_parent_dir(1), "Data")
VoTT_Folder = os.path.join(
    Data_Folder, "Source_Images", "Training_Images", "vott-csv-export"
)
VoTT_csv = os.path.join(VoTT_Folder, "Annotations-export.csv")
YOLO_filename = os.path.join(VoTT_Folder, "data_train.txt")

model_folder = os.path.join(Data_Folder, "Model_Weights")
classes_filename = os.path.join(model_folder, "data_classes.txt")

if __name__ == "__main__":
    # surpress any inhereted default values
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
    """
    Command line options
    """
    parser.add_argument(
        "--VoTT_Folder",
        type=str,
        default=VoTT_Folder,
        help="Absolute path to the exported files from the image tagging step with VoTT. Default is "
        + VoTT_Folder,
    )

    parser.add_argument(
        "--VoTT_csv",
        type=str,
        default=VoTT_csv,
        help="Absolute path to the *.csv file exported from VoTT. Default is "
        + VoTT_csv,
    )
    parser.add_argument(
        "--YOLO_filename",
        type=str,
        default=YOLO_filename,
        help="Absolute path to the file where the annotations in YOLO format should be saved. Default is "
        + YOLO_filename,
    )

    FLAGS = parser.parse_args()

    # Prepare the dataset for YOLO
    multi_df = pd.read_csv(FLAGS.VoTT_csv)
    labels = multi_df["label"].unique()
    labeldict = dict(zip(labels, range(len(labels))))
    multi_df.drop_duplicates(subset=None, keep="first", inplace=True)
    train_path = FLAGS.VoTT_Folder
    convert_vott_csv_to_yolo(
        multi_df, labeldict, path=train_path, target_name=FLAGS.YOLO_filename
    )

    # Make classes file
    file = open(classes_filename, "w")

    # Sort Dict by Values
    SortedLabelDict = sorted(labeldict.items(), key=lambda x: x[1])
    for elem in SortedLabelDict:
        file.write(elem[0] + "\n")
    file.close()

ModuleNotFoundError: No module named 'Convert_Format'