# Requires Google Drive to be mounted

In [0]:
import os
import glob
import shutil

from tqdm import tqdm

In [0]:
# Mount Google Drive at /content/drive; the folders used in the cells below
# live under that mount point. google.colab is only importable on Colab.
from google.colab import drive
drive.mount('/content/drive')

In [4]:
%cd "drive/My Drive/labelisation"

/content/drive/My Drive/labelisation


## Get images from labels

In [0]:
def get_images(labels_folder: str, imgs_folder: str, mode: str = "copy"):
    """
    Bring into labels_folder the .jpg image matching each .txt label.

    For every ``<name>.txt`` found in labels_folder, ``<name>.jpg`` is copied
    or moved from imgs_folder into labels_folder, unless labels_folder already
    holds that image.

    Args:
        labels_folder: directory holding the .txt label files; the images are
            written here.
        imgs_folder: directory searched for the matching .jpg images.
        mode: "copy" (default) leaves the source image in place, "move"
            removes it from imgs_folder.

    Returns:
        None. A summary is printed: how many images were transferred, how
        many were already present in labels_folder, and the names of the
        labels for which no image exists in either folder. An unsupported
        mode prints an error message and nothing is transferred.
    """
    # Resolve the transfer function once, before touching the filesystem,
    # so both modes share a single loop.
    if mode == "copy":
        transfer = shutil.copy
    elif mode == "move":
        transfer = shutil.move
    else:
        print("Wrong mode. Only 'move' or 'copy'")
        return

    labels_folder = os.path.join(labels_folder, '')
    imgs_folder = os.path.join(imgs_folder, '')

    # List labels_folder once and split every entry into (stem, extension).
    label_entries = [os.path.splitext(entry) for entry in os.listdir(labels_folder)]
    labels = [stem for stem, ext in label_entries if ext == '.txt']
    # Sets give constant-time membership tests inside the loop.
    present = {stem for stem, ext in label_entries if ext == '.jpg'}
    available = {
        stem
        for stem, ext in map(os.path.splitext, os.listdir(imgs_folder))
        if ext == '.jpg'
    }

    transferred = 0
    already_present = 0
    missing = []

    for label in tqdm(labels, desc="Looking for files "):
        if label in present:
            # The image is already next to its label: not missing, and
            # nothing to transfer (typical on a re-run, or after a "move").
            already_present += 1
        elif label in available:
            transfer(f"{imgs_folder}{label}.jpg", f"{labels_folder}{label}.jpg")
            transferred += 1
        else:
            missing.append(label)

    print(f"\n\n{transferred}/{len(labels)} images found.\n")
    if already_present:
        print(f"{already_present} images already present in {labels_folder}")
    if missing:
        print("Missing images:", missing)

#### WIDGET!

In [23]:
#@title get_images
labels_folder = "armoire_pmz/done/" #@param ["armoire_pmz/done/", "gauche_milieu_droite/done/"]
imgs_folder = "/content/drive/My Drive/multilabel/train/" #@param {type:"string"}
mode = "copy" #@param ["copy", "move"]


# Run get_images with the values selected in the form fields above.
get_images(
    labels_folder=labels_folder,
    imgs_folder=imgs_folder,
    mode=mode
)

Looking for files : 100%|██████████| 309/309 [01:59<00:00,  2.58it/s]



309/309 images found.






In [25]:
# Count the .jpg files now present in armoire_pmz/done/.
len(glob.glob("armoire_pmz/done/*.jpg"))

309

## Check duplicates

In [0]:
def check_duplicate(done_folder: str, todo_folder: str):
    """
    Report the .jpg images of todo_folder that also exist in done_folder.

    Images are compared by file name without extension. The number of
    duplicates is printed, followed by their names when there are any.
    """
    def _jpg_stems(folder):
        # Names (extension stripped) of the .jpg entries found in `folder`.
        stems = []
        for entry in os.listdir(os.path.join(folder, '')):
            stem, ext = os.path.splitext(entry)
            if ext == '.jpg':
                stems.append(stem)
        return stems

    already_done = _jpg_stems(done_folder)
    pending = _jpg_stems(todo_folder)

    dup = [
        name
        for name in tqdm(pending, desc="Checking for duplicates ")
        if name in already_done
    ]

    print(f"\n\n{len(dup)} duplicates images found.\n")
    if dup:
        print("Duplicates filenames:", dup)

#### WIDGET!

In [0]:
#@title check_duplicate
done_folder = "armoire_pmz/done/" #@param ["gauche_milieu_droite/done/", "armoire_pmz/done/"]
todo_folder = "armoire_pmz/todo/" #@param ["gauche_milieu_droite/todo/", "armoire_pmz/todo/"]


# Run check_duplicate with the folders selected in the form fields above.
check_duplicate(
    done_folder=done_folder,
    todo_folder=todo_folder
)

Checking for duplicates : 0it [00:00, ?it/s]



0 duplicates images found.






## Get unlabeled images from folder

In [0]:
def get_unlabeled(done_folder: str, imgs_folder: str, todo_folder: str):
    """
    Copy the images that have no label yet into todo_folder.

    An image ``<name>.jpg`` of imgs_folder is considered unlabeled when
    done_folder holds no ``<name>.txt``. Unlabeled images that todo_folder
    already contains are left untouched.

    Args:
        done_folder: directory holding the .txt label files.
        imgs_folder: directory holding the candidate .jpg images.
        todo_folder: directory receiving the copies of unlabeled images.

    Returns:
        None. The number of images copied by this call is printed.
    """
    done_folder = os.path.join(done_folder, '')
    imgs_folder = os.path.join(imgs_folder, '')
    todo_folder = os.path.join(todo_folder, '')

    def _stems(folder, extension):
        # Names (extension stripped) of the entries of `folder` ending in `extension`.
        return {
            stem
            for stem, ext in map(os.path.splitext, os.listdir(folder))
            if ext == extension
        }

    done = _stems(done_folder, '.txt')
    imgs = _stems(imgs_folder, '.jpg')
    todos = _stems(todo_folder, '.jpg')

    # Plain difference, not symmetric difference: a label without an image in
    # imgs_folder must not be treated as an image to copy (the copy would
    # raise FileNotFoundError). Sorted so the processing order is reproducible.
    unlabeled = sorted(imgs - done)

    c = 0

    for img in tqdm(unlabeled, desc="Checking for unlabeled images "):
        if img not in todos:
            shutil.copy(f"{imgs_folder}{img}.jpg", f"{todo_folder}{img}.jpg")
            c += 1

    print(f"\n\n{c} unlabeled images found.\n")

#### WIDGET!

In [16]:
#@title get_unlabeled
done_folder = "gauche_milieu_droite/done/" #@param ["gauche_milieu_droite/done/", "armoire_pmz/done/"]
imgs_folder = "/content/drive/My Drive/multilabel/train/" #@param {type:"string"}
todo_folder = "gauche_milieu_droite/todo/" #@param ["gauche_milieu_droite/todo/", "armoire_pmz/todo/"]


# Run get_unlabeled with the folders selected in the form fields above.
get_unlabeled(
    done_folder=done_folder,
    imgs_folder=imgs_folder,
    todo_folder=todo_folder
)

Checking for unlabeled images : 100%|██████████| 752/752 [01:08<00:00, 10.99it/s]



124 unlabeled images found.






In [19]:
# Count the .jpg files present in gauche_milieu_droite/todo/.
len(glob.glob('gauche_milieu_droite/todo/*.jpg'))

752