### Create the folder tree following the YOLO directory structure

In [None]:
import glob
import json
import os
import shutil
from pathlib import Path

from sklearn.model_selection import train_test_split

## import the function
from src.utils.util import load_config, load_json, create_list_files
from src.utils.preprocessing import create_yolo_tree_structure



In [2]:
# Load the project configuration. Later cells read the dataset folder names
# from config['datasets'].
# NOTE(review): the path is relative, so this presumably expects the kernel's
# working directory to be the repository root — confirm.
config = load_config("src/config/config.yaml")

In [5]:
# Gather the files of each dataset sub-folder. All sub-folders sit under the
# same root folder and are looked up by their key in config['datasets'].
datasets_cfg = config['datasets']


def _list_dataset_files(folder_key, pattern):
    """Return create_list_files() for one dataset sub-folder.

    folder_key: key in config['datasets'] that holds the sub-folder name.
    pattern: glob pattern forwarded to create_list_files.
    """
    folder = os.path.join(datasets_cfg['root_folder'], datasets_cfg[folder_key])
    return create_list_files(folder, glob=pattern)


list_images = _list_dataset_files('name_tile_folder', "*.tif")
list_bitmap = _list_dataset_files('name_bitmap_folder', "*.png")
list_labels = _list_dataset_files('name_labels_folder', "*.txt")
list_metadata = _list_dataset_files('name_metadata_folder', "*.json")

### Check for Valid Images

Analyse each metadata file and flag the tile as invalid when the summed proportion of black and white pixels (`sum_black_white`) is greater than or equal to the threshold.

In [33]:
# Classify every metadata file by its "sum_black_white" value.
# A file whose value is >= threshold goes to list_non_valid_img, every other
# file goes to list_valid_img. Both lists hold the metadata file paths
# themselves (not image paths).
threshold = 0.98
list_non_valid_img = []
list_valid_img = []
for metadata in list_metadata:
    # JSON is UTF-8 by specification, so state the encoding explicitly instead
    # of relying on the platform default. The parsed content gets its own name
    # rather than rebinding the file handle, and the file is closed before the
    # classification runs.
    with open(metadata, 'r', encoding='utf-8') as metadata_file:
        tile_stats = json.load(metadata_file)

    if tile_stats["sum_black_white"] >= threshold:
        list_non_valid_img.append(metadata)
    else:
        list_valid_img.append(metadata)

print(f"The whole folder has {len(list_metadata)} files.")
print(f"INVALIDS: There was found {len(list_non_valid_img)} files higher than the threshold")

The whole folder has 817 files.
INVALIDS: There was found 304 files higher than the threshold


### Train - Test Split

In [116]:
## For every file found in the BITMAP folder, build the path of the tile that
## shares its file name:
##   <grandparent folder of the bitmap>/<name_tile_folder>/<bitmap stem>.tif
## NOTE(review): the path is only constructed here; nothing in this cell checks
## that the tile actually exists on disk.
name_tile_folder = config['datasets']['name_tile_folder']


def _tile_path_for(bitmap_path):
    """Return the path of the .tif tile named like `bitmap_path`.

    `bitmap_path` must expose `.parent` and `.stem` (pathlib-style).
    """
    tile_dir = bitmap_path.parent.parent.joinpath(name_tile_folder)
    return os.path.join(tile_dir, bitmap_path.stem + ".tif")


list_images_train_test = list(map(_tile_path_for, list_bitmap))

print(f"There are {len(list_images_train_test)}  images ready to be train and tested")
print(f" The size of the bitmap image is: {len(list_bitmap)}")
print(f"The total number of images is: {len(list_images)}")

There are 72  images ready to be train and tested
 The size of the bitmap image is: 72
The total number of images is: 817


- Apply the function to create the folder structure

In [None]:
# Build the YOLO train/test folder tree from the tiles matched to the bitmaps.
# The recorded output of this cell shows the helper copying images and labels
# into a TRAIN and a TEST set and returning per-split counts.
# (`Path` is imported in the import cell at the top of the notebook.)

# NOTE(review): absolute, machine-specific output location — consider moving
# it into the config file so the notebook runs on other machines.
output_folder = Path("/mnt/d/desktop/drone-mapping/data/new_tiling/YOLO")

# Split settings, named so they are easy to find and tune.
TEST_SIZE = 0.1      # value passed as test_size
RANDOM_STATE = 42    # value passed as random_state

create_yolo_tree_structure(
    list_imgs=list_images_train_test,
    name_folder_label=config['datasets']['name_labels_folder'],
    output_dir=output_folder,
    random_state=RANDOM_STATE,
    test_size=TEST_SIZE,
)


Creating YOLO dataset structure in: /mnt/d/desktop/drone-mapping/data/new_tiling/YOLO
Total images: 72
Train/Test split: 90% / 10%

  TRAIN set:
   Images copied: 64
   Labels copied: 64
  TEST set:
   Images copied: 8
   Labels copied: 8


{'train': {'images': 64, 'labels': 64, 'missing_labels': 0},
 'test': {'images': 8, 'labels': 8, 'missing_labels': 0}}