# Install Ultralytics in Colab

In [None]:
pip install ultralytics



# Imports

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the project paths used later in this
# notebook are located under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

In [None]:
from ultralytics import YOLO
from ultralytics.data.utils import autosplit

# Split and Merge Functions

**IMPORTANT:** For `DATASET_DIR` to work, you need to:
1. Create a shortcut to the Shared Drive
2. Add it to the root directory of your Drive (e.g., `"/content/drive/MyDrive/Verizon ML Project"`)
3. Upload each image and its label `.txt` file **TOGETHER** in the `dataset` folder, using the same file name for both (e.g., `phone_01.png` and `phone_01.txt`)

In [None]:
# Project folder on the mounted Drive (see the setup steps above this cell).
PROJECT_DIR = "/content/drive/MyDrive/Verizon ML Project"
# Folder that holds the images and their label .txt files side by side.
DATASET_DIR = os.path.join(PROJECT_DIR, "dataset")

# Partition directories registered by split() and read by is_valid_split()
# and merge(). It starts empty, so those two functions have nothing to work on
# until split() has been run in the current kernel session.
split_paths = set() # element => (img_path, label_path)

def split():
    """Partition the dataset into train/val/test image and label folders.

    Runs Ultralytics ``autosplit`` on ``DATASET_DIR`` (weights 0.7/0.2/0.1,
    annotated images only), then, for every image listed in the generated
    ``autosplit_*.txt`` files, moves the image into
    ``DATASET_DIR/images/<split>`` and its same-named ``.txt`` label into
    ``DATASET_DIR/labels/<split>``. Every (image dir, label dir) pair is
    recorded in the module-level ``split_paths`` set.

    Moving is best-effort: an entry whose image or label file is missing, or
    whose move fails, is reported with ``print`` and skipped, so an image is
    not moved without its label because of a missing file.
    """

    # split dataset into train, val, and test
    autosplit(
        path=DATASET_DIR,
        weights=(0.7, 0.2, 0.1),
        annotated_only=True # only images w/ .txt files are split
    )

    images_path = os.path.join(DATASET_DIR, "images")
    labels_path = os.path.join(DATASET_DIR, "labels")

    # key (folder) => value (corresponding .txt from autosplit)
    splits = {"train":"autosplit_train.txt",
              "val":"autosplit_val.txt",
              "test":"autosplit_test.txt"}

    for split_type, split_txt in splits.items():

        # create partitioned directories (ex. ./dataset/images/train);
        # makedirs(exist_ok=True) also creates the "images"/"labels" parents
        # and tolerates reruns, so one existing folder can no longer prevent
        # the creation of its sibling.
        split_img_path = os.path.join(images_path, split_type)
        split_label_path = os.path.join(labels_path, split_type)
        os.makedirs(split_img_path, exist_ok=True)
        os.makedirs(split_label_path, exist_ok=True)
        split_paths.add((split_img_path, split_label_path))

        # The list files are read from PROJECT_DIR and their entries are
        # resolved relative to it. The context manager closes the file.
        with open(os.path.join(PROJECT_DIR, split_txt)) as list_file:
            for line in list_file:

                rel_path = line.strip() # remove \n
                if not rel_path:
                    continue

                img_path = os.path.join(PROJECT_DIR, rel_path)
                # Swap only the final extension for .txt; works for any image
                # extension and never touches ".png"/".jpg" inside the path.
                txt_path = os.path.splitext(img_path)[0] + ".txt"

                if not (os.path.isfile(img_path) and os.path.isfile(txt_path)):
                    print(f"Skipping {img_path}: image or label file not found")
                    continue

                new_img_path = os.path.join(split_img_path, os.path.basename(img_path))
                new_txt_path = os.path.join(split_label_path, os.path.basename(txt_path))

                # move files
                try:
                    os.rename(img_path, new_img_path)
                    os.rename(txt_path, new_txt_path)
                except OSError as err:
                    print(f"Skipping {img_path}: {err}")

def is_valid_split(paths=None):
    """Check that every partition has matching image and label file names.

    Args:
        paths: Iterable of ``(img_dir, label_dir)`` tuples to check. Defaults
            to the module-level ``split_paths`` filled in by ``split()``.

    Returns:
        ``True`` if, for every pair, the set of file names in ``img_dir``
        equals the set of file names in ``label_dir`` once the final
        extension is removed; ``False`` at the first pair that differs.
        An empty ``paths`` yields ``True`` because there is nothing to compare.

    Raises:
        OSError: If one of the directories cannot be listed.
    """
    if paths is None:
        paths = split_paths

    for img_dir, label_dir in paths:
        # splitext strips only the last extension, so any image extension is
        # handled and a ".png"/".jpg"/".txt" inside the name is left alone.
        img_stems = {os.path.splitext(name)[0] for name in os.listdir(img_dir)}
        label_stems = {os.path.splitext(name)[0] for name in os.listdir(label_dir)}
        if img_stems != label_stems:
            return False
    return True

def merge(paths=None, dest=None):
    """Move every partitioned image and label back into one flat folder.

    Args:
        paths: Iterable of ``(img_dir, label_dir)`` tuples whose files are
            moved. Defaults to the module-level ``split_paths`` filled in by
            ``split()``.
        dest: Directory that receives the files. Defaults to ``DATASET_DIR``.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be moved.

    The partition directories themselves are left in place (empty).
    """
    if paths is None:
        paths = split_paths
    if dest is None:
        dest = DATASET_DIR

    for img_dir, label_dir in paths:
        # Same order as before: all images of a partition, then its labels.
        for src_dir in (img_dir, label_dir):
            for name in os.listdir(src_dir):
                os.rename(os.path.join(src_dir, name), os.path.join(dest, name))

In [None]:
# Partition the dataset in place: files are moved into images/<split> and labels/<split>.
split()

Autosplitting images from /content/drive/MyDrive/Verizon ML Project/dataset, using *.txt labeled images only


100%|██████████| 293/293 [00:02<00:00, 121.72it/s]


In [None]:
# True when every partition's image and label file names match.
is_valid_split()

True

In [None]:
# Undo the split: move every image and label back into DATASET_DIR.
merge()