# Convert data to YOLOv5 PyTorch format

> **1. Set the paths to the data you want to prepare.**

> **2. Split the data into train, test, and validation sets.**

> **3. Copy each split into its respective folder.**

> **4. Read the .json file and write the .yaml file accordingly.**

In [None]:
import os
import json
import yaml
import shutil
from sklearn.model_selection import train_test_split

In [None]:
# Source directory holding the images; each label file is paired with a
# same-named .jpg taken from here.
IMAGES_PATH = "./data_testing/images"
# Source directory whose entries are listed and split into train/valid/test.
LABELS_PATH = "./data_testing/labels"
# JSON file whose "categories" entry supplies the class count and class names.
NOTES_PATH = "./data_testing/data.json"

In [None]:
# Read labels.  Keep only visible regular files so stray directory entries
# (e.g. ``.ipynb_checkpoints`` or ``.DS_Store``) never reach the copy step,
# and sort them because ``os.listdir`` returns entries in arbitrary order.
labels = sorted(
    entry
    for entry in os.listdir(LABELS_PATH)
    if not entry.startswith(".") and os.path.isfile(os.path.join(LABELS_PATH, entry))
)

# Split data: 85% goes to train; the remaining 15% hold-out is then divided
# 80/20 into valid/test.  A fixed seed on a sorted list makes the split
# reproducible from run to run.
SPLIT_SEED = 42
train, holdout = train_test_split(
    labels, test_size=0.15, shuffle=True, random_state=SPLIT_SEED
)
valid, test = train_test_split(holdout, test_size=0.2, random_state=SPLIT_SEED)

print(f"train: {len(train)}; valid: {len(valid)}; test: {len(test)}")

In [None]:
# Create the images/labels folder pair for every split.  ``exist_ok=True``
# lets this cell be re-run without raising FileExistsError once the folders
# already exist.
for split_name in ("test", "train", "valid"):
    for content_kind in ("images", "labels"):
        os.makedirs(f"./data_testing/v1/{split_name}/{content_kind}", exist_ok=True)

In [None]:
def move_files_to_dir(files, dirname, image_ext=".jpg"):
    """Copy each label file and its matching image into a split folder.

    Despite the name, files are copied with ``shutil.copy``, so the sources
    under ``IMAGES_PATH`` and ``LABELS_PATH`` are left in place.

    Args:
        files: Iterable of label filenames located in ``LABELS_PATH``.
        dirname: Destination folder; the copies go into its ``images`` and
            ``labels`` subfolders.
        image_ext: Extension (including the leading dot) of the image paired
            with each label. Defaults to ``".jpg"``.
    """
    for label_filename in files:
        print(label_filename)
        # splitext removes whatever extension the label has, rather than
        # assuming it is exactly four characters long.
        stem, _ = os.path.splitext(label_filename)
        image_filename = f"{stem}{image_ext}"
        print(f"{IMAGES_PATH}/{image_filename}")
        shutil.copy(f"{IMAGES_PATH}/{image_filename}", f"{dirname}/images/{image_filename}")
        shutil.copy(f"{LABELS_PATH}/{label_filename}", f"{dirname}/labels/{label_filename}")

# Populate every split folder with its share of the files (train, then test,
# then valid).
for split_name, split_files in (("train", train), ("test", test), ("valid", valid)):
    move_files_to_dir(split_files, f"./data_testing/v1/{split_name}")

In [None]:
# Load the dataset description.  The context manager closes the file handle
# once parsing is done, and the explicit encoding avoids depending on the
# platform default when decoding the JSON text.
with open(NOTES_PATH, encoding="utf-8") as notes_file:
    descr_darknet = json.load(notes_file)

# Image folders that will be written into the YAML config.
# NOTE(review): these point at ./data/..., not the ./data_testing/v1/...
# folders the splits are copied into -- confirm this is intentional.
train_path = "./data/train/images"
test_path = "./data/test/images"
valid_path = "./data/valid/images"

# Number of classes and their names, taken from the "categories" entry.
nc = len(descr_darknet["categories"])
# When an entry is a mapping with a "name" key, use that name instead of the
# whole mapping; any other entry is kept as-is.
# NOTE(review): assumes such mappings look like {"id": ..., "name": ...} --
# confirm against the actual data.json schema.
names = [
    category["name"] if isinstance(category, dict) and "name" in category else category
    for category in descr_darknet["categories"]
]

# Preview of the values that go into the YAML file.
print(
    f"train: {train_path}\n"
    f"test: {test_path}\n"
    f"val: {valid_path}\n\n"
    f"nc: {nc}\n"
    f"names: {names}",
)

In [None]:
# Write the dataset config (split image folders, class count, class names)
# to data.yaml.
with open("./data_testing/data.yaml", "w") as yaml_file:
    dataset_config = {
        "train": train_path,
        "test": test_path,
        "val": valid_path,
        "nc": nc,
        # Every class name is written as a string.
        "names": [str(label) for label in names],
    }
    yaml.dump(dataset_config, yaml_file, default_flow_style=None)