# imports

In [69]:
import os
import shutil
import sys

import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Base directory against which the data/ and datasets/ paths below are resolved.
# NOTE(review): this assumes sys.path[0] is the project folder that contains
# data/ — confirm for the kernel in use, since the first sys.path entry
# depends on how the interpreter was launched.
path = sys.path[0]

# loading data

In [70]:
# Read the label table from the project's data folder.
labels_csv = os.path.join(path, 'data/classes/types_causes.csv')
labels = pd.read_csv(labels_csv)

# Discard the final row (index label len(labels) - 1); per the original note
# this gets rid of the 1095.png entry.
last_row_label = len(labels) - 1
labels = labels.drop(index=last_row_label)

# creating folders

## train val test

In [71]:
# Distinct values of the "Type" column, in value_counts() order.
type_counts = labels["Type"].value_counts()
types = list(type_counts.index)

folders = ["images", "annotations"]
sets = ["train", "val", "test"]
base_path = os.path.join(path, "datasets/")

# Create datasets/<folder>/ and, inside it, one directory per split.
for folder_ in folders:
    folder_dir = os.path.join(base_path, folder_)
    os.makedirs(folder_dir, exist_ok=True)
    for set_ in sets:
        split_dir = os.path.join(folder_dir, set_)
        os.makedirs(split_dir, exist_ok=True)

## classes

In [72]:
# Add one sub-directory per type under every datasets/<folder>/<set>/.
for folder_ in folders:
    for set_ in sets:
        # The <folder>/<set> prefix is the same for every type, so build it once.
        set_dir = os.path.join(base_path, folder_, set_)
        for type_ in types:
            path_ = os.path.join(set_dir, type_)
            os.makedirs(path_, exist_ok=True)

# split and transfer

In [73]:
# Two-stage split with a fixed seed: 20% of all rows go to validation, then
# 20% of the remaining rows go to test; whatever is left is the training set.
x_rest, x_val = train_test_split(labels, test_size=0.2, random_state=42)
x_train, x_test = train_test_split(x_rest, test_size=0.2, random_state=42)

# Report the size of each split.
summary = "You will have:\n\t{} train images, \n\t {} validation images, \n\t {} test images"
print(summary.format(len(x_train), len(x_val), len(x_test)))

You will have:
	699 train images, 
	 219 validation images, 
	 175 test images


In [74]:
# Map each split name to the label rows that belong to it.
# NOTE: this rebinds `sets` (previously the list of split names) to a dict;
# iterating it still yields "train", "val", "test" in that order.
sets = {
    "train": x_train,
    "val": x_val,
    "test": x_test
}

# Source folders are derived from `path`, like the labels CSV and the
# datasets/ output folder, instead of a machine-specific absolute path.
# NOTE(review): assumes `path` is the project folder that holds data/images
# and data/annotations — confirm before running on a new machine.
data_root = os.path.join(path, "data")
source_paths = {
    "images": os.path.join(data_root, "images"),
    "annotations": os.path.join(data_root, "annotations")
}

# Copy every listed file into datasets/<folder>/<set>/<type>/<file name>.
# The same "Image" value is used as the file name in both source folders.
for folder_ in folders:
    for set_, split_df in sets.items():
        # Walk the two needed columns together rather than doing a .loc lookup
        # per row; `total` keeps the progress bar length known.
        rows = zip(split_df["Image"], split_df["Type"])
        for image, type_ in tqdm(rows, total=len(split_df)):
            source_path = os.path.join(source_paths[folder_], image)
            dest_path = os.path.join(base_path, folder_, set_, type_, image)
            shutil.copy(source_path, dest_path)


100%|██████████| 699/699 [00:10<00:00, 67.79it/s]
100%|██████████| 219/219 [00:02<00:00, 75.71it/s]
100%|██████████| 175/175 [00:02<00:00, 73.15it/s]
100%|██████████| 699/699 [00:00<00:00, 5685.60it/s]
100%|██████████| 219/219 [00:00<00:00, 6555.05it/s]
100%|██████████| 175/175 [00:00<00:00, 6329.85it/s]
