In [None]:
!pip install opencv-python-headless
!pip install alive-progress

# Dataset Conversion
This notebook creates a dataset split into training, validation and test data. The split points are set in the "Create split dataset" section (currently 0.6 and 0.9), and the files are copied into the `yolo/datasets` folder.
It also creates polygon annotations from the masks for YOLO segmentation. This won't work for multi-instance segmentation datasets.

In [None]:
import os
import yaml
import numpy as np

from prepare_dataset_functions import copy_files, convert_mask_to_poly_annotation, split_folder_content

## Copy to separate folders

In [None]:
# Folder holding the original dataset (images and masks together).
input_folder_path = "./original-brain-mri/"

# Temporary working folders: one each for images, masks and polygon annotations.
output_folder_images = "./data_temp/images"
output_folder_masks = "./data_temp/masks"
output_folder_polys = "./data_temp/polys"

# Create the working folders; already existing folders are left untouched.
for _output_folder in (output_folder_images, output_folder_masks, output_folder_polys):
    os.makedirs(_output_folder, exist_ok=True)

# Separate the input files into the images folder and the masks folder.
copy_files(
    input_folder_path,
    output_folder_images,
    output_folder_masks,
)

## Convert masks to polys

In [None]:
# Build the polygon annotations for YOLO segmentation from the mask files:
# first argument is the masks folder, second the folder for the polygons.
convert_mask_to_poly_annotation(
    output_folder_masks,
    output_folder_polys,
)

## Create split dataset


In [None]:
# Root folder for YOLO datasets and the name of the dataset created here.
yolo_dataset_dir = 'yolo/datasets'
dataset_name = 'yolo_mri_brain'
dataset_dir = os.path.join(yolo_dataset_dir, dataset_name)

# File extension of the image files.
image_file_ending = '.tif'

# Split points for the validation and test subsets.
# NOTE(review): presumably fractions of the file list at which the validation
# and the test subset begin - confirm in prepare_dataset_functions.
split_percentage_val = 0.6
split_percentage_test = 0.9

split_folder_content(
    output_folder_images,
    output_folder_polys,
    output_folder_masks,
    dataset_dir,
    split_percentage_val,
    split_percentage_test,
    image_file_ending,
)

## Create yaml file for yolo

In [None]:
# Name of the dataset description file and the (class index, class name) pairs.
yaml_file_name = 'mri_brain.yaml'
labels = [(0,'tumor')]

# Dataset description: dataset root, image folders per subset (relative to the
# root) and the mapping from class index to class name.
# NOTE(review): the dataset root is a hard-coded absolute path; it has to match
# the location of the dataset in the environment that reads this file.
data = {
    'path': '/usr/share/kfpv1-workshop/brain-mri/notebooks_data_preparation/yolo/datasets/'+dataset_name,
    'train': 'train/images',
    'val': 'valid/images',
    'test': 'test/images',
    'names': {class_index: class_name for class_index, class_name in labels},
}

# Write the description next to the dataset folder, in block (non-flow) style.
with open(os.path.join(yolo_dataset_dir, yaml_file_name), 'w') as outfile:
    yaml.dump(data, stream=outfile, default_flow_style=False)
