# Data Utility notebook

### Kernel creation 

After creating the environment (**synap**) from the **requirements.txt** file, use the command below to register a kernel for the notebooks.
> python -m ipykernel install --user --name synap --display-name "synap"


### This notebook handles   
1. [Binary Label creation](#Convert-labels-to-binary-data )
2. [Data splitting](#Data-spliting )


In [9]:
import shutil
import os
import cv2
import glob
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm

## Convert labels to binary data 
**0** : Background   
**1** : Coord segmentation mask (the spinal cord is referred to as "Coord")  

In [10]:
# Directory holding the original label masks (input) and the directory that
# receives the binarised copies (output).
labels_dir = "../data/SegmentationClass_exp2"
output_folder = "../data/transformed_labels"

# Create the output directory up front; exist_ok lets the cell be re-run safely.
os.makedirs(output_folder, exist_ok=True)

In [11]:
# Gather every PNG label mask in the input label directory.
label_pattern = os.path.join(labels_dir, "*.png")
image_paths = glob.glob(label_pattern)

# Shuffle the list in place (no seed is set here, so the order varies per run).
np.random.shuffle(image_paths)

total_paths = len(image_paths)
print(f"Total label paths :{total_paths}")

Total label paths :11


In [12]:
# Convert all the labels to binary masks: background stays 0, every non-zero
# pixel value becomes 1. Results are written to output_folder under the same
# file name as the source label.
for path in tqdm(image_paths):
    # Load as a single-channel grayscale image (same as the numeric flag 0).
    label = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if label is None:
        # cv2.imread does not raise on an unreadable/corrupt file, it returns
        # None; report the offending path instead of failing with an opaque
        # TypeError on the comparison below.
        raise IOError(f"Could not read label image: {path}")

    label[label > 0] = 1

    out_path = os.path.join(output_folder, os.path.basename(path))
    # cv2.imwrite signals failure only through its boolean return value, so
    # check it to avoid silently missing masks in the output folder.
    if not cv2.imwrite(out_path, label):
        raise IOError(f"Could not write binary label: {out_path}")

  0%|          | 0/11 [00:00<?, ?it/s]

## Data spliting  


The cells below split the data into **train, validation and test** folders.

In [13]:
## Configs

# Fraction of the dataset assigned to each split.
# NOTE(review): the split cells in this notebook only read train_r and val_r;
# the test split receives whatever remains after those two slices.
train_r = 0.7
val_r = 0.2
test_r = 0.1

In [14]:
# Inputs for the split: the original images and the binarised label masks.
image_dir = "../data/original_images"
labels_dir = "../data/transformed_labels"

# Root of the split dataset. Each split gets an image folder ("<split>") and
# a matching label folder ("<split>annot").
data_dir = "../data/training_data/"

x_train_dir, y_train_dir = os.path.join(data_dir, 'train'), os.path.join(data_dir, 'trainannot')
x_valid_dir, y_valid_dir = os.path.join(data_dir, 'val'), os.path.join(data_dir, 'valannot')
x_test_dir, y_test_dir = os.path.join(data_dir, 'test'), os.path.join(data_dir, 'testannot')

# Create all six folders, in the same order as before; exist_ok makes the
# cell safe to re-run.
for split_dir in (
    x_train_dir,
    y_train_dir,
    x_valid_dir,
    y_valid_dir,
    x_test_dir,
    y_test_dir,
):
    os.makedirs(split_dir, exist_ok=True)

In [15]:
# Fixed seed so the train/val/test membership is the same on every run.
SPLIT_SEED = 42

# glob returns files in an arbitrary, filesystem-dependent order; sort first
# so the seeded shuffle always starts from the same sequence.
image_paths = sorted(glob.glob(os.path.join(image_dir, "*.png")))

# Shuffle in place with a dedicated generator instead of the unseeded global
# numpy RNG, so the split is reproducible and independent of earlier cells.
np.random.default_rng(SPLIT_SEED).shuffle(image_paths)

total_paths = len(image_paths)
# These are image paths (taken from image_dir), not label paths.
print(f"Total image paths :{total_paths}")

Total label paths :11


In [22]:
## Train
# The first train_r fraction of the shuffled image list goes to the train split.
train_end = int(train_r * total_paths)

for path in tqdm(image_paths[:train_end]):
    file_name = os.path.basename(path)
    shutil.copy(path, os.path.join(x_train_dir, file_name))  # copy img

    # The label shares the image's stem. os.path.splitext strips only the
    # final extension, so names with extra dots (e.g. "scan.01.png") keep
    # their full stem; split(".")[0] truncated them at the first dot and
    # pointed at the wrong (or a missing) label file.
    label_name = os.path.splitext(file_name)[0] + ".png"
    shutil.copy(
        os.path.join(labels_dir, label_name),
        os.path.join(y_train_dir, label_name),
    )  # copy label
    

  0%|          | 0/7 [00:00<?, ?it/s]

In [21]:
## Validation
# The validation split is the slice that directly follows the train slice.
val_start = int(train_r * total_paths)
val_end = val_start + int(val_r * total_paths)

for path in tqdm(image_paths[val_start:val_end]):
    file_name = os.path.basename(path)
    shutil.copy(path, os.path.join(x_valid_dir, file_name))  # copy img

    # The label shares the image's stem. os.path.splitext strips only the
    # final extension, so names with extra dots (e.g. "scan.01.png") keep
    # their full stem; split(".")[0] truncated them at the first dot and
    # pointed at the wrong (or a missing) label file.
    label_name = os.path.splitext(file_name)[0] + ".png"
    shutil.copy(
        os.path.join(labels_dir, label_name),
        os.path.join(y_valid_dir, label_name),
    )  # copy label
    

  0%|          | 0/2 [00:00<?, ?it/s]

In [19]:
## Test
# The test split is everything left after the train and validation slices.
test_start = int(train_r * total_paths) + int(val_r * total_paths)

for path in tqdm(image_paths[test_start:]):
    file_name = os.path.basename(path)
    shutil.copy(path, os.path.join(x_test_dir, file_name))  # copy img

    # The label shares the image's stem. os.path.splitext strips only the
    # final extension, so names with extra dots (e.g. "scan.01.png") keep
    # their full stem; split(".")[0] truncated them at the first dot and
    # pointed at the wrong (or a missing) label file.
    label_name = os.path.splitext(file_name)[0] + ".png"
    shutil.copy(
        os.path.join(labels_dir, label_name),
        os.path.join(y_test_dir, label_name),
    )  # copy label
    

  0%|          | 0/2 [00:00<?, ?it/s]