In [31]:
import os
import shutil
from tqdm import tqdm

from random import shuffle
import time

import numpy as np

from sklearn.model_selection import KFold

from matplotlib import pyplot as plt
import cv2

In [32]:
# --- Dataset location -------------------------------------------------------
dataset_name    = "wound_rajavithi_korean_medetec"
type_wound_name = "wound_tissue"

root            = os.path.join("..", "..", "..", "data", dataset_name, "wound_segmentation", type_wound_name)
source_dir      = os.path.join(root, "training_with_rotation_color")

Train_Original = source_dir
Train_Features = os.path.join(Train_Original, 'feature')
Train_Labels   = os.path.join(Train_Original, 'label')

# Fail early, naming the missing directory, instead of producing an empty listing.
for p in [
    Train_Original, Train_Features, Train_Labels
]:
    if not os.path.exists(p):
        raise Exception('%s not found.' % p)


def _list_files(top):
    """Return the sorted paths of every file found under `top` (recursively).

    Files whose containing directory path includes the substring 'check' are
    skipped (presumably to ignore `.ipynb_checkpoints` folders -- confirm).
    """
    return sorted(
        os.path.join(dirpath, file_name)
        for dirpath, _, file_names in os.walk(top)
        for file_name in file_names
        if 'check' not in dirpath
    )


# --- Pair every feature image with its label --------------------------------
Feature_paths = _list_files(Train_Features)
Label_paths   = _list_files(Train_Labels)

# zip() stops at the shorter input, so unmatched trailing files would be
# dropped without any error; check the counts explicitly before pairing.
if len(Feature_paths) != len(Label_paths):
    raise Exception('Feature/label count mismatch. (%d, %d)'
                    % (len(Feature_paths), len(Label_paths)))

samples = list(zip(Feature_paths, Label_paths))

# Both listings are sorted, so a pair is valid only when the base names agree.
for feature, label in samples:
    f_name = os.path.basename(feature)
    l_name = os.path.basename(label)
    if f_name != l_name:
        raise Exception('Name not valid. (%s, %s)' % (f_name, l_name))

print('Samples: %d' % (len(samples)))

Samples: 16408


In [33]:
def get_k_fold(data, k):
    """Split `data` into `k` train/test partitions using scikit-learn's KFold.

    Parameters
    ----------
    data : sequence
        Items to split; converted to a NumPy array so the index arrays
        produced by KFold can be used for fancy indexing.
    k : int
        Number of folds, passed to KFold as `n_splits`. No other KFold
        arguments are given, so KFold's own defaults apply.

    Returns
    -------
    list of (train, test) tuples, one per fold. Each side is the result of
    `ndarray.tolist()`, so rows of a 2-D input come back as plain lists
    (e.g. a (feature, label) pair becomes a two-item list).
    """
    records = np.asarray(data)
    splitter = KFold(n_splits=k)
    return [
        (records[train_idx].tolist(), records[test_idx].tolist())
        for train_idx, test_idx in splitter.split(records)
    ]
        
# Build the 10 folds, then print one row per fold with the train size,
# the test size and their sum as a quick sanity check.
folds = get_k_fold(samples, 10)
print('Train\tTest\tTotal')
for train, test in folds:
    n_train, n_test = len(train), len(test)
    print('%5d\t%4d\t%5d' % (n_train, n_test, n_train + n_test))

Train	Test	Total
14767	1641	16408
14767	1641	16408
14767	1641	16408
14767	1641	16408
14767	1641	16408
14767	1641	16408
14767	1641	16408
14767	1641	16408
14768	1640	16408
14768	1640	16408


In [34]:
k_fold_dir = os.path.join(root, "training_k_fold_with_rotation_color")


def _copy_pairs(pairs, split_dir):
    """Copy (feature, label) file pairs into `split_dir`.

    Creates `split_dir/Features` and `split_dir/Labels` (including any missing
    parent directories) and copies each pair there. Both files of a pair are
    stored under the feature file's base name.

    NOTE(review): destinations are flat, so two sources sharing a base name
    would overwrite each other -- confirm file names are unique.
    """
    feature_dir = os.path.join(split_dir, 'Features')
    label_dir   = os.path.join(split_dir, 'Labels')
    os.makedirs(feature_dir)
    os.makedirs(label_dir)

    for f_src, l_src in pairs:
        file_name = f_src.split(os.sep)[-1]
        shutil.copy2(f_src, os.path.join(feature_dir, file_name))
        shutil.copy2(l_src, os.path.join(label_dir, file_name))


# Always rebuild the output tree from scratch so files from a previous run
# cannot linger. WARNING: this deletes any existing k-fold directory.
if os.path.exists(k_fold_dir):
    shutil.rmtree(k_fold_dir)
os.mkdir(k_fold_dir)

# Layout: <k_fold_dir>/<fold number, 1-based>/{Train,Validate}/{Features,Labels}
for index, fold in enumerate(folds):
    fold_dir = os.path.join(k_fold_dir, str(index + 1))
    train, test = fold

    _copy_pairs(train, os.path.join(fold_dir, 'Train'))
    _copy_pairs(test,  os.path.join(fold_dir, 'Validate'))