In [1]:
## k-fold cross validation

import json
import random
from sklearn.model_selection import KFold

# Load data from a JSON file
def load_data_from_json(json_file):
    """Read ``json_file`` and return the parsed JSON content.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.
    """
    # Pin the encoding: JSON interchange is UTF-8, and relying on the platform
    # default can mis-decode (or fail on) non-ASCII text on non-UTF-8 locales.
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)

# Perform the K-fold split
def perform_kfold_split(data, k, seed=None):
    """Split the entries of ``data['images']`` into ``k`` train/validation folds.

    Args:
        data: Mapping with an ``'images'`` key whose value maps image names to
            their entries.
        k: Number of folds, passed to ``KFold(n_splits=k)``.
        seed: Optional seed for the shuffle. With the default ``None`` the
            module-level ``random`` state is used, so the assignment of images
            to folds differs between runs unless the caller seeds ``random``.
            Passing a value makes the split reproducible.

    Returns:
        A list with one dict per fold, each of the form
        ``{'train': {name: entry, ...}, 'val': {name: entry, ...}}``.
    """
    images = data['images']

    # Sort before shuffling so the starting order does not depend on the
    # insertion order of the input mapping.
    image_list = sorted(images)
    if seed is None:
        random.shuffle(image_list)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(image_list)

    kfold_splits = []
    for train_index, val_index in KFold(n_splits=k).split(image_list):
        kfold_splits.append({
            'train': {image_list[i]: images[image_list[i]] for i in train_index},
            'val': {image_list[i]: images[image_list[i]] for i in val_index},
        })

    return kfold_splits

# Save data to a JSON file
def save_data_to_json(data, json_file):
    """Serialize ``data`` as JSON and write it to ``json_file``.

    Args:
        data: Object to serialize with ``json.dump``.
        json_file: Path of the file to write; it is opened in ``'w'`` mode.
    """
    with open(json_file, mode='w') as out_handle:
        json.dump(data, fp=out_handle)

# Example: run a K-fold split on an annotation JSON file
json_file = '/opt/ml/input/data/medical/ufo/total_img.json'
k = 5

# Load the full data set, then partition it into k folds
data = load_data_from_json(json_file)
kfold_splits = perform_kfold_split(data, k)

# Write every fold's train/validation subset to its own JSON file
for i, split in enumerate(kfold_splits):
    # Output files are numbered from 1 and written to the working directory
    train_json_file = f'train_{i+1}.json'
    val_json_file = f'val_{i+1}.json'

    # Re-wrap each subset under an 'images' key, as in the input file
    train_data = {'images': split['train']}
    val_data = {'images': split['val']}

    # Save the training subset and report its size
    save_data_to_json(train_data, train_json_file)
    print(f'Train data for Fold {i+1} saved to {train_json_file}')
    print(len(train_data['images']))

    # Save the validation subset and report its size
    save_data_to_json(val_data, val_json_file)
    print(f'Validation data for Fold {i+1} saved to {val_json_file}')
    print(len(val_data['images']))

    # Blank line between folds in the log
    print()

Train data for Fold 1 saved to train_1.json
240
Validation data for Fold 1 saved to val_1.json
61

Train data for Fold 2 saved to train_2.json
241
Validation data for Fold 2 saved to val_2.json
60

Train data for Fold 3 saved to train_3.json
241
Validation data for Fold 3 saved to val_3.json
60

Train data for Fold 4 saved to train_4.json
241
Validation data for Fold 4 saved to val_4.json
60

Train data for Fold 5 saved to train_5.json
241
Validation data for Fold 5 saved to val_5.json
60

