In [1]:
import os
import numpy as np
import json
from sklearn.model_selection import StratifiedGroupKFold

In [2]:
# Working-directory setup: derive the project root from the current directory
# by locating the project folder name inside the cwd path.
project_dir_name = 'synthesis-car-od'
dir_len = len(project_dir_name)

# Normalise Windows separators so the search and the slice use one form.
path = os.getcwd().replace('\\', '/')
index = path.find(project_dir_name)

if index == -1:
    # str.find returns -1 on a miss; slicing with that value would silently
    # yield a truncated, meaningless prefix of the cwd. Fall back to the cwd
    # itself and say so, so a later "file not found" points at a sane path.
    print(f"'{project_dir_name}' not found in '{path}'; "
          "using the current directory as working_dir")
    working_dir = path
else:
    working_dir = path[:index + dir_len]

# Always end with exactly one '/', including when the cwd *is* the project
# root (there is no character after the folder name to pick up in that case).
working_dir = working_dir.rstrip('/') + '/'
working_dir

'd:/Assignments/dacon/synthesis-car-od/'

In [3]:
# Data directory: the 'data/' folder directly under the project working directory.
data_subdir = 'data/'
DATA_PATH = os.path.join(working_dir, data_subdir)

In [4]:
# 4-fold stratified group splitter; shuffling with a fixed seed keeps the folds reproducible.
sgkf = StratifiedGroupKFold(n_splits=4, shuffle=True, random_state=41)

In [5]:
# Per-fold results. Re-initialised on every run so re-executing the cell
# does not append duplicate folds.
train_image_indices = []  # unique image ids in each fold's training split
val_image_indices = []    # unique image ids in each fold's validation split

train_indices = []        # annotation row indices in each fold's training split
val_indices = []          # annotation row indices in each fold's validation split

# JSON is UTF-8 by specification; pass the encoding explicitly instead of
# relying on the platform default. The handle gets its own name and is closed
# as soon as parsing is done rather than staying open for the whole split loop.
with open(os.path.join(DATA_PATH, 'coco_train.json'), 'r', encoding='utf-8') as coco_file:
    coco_train = json.load(coco_file)

# `images` and `categories` are not used in this cell; they are kept as
# notebook-level names alongside `annotations`.
images = coco_train['images']
annotations = coco_train['annotations']
categories = coco_train['categories']

# One entry per annotation: the image it belongs to and its class label.
image_ids = np.array([ann['image_id'] for ann in annotations])
category_ids = [ann['category_id'] for ann in annotations]

# split(X, y, groups): stratify on the category label and group by image id.
# image_ids is passed as X only to supply the sample count; the yielded
# train_id / val_id are positions into the annotation list.
for i, (train_id, val_id) in enumerate(sgkf.split(image_ids, category_ids, image_ids)):
    # Collapse annotation rows to the distinct images they reference.
    train_image_id = np.unique(image_ids[train_id])
    val_image_id = np.unique(image_ids[val_id])

    train_image_indices.append(train_image_id)
    val_image_indices.append(val_image_id)

    train_indices.append(train_id)
    val_indices.append(val_id)

    print(f'fold{i}')
    print(train_image_id)
    print(val_image_id)
    print()

fold0
[   0    1    2 ... 6477 6478 6480]
[   5    6   10 ... 6466 6476 6479]

fold1
[   0    1    2 ... 6478 6479 6480]
[   3    7    9 ... 6464 6468 6469]

fold2
[   2    3    4 ... 6476 6479 6480]
[   0    1    8 ... 6475 6477 6478]

fold3
[   0    1    3 ... 6477 6478 6479]
[   2    4   17 ... 6460 6473 6480]



In [6]:
# Report the number of unique images in the train and validation split of each fold.
fold_sizes = [
    (len(train_ids), len(val_ids))
    for train_ids, val_ids in zip(train_image_indices, val_image_indices)
]

for fold, (n_train, n_val) in enumerate(fold_sizes):
    print(f'train fold{fold} length: {n_train}')
    print(f'val fold{fold} length: {n_val}')
    print()

train fold0 length: 4860
val fold0 length: 1621

train fold1 length: 4859
val fold1 length: 1622

train fold2 length: 4860
val fold2 length: 1621

train fold3 length: 4864
val fold3 length: 1617



In [7]:
# Leakage check: print the image ids shared by the train and validation split
# of each fold (an empty set means the two splits share no image).
fold_pairs = zip(train_image_indices, val_image_indices)
for fold, (train_ids, val_ids) in enumerate(fold_pairs):
    shared_ids = set(train_ids).intersection(val_ids)
    print(f'fold{fold}')
    print(shared_ids)
    print()

fold0
set()

fold1
set()

fold2
set()

fold3
set()



In [8]:
# NOTE(review): `category_count` is never used in this cell; kept only in case
# a later cell references it — confirm and remove if not.
category_count = dict()

# Reuse `annotations` already loaded by the split cell instead of re-opening
# and re-parsing coco_train.json. This cell already depends on that cell for
# train_indices / val_indices, so no new ordering requirement is introduced.
category_ids = np.array([ann['category_id'] for ann in annotations])

# For each fold, print the number of annotations per category in the train
# split and then in the validation split.
for i, (train_id, val_id) in enumerate(zip(train_indices, val_indices)):
    for split_name, split_rows in (('train', train_id), ('val', val_id)):
        # Index the label array by the split's annotation rows, then tally
        # how often each category id occurs.
        unique, counts = np.unique(category_ids[split_rows], return_counts=True)

        print(f'{split_name} fold{i}')
        print(dict(zip(unique, counts)))
        print()


train fold0
{0: 385, 1: 379, 2: 384, 3: 378, 4: 390, 5: 371, 6: 381, 7: 389, 8: 379, 9: 369, 10: 360, 11: 366, 12: 382, 13: 376, 14: 376, 15: 359, 16: 369, 17: 364, 18: 369, 19: 384, 20: 377, 21: 364, 22: 367, 23: 367, 24: 368, 25: 366, 26: 379, 27: 381, 28: 367, 29: 387, 30: 377, 31: 377, 32: 370, 33: 378}

val fold0
{0: 115, 1: 121, 2: 116, 3: 122, 4: 110, 5: 129, 6: 119, 7: 111, 8: 121, 9: 131, 10: 140, 11: 134, 12: 118, 13: 124, 14: 124, 15: 141, 16: 131, 17: 136, 18: 131, 19: 116, 20: 123, 21: 136, 22: 133, 23: 133, 24: 132, 25: 134, 26: 121, 27: 119, 28: 133, 29: 113, 30: 123, 31: 123, 32: 130, 33: 122}

train fold1
{0: 375, 1: 368, 2: 370, 3: 377, 4: 369, 5: 384, 6: 385, 7: 366, 8: 370, 9: 377, 10: 364, 11: 378, 12: 368, 13: 385, 14: 367, 15: 388, 16: 375, 17: 397, 18: 387, 19: 351, 20: 368, 21: 387, 22: 376, 23: 390, 24: 369, 25: 380, 26: 362, 27: 371, 28: 362, 29: 373, 30: 379, 31: 378, 32: 364, 33: 379}

val fold1
{0: 125, 1: 132, 2: 130, 3: 123, 4: 131, 5: 116, 6: 115, 7: 13