In [1]:
import os
import sys
import matplotlib.pyplot as plt
import cv2
import random
import numpy as np
from datetime import datetime
from tqdm.auto import tqdm

In [3]:
data_root = '../dataset/52_2/'

final_root = os.path.join(data_root, 'final')
final_root = "../dataset/52_2/final/"
final_rgb_dir = os.path.join(final_root, 'RGBFolder')
final_label_dir = os.path.join(final_root, 'LabelFolder')
final_modalX_dir = os.path.join(final_root, 'ModalXFolder')

print('RGBFolder: ', os.path.exists(final_rgb_dir))
print('LabelFolder: ', os.path.exists(final_label_dir))
print('ModalXFolder: ', os.path.exists(final_modalX_dir))


RGBFolder:  True
LabelFolder:  True
ModalXFolder:  True


In [4]:
label_fn_list = os.listdir(final_label_dir)
print(len(label_fn_list))

6419


In [5]:
label_fn_list[0]

'20220906_154332_26.png'

In [6]:
label_fn_list = [os.path.splitext(fn)[0] for fn in label_fn_list]
print(label_fn_list[:2])

['20220906_154332_26', '20220817_100424_15']


In [7]:
class_names = {}

with open('./class_names.txt', 'r') as f:
    lines = f.readlines()
    
    for line in lines:
        cls_name = line.rstrip('\n')
        if cls_name not in class_names:
            class_names[cls_name] = len(class_names)

class_names

{'unlabeled': 0,
 'OUT_Road': 1,
 'OUT_Structure': 2,
 'OUT_Tree': 3,
 'OUT_Fence': 4,
 'OUT_Pole': 5,
 'OUT_Sign': 6,
 'Vehicle': 7,
 'OUT_Curbstone': 8,
 'OUT_Pavement': 9,
 'OUT_Undefined Stuff': 10,
 'IN_Hall Way': 11,
 'IN_Open Space': 12,
 'IN_Wall': 13,
 'IN_Pillar': 14,
 'IN_Obstruction': 15,
 'IN_Gate': 16,
 'IN_Elevator': 17,
 'IN_Sign': 18,
 'IN_Fence': 19,
 'IN_Undefined Stuff': 20,
 'OUT_Grass': 21,
 'Human': 22,
 'OUT_Bench': 23,
 'Stroller': 24,
 'OUT_Sculpture': 25,
 'OUT_Traffic Safety Stuff': 26,
 'Undefined Object': 27,
 'OUT_Gate': 28,
 'Two-wheeled Vehicle': 29,
 'Animal': 30,
 'Wheelchair': 31,
 'IN_Bench': 32,
 'IN_Emergency Stuff': 33}

In [8]:
predefined_class = {
    'unlabeled': 0,
    'OUT_Road': 1,
    'OUT_Structure': 2,
    'OUT_Tree': 3,
    'OUT_Fence': 4,
    'OUT_Pole': 5,
    'OUT_Sign': 6,
    'Vehicle': 7,
    'OUT_Curbstone': 8,
    'OUT_Pavement': 9,
    'OUT_Undefined Stuff': 10,
    'IN_Hall Way': 11,
    'IN_Open Space': 12,
    'IN_Wall': 13,
    'IN_Pillar': 14,
    'IN_Obstruction': 15,
    'IN_Gate': 16,
    'IN_Elevator': 17,
    'IN_Sign': 18,
    'IN_Fence': 19,
    'IN_Undefined Stuff': 20,
    'OUT_Grass': 21,
    'Human': 22,
    'OUT_Bench': 23,
    'Stroller': 24,
    'OUT_Sculpture': 25,
    'OUT_Traffic Safety Stuff': 26,
    'Undefined Object': 27,
    'OUT_Gate': 28,
    'Two-wheeled Vehicle': 29,
    'Animal': 30,
    'Wheelchair': 31,
    'IN_Bench': 32,
    'IN_Emergency Stuff': 33
}

In [9]:
class_names == predefined_class

True

In [10]:
reverse_class_names = {v: k for k, v in class_names.items()}
reverse_class_names

{0: 'unlabeled',
 1: 'OUT_Road',
 2: 'OUT_Structure',
 3: 'OUT_Tree',
 4: 'OUT_Fence',
 5: 'OUT_Pole',
 6: 'OUT_Sign',
 7: 'Vehicle',
 8: 'OUT_Curbstone',
 9: 'OUT_Pavement',
 10: 'OUT_Undefined Stuff',
 11: 'IN_Hall Way',
 12: 'IN_Open Space',
 13: 'IN_Wall',
 14: 'IN_Pillar',
 15: 'IN_Obstruction',
 16: 'IN_Gate',
 17: 'IN_Elevator',
 18: 'IN_Sign',
 19: 'IN_Fence',
 20: 'IN_Undefined Stuff',
 21: 'OUT_Grass',
 22: 'Human',
 23: 'OUT_Bench',
 24: 'Stroller',
 25: 'OUT_Sculpture',
 26: 'OUT_Traffic Safety Stuff',
 27: 'Undefined Object',
 28: 'OUT_Gate',
 29: 'Two-wheeled Vehicle',
 30: 'Animal',
 31: 'Wheelchair',
 32: 'IN_Bench',
 33: 'IN_Emergency Stuff'}

---

In [11]:
def get_target_data(path):
    target_list = []
    for (root, _, files) in os.walk(path):
        if 'calibration' in root and len(files) > 0 and files[0] != '.DS_Store':
            target_list.append(
                root.replace(path, '').split('/')[:-1]
            )
    return target_list

target_data = get_target_data(data_root)

In [12]:
target_data

[['DOGU', 'CP', 'a', 'cloudy', '220719', '17-19'],
 ['DOGU', 'CP', 'b', 'cloudy', '220719', '16-17'],
 ['DOGU', 'YS', 'c', 'cloudy', '220718', '14-16'],
 ['DOGU', 'YS', 'd', 'cloudy', '220715', '17-19'],
 ['DOGU', 'YS', 'd', 'normal-rain', '220830', '15-16'],
 ['DOGU', 'YS', 'f', 'cloudy', '220819', '10-11'],
 ['DOGU', 'YS', 'f', 'normal-rain', '220830', '16-17'],
 ['DOGU', 'YS', 'indoor', 'cloudy', '220811', '16-17'],
 ['DOGU', 'YS', 'indoor', 'normal-rain', '220830', '16-17'],
 ['DOGU', 'YS', 'indoor', 'weak-rain', '220817', '10-11'],
 ['RASTECH', 'DCC', 'D', 'cloudy', '220901', '11-14'],
 ['RASTECH', 'DCC', 'D', 'cloudy', '220902', '14-19'],
 ['RASTECH', 'DCC', 'D', 'sunny', '220805', '14-19'],
 ['RASTECH', 'DCC', 'D', 'sunny', '220905', '14-19'],
 ['RASTECH', 'DCC', 'D', 'sunny', '220906', '14-19'],
 ['RASTECH', 'DCC', 'D', 'sunny', '220908', '14-19'],
 ['RASTECH', 'DCC', 'D', 'sunny', '220915', '11-14']]

## train_test example 1

- DOGU만 train, RASTECH는 test
    - Train: 0.73%
    - Test: 0.27%

In [13]:
total_file_path_list = []

train_list, test_list = [], []

for target in target_data:
    files = os.listdir(os.path.join(
        data_root, '/'.join(target), 'seg', 'segmentation',
    ))
    files = [os.path.splitext(fn)[0] for fn in files if not os.path.isdir(fn)]
    common_files = list(set(label_fn_list) & set(files))

    if 'DOGU' in target:
        train_list.extend(common_files)
    else:
        test_list.extend(common_files)

In [14]:
print(len(train_list))
print(len(test_list))

5638
781


In [15]:
print("Train: {:,.2f}%".format(len(train_list) / len(label_fn_list)))
print("Test: {:,.2f}%".format(len(test_list) / len(label_fn_list)))

Train: 0.88%
Test: 0.12%


In [16]:
class_count = dict({k: 0 for k in class_names.keys()})    # instance 개수 고려 하지 않음. 즉, Max: train 파일 개수, Min: 0

for label_fn in tqdm(label_fn_list):
    label_img = cv2.imread(os.path.join(final_label_dir, "{}.png".format(label_fn)), cv2.IMREAD_GRAYSCALE)
    for val in np.unique(label_img):
        class_count[reverse_class_names[val]] += 1

  0%|          | 0/6419 [00:00<?, ?it/s]

In [17]:
class_count

{'unlabeled': 5868,
 'OUT_Road': 2893,
 'OUT_Structure': 2913,
 'OUT_Tree': 4334,
 'OUT_Fence': 1771,
 'OUT_Pole': 1088,
 'OUT_Sign': 1255,
 'Vehicle': 393,
 'OUT_Curbstone': 3381,
 'OUT_Pavement': 3506,
 'OUT_Undefined Stuff': 3055,
 'IN_Hall Way': 1075,
 'IN_Open Space': 1240,
 'IN_Wall': 2013,
 'IN_Pillar': 140,
 'IN_Obstruction': 1464,
 'IN_Gate': 1722,
 'IN_Elevator': 237,
 'IN_Sign': 398,
 'IN_Fence': 393,
 'IN_Undefined Stuff': 796,
 'OUT_Grass': 3227,
 'Human': 2921,
 'OUT_Bench': 944,
 'Stroller': 49,
 'OUT_Sculpture': 458,
 'OUT_Traffic Safety Stuff': 207,
 'Undefined Object': 127,
 'OUT_Gate': 196,
 'Two-wheeled Vehicle': 65,
 'Animal': 10,
 'Wheelchair': 12,
 'IN_Bench': 395,
 'IN_Emergency Stuff': 684}

In [18]:
today = datetime.today()
year, month, day = today.year, today.month, today.day
train_output_fn = "train_{:04d}{:02d}{:02d}.txt".format(year, month, day)
test_output_fn = "test_{:04d}{:02d}{:02d}.txt".format(year, month, day)

print(train_output_fn)
print(test_output_fn)

train_20221101.txt
test_20221101.txt


In [19]:
if not os.path.exists(train_output_fn):
    with open(train_output_fn, 'w') as f:
        f.write('\n'.join(train_list)) 
    print('Saved!')
else:
    print("Already exists!")

Saved!


In [20]:
if not os.path.exists(test_output_fn):
    with open(test_output_fn, 'w') as f:
        f.write('\n'.join(test_list))
    print("Saved!")
else:
    print("Already exists!")

Saved!


--- 

- train/test 내의 class 비율
    - 마찬가지로 instance 비율은 따지지 않음
    
    - 해당 클래스가 있는지 없는지 여부만 확인

In [21]:
class_count = dict({k: 0 for k in class_names.keys()})    # instance 개수 고려 하지 않음. 즉, Max: train 파일 개수, Min: 0

with open(train_output_fn, 'r') as f:
    train_fn_list = f.readlines()

train_fn_list = [fn.rstrip('\n') for fn in train_fn_list]
for label_fn in tqdm(train_fn_list):
    label_img = cv2.imread(os.path.join(final_label_dir, "{}.png".format(label_fn)), cv2.IMREAD_GRAYSCALE)
    for val in np.unique(label_img):
        class_count[reverse_class_names[val]] += 1
        
class_count

  0%|          | 0/5638 [00:00<?, ?it/s]

{'unlabeled': 5121,
 'OUT_Road': 2885,
 'OUT_Structure': 2879,
 'OUT_Tree': 4305,
 'OUT_Fence': 1771,
 'OUT_Pole': 1088,
 'OUT_Sign': 1248,
 'Vehicle': 393,
 'OUT_Curbstone': 3365,
 'OUT_Pavement': 3471,
 'OUT_Undefined Stuff': 3027,
 'IN_Hall Way': 1062,
 'IN_Open Space': 507,
 'IN_Wall': 1265,
 'IN_Pillar': 104,
 'IN_Obstruction': 769,
 'IN_Gate': 1000,
 'IN_Elevator': 217,
 'IN_Sign': 323,
 'IN_Fence': 322,
 'IN_Undefined Stuff': 738,
 'OUT_Grass': 3227,
 'Human': 2789,
 'OUT_Bench': 942,
 'Stroller': 49,
 'OUT_Sculpture': 458,
 'OUT_Traffic Safety Stuff': 205,
 'Undefined Object': 85,
 'OUT_Gate': 186,
 'Two-wheeled Vehicle': 65,
 'Animal': 10,
 'Wheelchair': 12,
 'IN_Bench': 307,
 'IN_Emergency Stuff': 423}

In [22]:
class_count = dict({k: 0 for k in class_names.keys()})    # instance 개수 고려 하지 않음. 즉, Max: test 파일 개수, Min: 0

with open(test_output_fn, 'r') as f:
    train_fn_list = f.readlines()

train_fn_list = [fn.rstrip('\n') for fn in train_fn_list]
for label_fn in tqdm(train_fn_list):
    label_img = cv2.imread(os.path.join(final_label_dir, "{}.png".format(label_fn)), cv2.IMREAD_GRAYSCALE)
    for val in np.unique(label_img):
        class_count[reverse_class_names[val]] += 1
        
class_count

  0%|          | 0/781 [00:00<?, ?it/s]

{'unlabeled': 747,
 'OUT_Road': 8,
 'OUT_Structure': 34,
 'OUT_Tree': 29,
 'OUT_Fence': 0,
 'OUT_Pole': 0,
 'OUT_Sign': 7,
 'Vehicle': 0,
 'OUT_Curbstone': 16,
 'OUT_Pavement': 35,
 'OUT_Undefined Stuff': 28,
 'IN_Hall Way': 13,
 'IN_Open Space': 733,
 'IN_Wall': 748,
 'IN_Pillar': 36,
 'IN_Obstruction': 695,
 'IN_Gate': 722,
 'IN_Elevator': 20,
 'IN_Sign': 75,
 'IN_Fence': 71,
 'IN_Undefined Stuff': 58,
 'OUT_Grass': 0,
 'Human': 132,
 'OUT_Bench': 2,
 'Stroller': 0,
 'OUT_Sculpture': 0,
 'OUT_Traffic Safety Stuff': 2,
 'Undefined Object': 42,
 'OUT_Gate': 10,
 'Two-wheeled Vehicle': 0,
 'Animal': 0,
 'Wheelchair': 0,
 'IN_Bench': 88,
 'IN_Emergency Stuff': 261}

## train_test example 2

- 전체 데이터 중, train:test = 7:3 
- random split

In [23]:
today = datetime.today()
year, month, day = today.year, today.month, today.day

train_random_split_output_fn = "train_random_split_{:04d}{:02d}{:02d}.txt".format(year, month, day)
test_random_split_output_fn = "test_random_split_{:04d}{:02d}{:02d}.txt".format(year, month, day)

print(train_random_split_output_fn)
print(test_random_split_output_fn)

train_random_split_20221101.txt
test_random_split_20221101.txt


In [24]:
total_file_path_list = []

train_ratio = 0.7
train_list = random.sample(label_fn_list, int(len(label_fn_list) * train_ratio))
test_list = list(set(label_fn_list) - set(train_list))

In [25]:
print(len(train_list))
print(len(test_list))

4493
1926


In [26]:
print("Train: {:,.2f}%".format(len(train_list) / len(label_fn_list)))
print("Test: {:,.2f}%".format(len(test_list) / len(label_fn_list)))

Train: 0.70%
Test: 0.30%


In [27]:
if not os.path.exists(train_random_split_output_fn):
    with open(train_random_split_output_fn, 'w') as f:
        f.write('\n'.join(train_list)) 
    print('{} Saved!'.format(train_random_split_output_fn))
else:
    print("Already exists!")

train_random_split_20221101.txt Saved!


In [28]:
if not os.path.exists(test_random_split_output_fn):
    with open(test_random_split_output_fn, 'w') as f:
        f.write('\n'.join(test_list))
    print('{} Saved!'.format(test_random_split_output_fn))
else:
    print("Already exists!")

test_random_split_20221101.txt Saved!


In [29]:
class_count = dict({k: 0 for k in class_names.keys()})    # instance 개수 고려 하지 않음. 즉, Max: train 파일 개수, Min: 0

with open(train_random_split_output_fn, 'r') as f:
    train_fn_list = f.readlines()

train_fn_list = [fn.rstrip('\n') for fn in train_fn_list]
for label_fn in tqdm(train_fn_list):
    label_img = cv2.imread(os.path.join(final_label_dir, "{}.png".format(label_fn)), cv2.IMREAD_GRAYSCALE)
    for val in np.unique(label_img):
        class_count[reverse_class_names[val]] += 1
        
class_count

  0%|          | 0/4493 [00:00<?, ?it/s]

{'unlabeled': 4109,
 'OUT_Road': 2057,
 'OUT_Structure': 2063,
 'OUT_Tree': 3051,
 'OUT_Fence': 1243,
 'OUT_Pole': 785,
 'OUT_Sign': 907,
 'Vehicle': 287,
 'OUT_Curbstone': 2377,
 'OUT_Pavement': 2457,
 'OUT_Undefined Stuff': 2122,
 'IN_Hall Way': 732,
 'IN_Open Space': 859,
 'IN_Wall': 1390,
 'IN_Pillar': 92,
 'IN_Obstruction': 1023,
 'IN_Gate': 1207,
 'IN_Elevator': 149,
 'IN_Sign': 271,
 'IN_Fence': 259,
 'IN_Undefined Stuff': 538,
 'OUT_Grass': 2265,
 'Human': 2087,
 'OUT_Bench': 675,
 'Stroller': 33,
 'OUT_Sculpture': 311,
 'OUT_Traffic Safety Stuff': 137,
 'Undefined Object': 93,
 'OUT_Gate': 132,
 'Two-wheeled Vehicle': 49,
 'Animal': 5,
 'Wheelchair': 6,
 'IN_Bench': 270,
 'IN_Emergency Stuff': 483}

In [30]:
class_count = dict({k: 0 for k in class_names.keys()})    # instance 개수 고려 하지 않음. 즉, Max: test 파일 개수, Min: 0

with open(test_random_split_output_fn, 'r') as f:
    train_fn_list = f.readlines()

train_fn_list = [fn.rstrip('\n') for fn in train_fn_list]
for label_fn in tqdm(train_fn_list):
    label_img = cv2.imread(os.path.join(final_label_dir, "{}.png".format(label_fn)), cv2.IMREAD_GRAYSCALE)
    for val in np.unique(label_img):
        class_count[reverse_class_names[val]] += 1
        
class_count

  0%|          | 0/1926 [00:00<?, ?it/s]

{'unlabeled': 1759,
 'OUT_Road': 836,
 'OUT_Structure': 850,
 'OUT_Tree': 1283,
 'OUT_Fence': 528,
 'OUT_Pole': 303,
 'OUT_Sign': 348,
 'Vehicle': 106,
 'OUT_Curbstone': 1004,
 'OUT_Pavement': 1049,
 'OUT_Undefined Stuff': 933,
 'IN_Hall Way': 343,
 'IN_Open Space': 381,
 'IN_Wall': 623,
 'IN_Pillar': 48,
 'IN_Obstruction': 441,
 'IN_Gate': 515,
 'IN_Elevator': 88,
 'IN_Sign': 127,
 'IN_Fence': 134,
 'IN_Undefined Stuff': 258,
 'OUT_Grass': 962,
 'Human': 834,
 'OUT_Bench': 269,
 'Stroller': 16,
 'OUT_Sculpture': 147,
 'OUT_Traffic Safety Stuff': 70,
 'Undefined Object': 34,
 'OUT_Gate': 64,
 'Two-wheeled Vehicle': 16,
 'Animal': 5,
 'Wheelchair': 6,
 'IN_Bench': 125,
 'IN_Emergency Stuff': 201}