In [1]:
import json
import os
import pickle
import shutil
import subprocess
from collections import defaultdict
from pprint import pprint

from tqdm import tqdm

In [2]:
def read_annotation(anno_path):
    """Load one annotation file and return the parsed JSON document.

    Args:
        anno_path: Path of the JSON annotation file to read.

    Returns:
        The decoded JSON value, or ``None`` when the file does not contain
        valid JSON. In that case a message naming the file is printed.
    """
    with open(anno_path) as f:
        try:
            return json.load(f)
        except json.JSONDecodeError:
            # Report the offending file and hand back an explicit None; the
            # previous code fell through to `return annotation` with the name
            # unbound, which raised UnboundLocalError instead.
            print(f'{anno_path} does NOT contain valid JSON')
            return None

In [3]:
# Pickled train / test splits, located relative to the notebook directory.
TRAIN_PATH = '../../json/dataset/train.pkl'
TEST_PATH = '../../json/dataset/test.pkl'

# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# these files must come from a trusted source.
# Later cells unpack every record of both splits into four fields.
with open(TRAIN_PATH, 'rb') as train_file:
    train = pickle.load(train_file)

with open(TEST_PATH, 'rb') as test_file:
    test = pickle.load(test_file)

In [4]:
# Keep only the fourth field of every record, which later cells open as the
# annotation file path. The four-way unpacking still rejects records that do
# not have exactly four fields.
train_annotations = [anno_path for _, _, _, anno_path in train]
test_annotations = [anno_path for _, _, _, anno_path in test]

In [6]:
# Summarise the training annotations per facility and collect the annotation
# paths behind every 'normal' / 'danger' label.
train_dict = dict()
facilities = set()

facility, normal, abnormal = 0, 0, 0

normal_facility = list()
abnormal_facility = list()

# Running [normal, danger] totals keyed by facility name. Keying on the name
# replaces the former "reset every 400 files" rule, which was only correct
# while train_annotations held exactly 400 consecutive files per facility.
train_counts = defaultdict(lambda: [0, 0])

for anno_path in train_annotations:
    anno = read_annotation(anno_path)
    if anno is None:
        # Skip files for which no annotation document was returned.
        continue

    # Everything after the sixth '/' of the path, minus the file name:
    # the '<site>/<facility>' directory used as the summary key.
    name = anno_path.split('/', maxsplit=6)[-1].rsplit('/', maxsplit=1)[0]
    facilities.add(name)
    counts = train_counts[name]

    for ano in anno['annotations']:
        status = ano['attributes']['status']
        if status == 'normal':
            counts[0] += 1
            # Appended once per annotation, so a path repeats when its file
            # holds several 'normal' objects.
            normal_facility.append(anno_path)
        elif status == 'danger':
            counts[1] += 1
            abnormal_facility.append(anno_path)
        else:
            raise ValueError("Found Missing Class!")

    normal, abnormal = counts
    # Every annotation is either 'normal' or 'danger' (anything else raised).
    facility = normal + abnormal
    train_dict[name] = f'normal: {normal} | danger: {abnormal}, facilities: {facility}'

In [11]:
# Write the annotation paths gathered in `normal_facility` to disk as an
# indented JSON array (non-ASCII characters are kept as-is).
normal_path = '../../json/normal_case.json'

with open(normal_path, 'w+') as out_file:
    out_file.write(json.dumps(normal_facility, ensure_ascii=False, indent=4))

In [12]:
# Write the annotation paths gathered in `abnormal_facility` to disk as an
# indented JSON array (non-ASCII characters are kept as-is).
abnormal_path = '../../json/abnormal_case.json'

with open(abnormal_path, 'w+') as out_file:
    out_file.write(json.dumps(abnormal_facility, ensure_ascii=False, indent=4))

In [13]:
# Summarise the test annotations per facility.
test_dict = dict()
facility, normal, abnormal = 0, 0, 0

# Running [normal, danger] totals keyed by facility name. Keying on the name
# replaces the former "reset every 100 files" rule, which was only correct
# while test_annotations held exactly 100 consecutive files per facility.
# The unused empty `normal_facility` / `abnormal_facility` lists are no longer
# re-created here, so the lists collected from the training split survive.
test_counts = defaultdict(lambda: [0, 0])

for anno_path in test_annotations:
    anno = read_annotation(anno_path)
    if anno is None:
        # Skip files for which no annotation document was returned.
        continue

    # Everything after the sixth '/' of the path, minus the file name:
    # the '<site>/<facility>' directory used as the summary key.
    name = anno_path.split('/', maxsplit=6)[-1].rsplit('/', maxsplit=1)[0]
    counts = test_counts[name]

    for ano in anno['annotations']:
        status = ano['attributes']['status']
        if status == 'normal':
            counts[0] += 1
        elif status == 'danger':
            counts[1] += 1
        else:
            raise ValueError("Found Missing Class!")

    normal, abnormal = counts
    # Every annotation is either 'normal' or 'danger' (anything else raised).
    facility = normal + abnormal
    test_dict[name] = f'normal: {normal} | danger: {abnormal}, facilities: {facility}'

In [14]:
# Show the per-facility normal / danger / total counts for the training split.
pprint(train_dict)

{'1.서부발전/1.고압전동기': 'normal: 360 | danger: 88, facilities: 448',
 '1.서부발전/2.차단기': 'normal: 1153 | danger: 168, facilities: 1321',
 '1.서부발전/3.변압기 접속부': 'normal: 968 | danger: 86, facilities: 1054',
 '2.변전소/1.단로기(DS)': 'normal: 1247 | danger: 113, facilities: 1360',
 '2.변전소/2.계기용변성기(MOF)': 'normal: 1070 | danger: 125, facilities: 1195',
 '2.변전소/3.변압기 케이블 연결개소 포함': 'normal: 1091 | danger: 133, facilities: 1224',
 '2.변전소/4.계기용변압기(PT)': 'normal: 660 | danger: 178, facilities: 838',
 '2.변전소/6.전자접촉기': 'normal: 2832 | danger: 141, facilities: 2973',
 '2.변전소/7.케이블&부스 연결개소': 'normal: 1136 | danger: 78, facilities: 1214'}


In [15]:
# Show the per-facility normal / danger / total counts for the test split.
pprint(test_dict)

{'1.서부발전/1.고압전동기': 'normal: 87 | danger: 26, facilities: 113',
 '1.서부발전/2.차단기': 'normal: 317 | danger: 27, facilities: 344',
 '1.서부발전/3.변압기 접속부': 'normal: 237 | danger: 29, facilities: 266',
 '2.변전소/1.단로기(DS)': 'normal: 322 | danger: 20, facilities: 342',
 '2.변전소/2.계기용변성기(MOF)': 'normal: 278 | danger: 21, facilities: 299',
 '2.변전소/3.변압기 케이블 연결개소 포함': 'normal: 265 | danger: 40, facilities: 305',
 '2.변전소/4.계기용변압기(PT)': 'normal: 173 | danger: 45, facilities: 218',
 '2.변전소/6.전자접촉기': 'normal: 693 | danger: 33, facilities: 726',
 '2.변전소/7.케이블&부스 연결개소': 'normal: 284 | danger: 19, facilities: 303'}


##### __*Copy LWIR images and annotation files into the capstone dataset*__ #####

In [33]:
# Copy every training LWIR image into the capstone dataset tree, keeping the
# '<site>/<facility>' sub-directory taken from the original path.
BASE_COPY_PATH = '/data3/datasets/kepco_ai_hub/capstone/train/images/'
facilities = sorted(facilities)

for lwir, _, _, _ in tqdm(train):
    # Everything after the sixth '/' is '<site>/<facility>/<file name>';
    # split the file name off the end.
    ADD_PATH, file_name = lwir.split('/', maxsplit=6)[-1].rsplit('/', maxsplit=1)
    if ADD_PATH not in facilities:
        print(f'NOT FOUND PATH: {ADD_PATH}')

    dest = BASE_COPY_PATH + ADD_PATH + '/' + file_name

    # Pass the command as an argument list (no shell), so spaces, '&', '$' or
    # quotes in a path cannot break or alter the command.
    result = subprocess.run(['sudo', 'cp', lwir, dest])
    if result.returncode != 0:
        # Keep going on failure, but do not let it pass silently.
        print(f'COPY FAILED (exit {result.returncode}): {lwir} -> {dest}')

100%|██████████| 3600/3600 [01:03<00:00, 56.40it/s]


In [39]:
# Copy every training annotation file into the capstone dataset tree, keeping
# the '<site>/<facility>' sub-directory taken from the original path.
BASE_COPY_PATH = '/data3/datasets/kepco_ai_hub/capstone/train/annotations/'
facilities = sorted(facilities)

for _, _, _, annotation in tqdm(train):
    # Everything after the sixth '/' is '<site>/<facility>/<file name>';
    # split the file name off the end.
    ADD_PATH, file_name = annotation.split('/', maxsplit=6)[-1].rsplit('/', maxsplit=1)
    if ADD_PATH not in facilities:
        print(f'NOT FOUND PATH: {ADD_PATH}')

    dest = BASE_COPY_PATH + ADD_PATH + '/' + file_name

    # Pass the command as an argument list (no shell), so spaces, '&', '$' or
    # quotes in a path cannot break or alter the command.
    result = subprocess.run(['sudo', 'cp', '-rf', annotation, dest])
    if result.returncode != 0:
        # Keep going on failure, but do not let it pass silently.
        print(f'COPY FAILED (exit {result.returncode}): {annotation} -> {dest}')

100%|██████████| 3600/3600 [00:53<00:00, 67.51it/s]


In [17]:
# Copy every test LWIR image into the capstone dataset tree, keeping the
# '<site>/<facility>' sub-directory taken from the original path.
BASE_COPY_PATH = '/data3/datasets/kepco_ai_hub/capstone/test/images/'
facilities = sorted(facilities)

for lwir, _, _, _ in tqdm(test):
    # Everything after the sixth '/' is '<site>/<facility>/<file name>';
    # split the file name off the end.
    ADD_PATH, file_name = lwir.split('/', maxsplit=6)[-1].rsplit('/', maxsplit=1)
    if ADD_PATH not in facilities:
        print(f'NOT FOUND PATH: {ADD_PATH}')

    dest = BASE_COPY_PATH + ADD_PATH + '/' + file_name

    # Pass the command as an argument list (no shell), so spaces, '&', '$' or
    # quotes in a path cannot break or alter the command.
    result = subprocess.run(['sudo', 'cp', lwir, dest])
    if result.returncode != 0:
        # Keep going on failure, but do not let it pass silently.
        print(f'COPY FAILED (exit {result.returncode}): {lwir} -> {dest}')

100%|██████████| 900/900 [00:17<00:00, 52.47it/s]


In [18]:
# Copy every test annotation file into the capstone dataset tree, keeping the
# '<site>/<facility>' sub-directory taken from the original path.
BASE_COPY_PATH = '/data3/datasets/kepco_ai_hub/capstone/test/annotations/'
facilities = sorted(facilities)

for _, _, _, annotation in tqdm(test):
    # Everything after the sixth '/' is '<site>/<facility>/<file name>';
    # split the file name off the end.
    ADD_PATH, file_name = annotation.split('/', maxsplit=6)[-1].rsplit('/', maxsplit=1)
    if ADD_PATH not in facilities:
        print(f'NOT FOUND PATH: {ADD_PATH}')

    dest = BASE_COPY_PATH + ADD_PATH + '/' + file_name

    # Pass the command as an argument list (no shell), so spaces, '&', '$' or
    # quotes in a path cannot break or alter the command.
    result = subprocess.run(['sudo', 'cp', annotation, dest])
    if result.returncode != 0:
        # Keep going on failure, but do not let it pass silently.
        print(f'COPY FAILED (exit {result.returncode}): {annotation} -> {dest}')

100%|██████████| 900/900 [00:12<00:00, 71.48it/s]
