In [19]:
import numpy as np
import pandas as pd
import json
from pandas import json_normalize

############################ (Labeled data) ################################
# Load the labeled COCO-style json and remember the fields of its last image
# and last annotation record; the code below reuses them as templates/counters.
labeled_json_data_path = "./stratified_kfold/cv_train_1.json"  # adjust this path as needed

# JSON text is UTF-8; name the encoding explicitly so the load does not depend
# on the platform's default locale.
with open(labeled_json_data_path, encoding="utf-8") as f:
    labeled_data = json.load(f)

df_images = json_normalize(labeled_data['images'])
df_annotations = json_normalize(labeled_data['annotations'])

# Read the last records by key rather than by column position: positional
# unpacking silently mis-assigns values when the json lists its keys in a
# different order, and fails outright when a record carries an extra key.
# Going through the raw dicts also keeps every value a plain JSON type.
last_image = labeled_data['images'][-1]
last_annotation = labeled_data['annotations'][-1]

# Fields of the last image record.
width = last_image['width']
height = last_image['height']
license = last_image['license']  # NOTE: shadows the interactive `license` helper; name kept because later code reads it
flickr_url = last_image['flickr_url']
coco_url = last_image['coco_url']
date_captured = last_image['date_captured']
image_id_1 = last_image['id']

# Fields of the last annotation record.
image_id_2 = last_annotation['image_id']
category_id = last_annotation['category_id']
area = last_annotation['area']
bbox = last_annotation['bbox']
iscrowd = last_annotation['iscrowd']
anno_id = last_annotation['id']

############################ (Unlabeled data) ################################
# Convert the model predictions stored in a submission CSV into COCO-style
# image / annotation records collected in `unlabeled`.
submission_csv = './work_dirs/submission/cascade_swin_t_best.csv'  # adjust this path as needed
# keep_default_na=False keeps empty prediction cells as '' instead of NaN.
data = pd.read_csv(submission_csv, keep_default_na=False).values.tolist()

unlabeled = {'images': [], 'annotations': []}  # dictionary destined for the json output
confidence_threshold = 0.7  # detections scoring below this are dropped; None keeps them all

# Continue numbering after the largest ids already present in the labeled
# data. Seeding the image counter from the last *annotation's* image_id could
# collide with an existing image whenever the trailing image(s) carry no
# annotations, so both counters are derived from the records themselves.
image_id_2 = max(img['id'] for img in labeled_data['images'])
anno_id = max(ann['id'] for ann in labeled_data['annotations'])

# Each CSV row is unpacked positionally as (prediction string, image file name).
for predict, image in data:
    # Rows without a usable prediction string are skipped.
    if not isinstance(predict, str):
        continue

    # Whitespace splitting tolerates leading/trailing/repeated blanks, which
    # would otherwise shift the 6-token groups; an empty string yields no tokens.
    split_predict = predict.split()
    anns_length = len(split_predict) // 6  # number of detections (six tokens each)

    # Detections of this image that pass the confidence threshold.
    kept_detections = []
    for i in range(anns_length):
        # Token order: class, confidence, left, top, right, bottom.
        fields = split_predict[i * 6:(i + 1) * 6]
        class_ = int(fields[0])
        confidence = float(fields[1])
        Left, Top, Right, Bottom = (float(value) for value in fields[2:])
        if confidence_threshold is not None and confidence < confidence_threshold:
            continue
        kept_detections.append((class_, Left, Top, Right, Bottom))

    # An image is only registered when at least one detection survives.
    if not kept_detections:
        continue

    image_id_2 += 1
    unlabeled['images'].append({
        # NOTE(review): width/height and the metadata below are copied from the
        # last labeled image - this assumes every image shares them; confirm.
        'width': width,
        'height': height,
        'file_name': image,
        'license': license,
        'flickr_url': flickr_url,
        'coco_url': coco_url,
        'date_captured': date_captured,
        'id': image_id_2,
    })

    # One record per kept detection. Reusing a single dict for the whole image
    # would keep only the last detection while still consuming annotation ids.
    for class_, Left, Top, Right, Bottom in kept_detections:
        Width = Right - Left
        Height = Bottom - Top
        anno_id += 1
        unlabeled['annotations'].append({
            'image_id': image_id_2,
            'category_id': class_,
            'area': round(Width * Height, 2),
            # Stored as [left, top, width, height].
            'bbox': [round(Left, 1), round(Top, 1), round(Width, 1), round(Height, 1)],
            # NOTE(review): iscrowd is copied from the last labeled annotation - confirm it is the intended value.
            'iscrowd': iscrowd,
            'id': anno_id,
        })



In [21]:
# Labeled Data + Unlabeled Data ################################################################################
# Append the pseudo-label records to the labeled data in place, then write the
# combined dictionary out as json.
# NOTE: running this more than once without reloading `labeled_data` appends
# the same records again.
for section in ('images', 'annotations'):
    labeled_data[section].extend(unlabeled[section])

with open("./train_new.json", "w") as merged_file:
    json.dump(labeled_data, merged_file)