In [1]:
import pandas as pd
import numpy as np
import json
import seaborn as sns
import os
import cv2
import random
from matplotlib import pyplot as plt
from tqdm import tqdm

## Mosaic

### 구현 순서

1. 순서대로 이미지를 뽑는다
2. 뽑힌 이미지를 left-top으로 두고 랜덤으로 3개 이미지를 추출
3. 추출된 이미지의 bbox좌표를 right-top -> left-bottom -> right-bottom 순으로 결합
4. 각각 순서대로 bbox의 좌표이동

In [2]:
base_dir = '/opt/ml/detection/dataset/'
data_json = base_dir + 'train_fold3.json'

In [3]:
#json 불러오기 + Dataframe형태로 변경
with open (data_json, 'r') as outfile:
    data = json.load(outfile)

df_images = pd.DataFrame(data['images'])
df_annotations = pd.DataFrame(data['annotations'])
df_categories = pd.DataFrame(data['categories'])

In [4]:
mosaic_list = ['lt', 'rt', 'lb', 'rb']
df_images.head(2)

Unnamed: 0,width,height,file_name,license,flickr_url,coco_url,date_captured,id
0,1024,1024,train/0000.jpg,0,,,2020-12-26 14:44:23,0
1,1024,1024,train/0001.jpg,0,,,2021-01-10 16:30:39,1


In [5]:
df_images['mosaic_file_name'] = df_images['file_name'].str.replace('train', 'mosaic_train')
df_images.loc[0, 'mosaic_file_name']

'mosaic_train/0000.jpg'

In [6]:
df_images
np.random.seed(2114)

In [7]:
def make_save_image(image_idx_list):
    
    img_list = []
    
    for idx in image_idx_list:       
        img = cv2.imread(base_dir + df_images.loc[idx, 'file_name'])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_list.append(img)
        
    
    img1 = np.hstack((img_list[0], img_list[1]))
    img2 = np.hstack((img_list[2], img_list[3]))
    
    img = np.vstack((img1, img2))
    
    plt.imsave(base_dir + df_images.loc[img_idx_list[0], 'mosaic_file_name'], img)

def append_anno(df_param, location):

    anno_bboxes = list(df_param['bbox'])
    
    changed_bboxes = []
    
    if location != 'lt':
        
        if location == 'rt':
    
            for anno_bbox in anno_bboxes:
                x = anno_bbox[0] +1024 
                y = anno_bbox[1]
                w = anno_bbox[2]
                h = anno_bbox[3]
                changed_bboxes.append([x, y, w, h])
                
        elif location == 'lb':
          
            for anno_bbox in anno_bboxes:
                x = anno_bbox[0] 
                y = anno_bbox[1] +1024 
                w = anno_bbox[2]
                h = anno_bbox[3]
                changed_bboxes.append([x, y, w, h])

        elif location == 'rb':
            
            for anno_bbox in anno_bboxes:
                x = anno_bbox[0] +1024
                y = anno_bbox[1] +1024 
                w = anno_bbox[2]
                h = anno_bbox[3]
                changed_bboxes.append([x, y, w, h]) 
         
        df_new = df_param.drop('bbox', axis=1)
        df_new['bbox'] = changed_bboxes
        df_new['location'] = location
    else:
       
        df_new = df_param
        df_new['location'] = location

    return df_new

In [8]:
df_annotation_new = pd.DataFrame()
for i in tqdm(range(len(df_images))): #df_images
    
    img_idx_list = []
    img_idx = i
    
    original_image_id = df_images.loc[img_idx, 'id']
    
    for location in mosaic_list: #Randomly chosen 3 images
        
        if len(img_idx_list) == 0:
            random_idx = img_idx
        else:
            random_idx = np.random.randint(len(df_images))
        
        ##image concatenation을 위한 index 저장
        img_idx_list.append(random_idx)
    
        ##annotation 저장
        image_id = df_images.loc[random_idx, 'id']
        register_anno = df_annotations.loc[df_annotations['image_id']==image_id]
        register_anno['image_id'] = original_image_id
        df_new = append_anno(register_anno, location)
        df_annotation_new = df_annotation_new.append(df_new, ignore_index=True)
    
print(img_idx_list)
    #image 저장 dataset/mosaic/train/
    #df_images['mosaic_file_name'] = df_images['file_name'].str.replace('train', 'mosaic_train')
    #make_save_image(img_idx_list)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████| 3905/3905 [00:54<00:00, 71.88it/s]

[3904, 2742, 2595, 2371]





In [9]:
df_annotation_new.sort_values(by='image_id')

Unnamed: 0,image_id,category_id,area,bbox,iscrowd,id,location
0,0,0,257301.66,"[197.6, 193.7, 547.8, 469.7]",0,0,lt
1,0,0,128771.94,"[1247.2, 418.8, 430.1, 299.4]",0,13341,rt
2,0,0,283267.14,"[636.6, 1062.9, 308.1, 919.4]",0,1827,lb
3,0,0,339985.14,"[416.5, 1098.9, 389.4, 873.1]",0,1828,lb
4,0,7,337879.20,"[0.0, 1414.6, 552.0, 612.1]",0,1829,lb
...,...,...,...,...,...,...,...
74548,4882,7,57309.72,"[302.1, 439.3, 265.2, 216.1]",0,23140,lt
74547,4882,5,768591.81,"[0.0, 116.2, 944.1, 814.1]",0,23139,lt
74563,4882,5,31050.11,"[371.7, 1402.9, 193.7, 160.3]",0,15314,lb
74554,4882,1,11955.06,"[1398.9, 721.6, 152.1, 78.6]",0,16198,rt


In [10]:
# 수정된 json 저장
df_new_images = df_images.drop('file_name', axis=1)
df_new_images = df_new_images.rename(columns={'mosaic_file_name':'file_name'})

In [11]:
new_images_json = df_new_images.to_json(orient = 'records')
new_images_json = json.loads(new_images_json)

new_anno_json = df_annotation_new.to_json(orient = 'records')
new_anno_json = json.loads(new_anno_json)

new_json = json.dumps({'images':new_images_json, 'annotations':new_anno_json, 'categories':data['categories']})
parsed_json = json.loads(new_json)

file_path = '/opt/ml/detection/dataset/mosaic_train.json'

with open(file_path, 'w') as outfile:
    json.dump(parsed_json, outfile, indent=4)