In [13]:
# Data-analysis and plotting stack used throughout the notebook
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()  # apply seaborn's default theme to matplotlib figures
plt.rcParams['axes.grid'] = False  # turn grid lines off again after sns.set()

import json
import os
import cv2
import random

import warnings
# NOTE(review): this silences every warning, including deprecation notices
# from pandas/seaborn about calls used further down in this notebook.
warnings.filterwarnings(action='ignore')


######### Data path #########
# Directory that holds the *.json annotations read by json_to_dataframe and
# the image files opened by item_comp. An empty string resolves relative to
# the current working directory.
data_path = ''

In [14]:
# Convert the annotations into a DataFrame so they are easy to explore (EDA)
def json_to_dataframe(data_path):
    """Load every ``*.json`` annotation file in ``data_path`` into one DataFrame.

    Each file is parsed with ``json.load`` and becomes one row.

    Args:
        data_path: Directory to search (non-recursively) for ``*.json`` files.

    Returns:
        A DataFrame whose first columns are always 'part', 'oil', 'sensitive',
        'pigmentation', 'wrinkle', 'hydration' and 'file_name', in that order.
        Any additional keys found in the files are appended as extra columns.
        When no file is found the frame is empty but still has the seven base
        columns. Rows follow the sorted file paths.
    """
    base_columns = ['part','oil','sensitive','pigmentation','wrinkle','hydration','file_name']

    # Collect the records first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    # glob() order depends on the filesystem; sort for a reproducible row order.
    for anno in sorted(glob(os.path.join(data_path,'*.json'))):
        # JSON is UTF-8 by specification; do not rely on the locale encoding.
        with open(anno, encoding='utf-8') as f:
            records.append(json.load(f))

    if not records:
        return pd.DataFrame(columns=base_columns)

    df = pd.DataFrame(records)
    # Keep the documented column order, then any keys the files added on top.
    extra_columns = [col for col in df.columns if col not in base_columns]
    return df.reindex(columns=base_columns + extra_columns)

# Distribution of the skin-evaluation items, optionally for a single part
def eval_count(df,part_number:int=None):
    """Draw one bar chart per evaluation item showing how often each score occurs.

    Args:
        df: DataFrame with a 'part' column and the columns 'oil', 'sensitive',
            'pigmentation', 'wrinkle' and 'hydration'.
        part_number: Value of 'part' to restrict the counts to. ``None`` uses
            every row of ``df``.

    The charts are drawn side by side on a new matplotlib figure; nothing is
    returned.
    """
    eval_list = ['oil','sensitive','pigmentation','wrinkle','hydration']

    # The frame is only read, so no defensive copy is needed.
    if part_number is None:
        part_df = df
    else:
        part_df = df[df['part'] == part_number]

    plt.figure(figsize=(20,10))
    for i, item in enumerate(eval_list):
        ax = plt.subplot(1,len(eval_list),i+1)
        ax.set_title(item)

        counts = part_df[item].value_counts()
        if counts.empty:
            # Nothing to plot for this item; leave the titled panel blank.
            continue

        # seaborn >= 0.12 rejects positional x/y vectors, so pass them by keyword.
        sns.barplot(x=counts.index, y=counts.values, ax=ax)
        # Hide the y-axis label (kept from the original layout).
        ax.set_ylabel(' ',fontsize=0)


# Sample images for every item/score combination of a given part
def item_comp(df,part_number:int=None,image_dir:str=None):
    """Show up to three sample images for each score of each evaluation item.

    The figure is a grid with one row per (item, score) pair and three
    columns. A score with fewer than three images leaves the remaining cells
    of its row empty.

    Args:
        df: DataFrame with a 'part' column, a 'file_name' column and the
            columns 'oil', 'sensitive', 'pigmentation', 'wrinkle', 'hydration'.
        part_number: Value of 'part' to restrict to. ``None`` uses every row.
        image_dir: Directory that 'file_name' is relative to. Defaults to the
            notebook-level ``data_path``.

    Raises:
        FileNotFoundError: If an image cannot be read from disk.
    """
    eval_list = ['oil','sensitive','pigmentation','wrinkle','hydration']
    n_scores = 5   # scores 0..4 are displayed
    n_cols = 3     # at most this many sample images per score
    n_rows = len(eval_list) * n_scores

    if image_dir is None:
        image_dir = data_path

    if part_number is None:
        part_df = df
    else:
        part_df = df[df['part'] == part_number]

    plt.figure(figsize=(50,150))
    row = 0
    for item in eval_list:
        # `-2 in series` tests the index labels, not the scores, so compare
        # the values. Presumably -2 marks an item that is not rated for this
        # part -- TODO confirm against the annotation spec.
        if (part_df[item] == -2).any():
            continue

        for score in range(n_scores):
            score_df = part_df[part_df[item] == score].head(n_cols)

            for col, file_name in enumerate(score_df['file_name']):
                image_path = os.path.join(image_dir,file_name)
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread signals an unreadable file by returning None.
                    raise FileNotFoundError('could not read image: ' + image_path)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # Derive the cell from (row, col) so that scores with fewer
                # than three images cannot shift the rows that follow.
                plt.subplot(n_rows,n_cols,row * n_cols + col + 1)
                plt.title(item + ' ' + str(score),fontsize=25)
                plt.axis("off")
                plt.imshow(image)

            # Every score owns exactly one row, even when it has no images.
            row += 1

In [15]:
# Load every annotation found under data_path into a single DataFrame
df = json_to_dataframe(data_path)

In [None]:
# Display the annotation table
df

In [None]:
# Spearman correlation between the evaluation items, one heatmap per part
colormap = plt.cm.PuBu
plt.figure(figsize=(15,15))


# The 2x2 grid covers part values 0..3.
for i in range(4):
    plt.subplot(2,2,i+1)
    corr_df = df[df['part']==i]
    # Select the score columns by name so the result does not depend on column order.
    corr = corr_df[['oil','sensitive','pigmentation','wrinkle','hydration']].astype(float).corr(method='spearman',min_periods=1)
    sns.heatmap(corr, linewidths = 0.1, vmax = 1.0, square = True, cmap = colormap, linecolor = "white", annot = True, annot_kws = {"size" : 16})
    plt.title('part ' + str(i))

# Write the file once, after all four panels have been drawn.
plt.savefig('train_part_corr.png',dpi=200)


In [None]:
# Distribution of the skin-evaluation items for one part
eval_count(df,2)

In [None]:
# Compare sample images per item and score for that part
item_comp(df,2)

In [None]:
import json
import random
import skimage.io as io

from pycocotools.coco import COCO

def getClassName(classID, cats):
    """Return the 'name' of the first category in ``cats`` whose 'id' is ``classID``.

    ``cats`` is a sequence of dicts carrying 'id' and 'name' keys. When no
    category matches, the string "None" (not the ``None`` object) is returned.
    """
    matching_names = (
        category['name'] for category in cats if category['id'] == classID
    )
    return next(matching_names, "None")

#### Directory that contains the images ####
# NOTE(review): absolute, machine-specific path -- adjust for your environment.
img_dir = '/opt/ml/atlab/naverboostcamp_train/'

####### Path to the annotation file ########
anno_filepath = '/opt/ml/atlab/naverboostcamp_train/annotations.json'


#### Load the annotation file into a COCO object ####
coco = COCO(anno_filepath)

In [None]:
# Inspect the categories defined in the annotation file
cats = coco.loadCats(coco.getCatIds())
# cat_list is reused further down to map a category name to a mask pixel value
cat_list = [cat['name'] for cat in cats]
print('카테고리 목록 : \n',set(cat_list))

supercat_list = [cat['supercategory'] for cat in cats]
print('슈퍼 카테고리 목록 : \n',set(supercat_list))

In [217]:
# Choose the category the image must contain
# 'acne', 'smallacne', 'troub', '_background_'
# Several names may be given at once
catIds = coco.getCatIds(catNms=['troub'])

# Get the list of image ids that contain the chosen category
imgIds = coco.getImgIds(catIds=catIds)

# Pick a random id from that list
# (random.choice raises IndexError when imgIds is empty)
random_img_id = random.choice(imgIds)

# Load the image record that belongs to the chosen id
imgId = coco.getImgIds(imgIds = [random_img_id])
img = coco.loadImgs(imgId)[0]

# Load the annotations of that image, restricted to the chosen category
annIds = coco.getAnnIds(imgIds=imgId,catIds=catIds,iscrowd=None)
anns = coco.loadAnns(annIds)

In [None]:
# Inspect the selected image
I = io.imread(img_dir + '/' + img['file_name'])

plt.figure(figsize=(10,10))
# Left panel: the image with its annotations shown on top (no bounding boxes)
plt.subplot(1,2,1)
plt.imshow(I); plt.axis('off')
coco.showAnns(anns,draw_bbox=False)

# Right panel: the unannotated image for comparison
plt.subplot(1,2,2)
plt.imshow(I)

In [None]:
# image
I = io.imread(img_dir + '/' + img['file_name'])

# mask
# Start from an all-zero array with the image's height and width
mask = np.zeros((img['height'],img['width']))
for i in range(len(anns)):
    className = getClassName(anns[i]['category_id'], cats)
    # Pixel value is the category's 1-based position in cat_list, so 0 stays
    # free for unlabelled pixels. list.index raises ValueError when
    # getClassName found no match and returned "None".
    pixel_value = cat_list.index(className)+1
    # Element-wise maximum: where annotations overlap, the larger value wins.
    # NOTE(review): presumably annToMask yields a 0/1 array -- confirm.
    mask = np.maximum(coco.annToMask(anns[i])*pixel_value, mask)

# show image
plt.figure(figsize=(10,10))

# Top panel: the image with its annotations and bounding boxes
plt.subplot(2,1,1)
plt.imshow(I)
plt.axis('off')
coco.showAnns(anns,draw_bbox=True)

# Bottom panel: the class-index mask built above
plt.subplot(2,1,2)
plt.imshow(mask)
plt.axis('off')