In [10]:
import os

import json
import pandas as pd
import numpy as np
from tqdm import tqdm
from glob import glob

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib

from shapely.geometry import Polygon
from shapely.geometry import Polygon
from rasterio.features import geometry_mask
from rasterio.transform import from_origin

In [None]:
# !pip install seaborn
# !pip install shapely
# !pip install rasterio

In [11]:
def calculate_area(points):
    """Return the area of the polygon whose exterior ring is ``points``.

    Args:
        points: Sequence of coordinate pairs, passed unchanged to
            ``shapely.geometry.Polygon``.

    Returns:
        The ``area`` attribute of the constructed polygon.
    """
    outline = Polygon(points)
    return outline.area

# Data load

In [13]:
# Candidate sample names ID001 ... ID548 (not referenced by the visible cells).
id_list = [f'ID{i:03}' for i in range(1, 549)]
json_dir = '/data/ephemeral/home/data/train/outputs_json/'

# os.listdir() yields entries in arbitrary, platform-dependent order.  Later
# cells attach per-folder information to annotation rows purely by position,
# so sort the sub-directories to make the run reproducible.
IDs = sorted(
    os.path.join(json_dir, name)
    for name in os.listdir(json_dir)
    if os.path.isdir(os.path.join(json_dir, name))
)

In [14]:
# Read every annotation JSON of every sample folder into one record list.
data = []
for id_dir in IDs:
    # os.listdir() order is arbitrary; sort so the files of a folder are always
    # loaded in the same order.
    # NOTE(review): the next cell labels even rows "R" and odd rows "L", which
    # presumes the sorted order puts the right-hand file first - confirm
    # against the dataset's file naming.
    json_names = sorted(name for name in os.listdir(id_dir) if name.endswith('.json'))
    for json_name in json_names:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(os.path.join(id_dir, json_name), 'r', encoding='utf-8') as fp:
            data.append(json.load(fp))

# One row per JSON file.
df = pd.DataFrame(data)


In [15]:
# Flatten the per-file annotation lists into one long table, tagging each file
# with a hand side by row parity (even -> "R", odd -> "L").
# Frames are collected first and concatenated once: calling pd.concat inside
# the loop copies the growing table on every iteration and starts from an
# empty frame, which newer pandas versions warn about.
per_file_frames = []
for file_pos, annotations in enumerate(df['annotations']):
    file_ann = pd.DataFrame(annotations).sort_values('label')
    file_ann['R/L'] = "R" if file_pos % 2 == 0 else "L"
    per_file_frames.append(file_ann)

# pd.concat([]) raises, so keep the original "empty table" result when no
# files were loaded.
ann = pd.concat(per_file_frames) if per_file_frames else pd.DataFrame()

In [16]:
# One entry per annotation row: the last three characters of each folder path,
# repeated 58 times in folder order.
# NOTE(review): 58 presumably equals 2 images x 29 labels per folder - confirm
# that every folder really contributes exactly 58 annotation rows.
a = [folder[-3:] for folder in IDs for _ in range(58)]

In [None]:
# Display the second folder path (presumably a quick check of the path format).
IDs[1]

In [18]:
# Each entry of `a` is the last three characters of a folder path (e.g. '001'
# for '.../ID001').  Slicing off two more characters kept only the final digit,
# so ID011 and ID001 both became 1.  Parse every digit of the entry instead;
# this also works if the entries carry an 'ID' prefix.
ann['ID'] = [int(''.join(ch for ch in item if ch.isdigit())) for item in a]

In [19]:
# Renumber the rows 0..n-1 in place, discarding the previous index.
ann.reset_index(drop=True, inplace=True)

In [20]:
# Polygon area of every annotation, computed element-wise from its vertex list.
ann['area'] = ann['points'].map(calculate_area)

In [None]:
# Preview the first five rows (five is the default for head()).
ann.head()

## label 크기 비교

In [None]:
# Mean polygon area per label, shown as a bar chart.
mean_area_by_label = ann.groupby('label')['area'].mean().reset_index()

fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=mean_area_by_label, x='label', y='area', ax=ax)
ax.set_title('mean area')
plt.xticks(rotation=90)  # draw the x tick labels vertically
# plt.savefig('./EDA/mean_area_by_label.png')  # save the figure
plt.show()


In [23]:
# Mean area per label plus its share (in percent) of a 2048 x 2048 image.
gb_labal = (
    ann.groupby('label')['area']
    .mean()
    .reset_index()
    .assign(ratio=lambda stats: stats['area'] / (2048 * 2048) * 100)
)

## 전체 이미지에 대한 label 크기 비율 

In [None]:
# Bar chart of each label's mean area as a percentage of the full image,
# drawn on a fixed 0-100 scale.
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=gb_labal, x='label', y='ratio', color='orange', ax=ax)
ax.set_title('area ratio')
plt.xticks(rotation=90)  # draw the x tick labels vertically
ax.set_ylim(0, 100)
# plt.savefig('./EDA/area_ratio_by_label.png')  # save the figure

plt.show()


## 전체 이미지 HEATMAP

In [None]:
# Class names in plotting order: 2 arm bones, 19 finger bones, 8 wrist bones.
labels = ['Ulna', 'Radius',
    'finger-1', 'finger-2', 'finger-3', 'finger-4', 'finger-5', 'finger-6', 
    'finger-7', 'finger-8', 'finger-9', 'finger-10', 'finger-11', 'finger-12',
    'finger-13', 'finger-14', 'finger-15', 'finger-16', 'finger-17', 'finger-18', 'finger-19', 
    'Capitate', 'Hamate', 'Lunate', 'Pisiform',  'Scaphoid',
    'Trapezium', 'Trapezoid', 'Triquetrum'
]

# Raster size used for every heatmap.
target_size = (2048, 2048)
img_height, img_width = target_size
HEATMAP_SIZE = (img_height, img_width)

# One base colour per label, grouped by tone, plus one grey for the total.
colors_2 = sns.color_palette("Reds", 2)       # arm bones
colors_19 = sns.color_palette("Blues", 19)    # finger bones
colors_8 = sns.color_palette("Greens", 8)     # wrist bones
colors_1 = sns.color_palette("Greys", 1)      # all labels combined
all_colors = colors_2 + colors_19 + colors_8 + colors_1

# One colormap per base colour: near-black -> base colour -> near-white.
custom_cmaps = [
    matplotlib.colors.LinearSegmentedColormap.from_list(
        colors=[(0.05, 0.05, 0.05), c, (0.95, 0.95, 0.95)], 
        name=f"custom_{i}") 
    for i, c in enumerate(all_colors)
]

# Upper-left corner at (0, img_height), one unit per pixel.  The transform is
# the same for every polygon, so it is built once instead of once per label.
# NOTE(review): with this origin the raster rows appear to run opposite to the
# image y axis; the invert_yaxis() call at display time flips it back - confirm.
transform = from_origin(0, img_height, 1, 1)


def _accumulate_coverage(point_series):
    """Count, per pixel, how many polygons of ``point_series`` cover it.

    Args:
        point_series: Sized iterable of vertex lists, one per polygon.

    Returns:
        An int16 array of shape ``HEATMAP_SIZE`` holding the coverage counts.
    """
    coverage = np.zeros(HEATMAP_SIZE, dtype=np.int16)
    for points in tqdm(point_series, total=len(point_series)):
        # invert=True marks the pixels inside the polygon as True.
        mask = geometry_mask([Polygon(points)], transform=transform, invert=True, out_shape=HEATMAP_SIZE)
        coverage += mask.astype(np.int16)
    return coverage


def _show_heatmap(heatmap, cmap, title, save_path=None):
    """Draw one heatmap with a colour bar; optionally save it before showing.

    Args:
        heatmap: 2-D array of coverage counts.
        cmap: Matplotlib colormap used for the image.
        title: Figure title.
        save_path: If given, the figure is written there (the parent folder is
            created when missing).  Nothing is saved by default.
    """
    plt.figure(figsize=(10, 8))
    plt.imshow(heatmap, cmap=cmap, interpolation="nearest")
    plt.colorbar(label="Count of Points")
    plt.gca().invert_yaxis()
    plt.axis('off')
    plt.title(title)
    if save_path is not None:
        os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
        plt.savefig(save_path)
    plt.show()


# Per-label heatmaps, accumulated into one heatmap over all labels.
heatmap_all = np.zeros(HEATMAP_SIZE, dtype=np.int16)

for i, label in enumerate(labels):
    heatmap = _accumulate_coverage(ann.loc[ann['label'] == label, 'points'])
    heatmap_all += heatmap
    # Pass save_path=f'./EDA/heatmap_{label}.png' to keep a copy on disk.
    _show_heatmap(heatmap, custom_cmaps[i], label)

# Pass save_path='./EDA/heatmap_All.png' to keep a copy on disk.
_show_heatmap(heatmap_all, custom_cmaps[-1], 'All')




In [None]:
# Right-hand images only.
# Rows are selected through the 'R/L' column rather than by taking every
# second row of each label: positional slicing silently assigns the wrong hand
# as soon as one image lacks a label.
is_right = ann['R/L'] == 'R'

# Upper-left corner at (0, img_height), one unit per pixel; identical for all
# polygons, so it is built once.
transform = from_origin(0, img_height, 1, 1)

heatmap_all = np.zeros(HEATMAP_SIZE, dtype=np.int16)

for i, label in enumerate(labels):
    points_list = ann[(ann['label'] == label) & is_right]
    # Per-pixel count of polygons of this label.
    heatmap = np.zeros(HEATMAP_SIZE, dtype=np.int16)

    for points in tqdm(points_list['points'], total=len(points_list)):
        # invert=True marks the pixels inside the polygon as True.
        mask = geometry_mask([Polygon(points)], transform=transform, invert=True, out_shape=HEATMAP_SIZE)
        heatmap += mask.astype(np.int16)

    # Fold this label into the all-label total.
    heatmap_all += heatmap

    # Show the per-label heatmap.
    plt.figure(figsize=(10, 8))
    plt.imshow(heatmap, cmap=custom_cmaps[i], interpolation="nearest")
    plt.colorbar(label="Count of Points")
    plt.gca().invert_yaxis()
    plt.axis('off')
    plt.title(label)
    # plt.savefig(f'./EDA/heatmap_R_{label}.png')

    plt.show()

plt.figure(figsize=(10, 8))
plt.imshow(heatmap_all, cmap=custom_cmaps[-1], interpolation="nearest")
plt.colorbar(label="Count of Points")
plt.gca().invert_yaxis()
plt.axis('off')
plt.title('All_R')
# plt.savefig(f'./EDA/heatmap_R_All.png')

plt.show()





In [None]:
# Left-hand images only.
# Rows are selected through the 'R/L' column rather than by taking every
# second row of each label: positional slicing silently assigns the wrong hand
# as soon as one image lacks a label.
is_left = ann['R/L'] == 'L'

# Upper-left corner at (0, img_height), one unit per pixel; identical for all
# polygons, so it is built once.
transform = from_origin(0, img_height, 1, 1)

heatmap_all = np.zeros(HEATMAP_SIZE, dtype=np.int16)

for i, label in enumerate(labels):
    points_list = ann[(ann['label'] == label) & is_left]
    # Per-pixel count of polygons of this label.
    heatmap = np.zeros(HEATMAP_SIZE, dtype=np.int16)

    for points in tqdm(points_list['points'], total=len(points_list)):
        # invert=True marks the pixels inside the polygon as True.
        mask = geometry_mask([Polygon(points)], transform=transform, invert=True, out_shape=HEATMAP_SIZE)
        heatmap += mask.astype(np.int16)

    # Fold this label into the all-label total.
    heatmap_all += heatmap

    # Show the per-label heatmap.
    plt.figure(figsize=(10, 8))
    plt.imshow(heatmap, cmap=custom_cmaps[i], interpolation="nearest")
    plt.colorbar(label="Count of Points")
    plt.gca().invert_yaxis()
    plt.axis('off')
    plt.title(label)
    # plt.savefig(f'./EDA/heatmap_L_{label}.png')

    plt.show()

plt.figure(figsize=(10, 8))
plt.imshow(heatmap_all, cmap=custom_cmaps[-1], interpolation="nearest")
plt.colorbar(label="Count of Points")
plt.gca().invert_yaxis()
plt.axis('off')
plt.title('All_L')
# plt.savefig(f'./EDA/heatmap_L_All.png')

plt.show()




## 손가락, 손목, 팔 별 heatmap

In [None]:
# Three base colours, one per bone group plotted in this section.
all_colors = sns.color_palette("YlGnBu", 3)  

# One colormap per base colour: near-black -> base colour -> near-white.
custom_cmaps = [
    matplotlib.colors.LinearSegmentedColormap.from_list(
        colors=[(0.05, 0.05, 0.05), c, (0.95, 0.95, 0.95)], 
        name=f"custom_{i}") 
    for i, c in enumerate(all_colors)
]

# Upper-left corner at (0, img_height), one unit per pixel.
transform = from_origin(0, img_height, 1, 1)

# All finger-bone polygons (labels[2:21]) of left-hand images.  The 'R/L'
# column is used instead of every-second-row slicing, which breaks as soon as
# an image lacks one of the labels.
finger_rows = ann['label'].isin(labels[2:21]) & (ann['R/L'] == 'L')
finger_points = ann.loc[finger_rows, 'points']

# Per-pixel count of finger polygons.
heatmap_all = np.zeros(HEATMAP_SIZE, dtype=np.int16)
for points in tqdm(finger_points, total=len(finger_points)):
    # invert=True marks the pixels inside the polygon as True.
    mask = geometry_mask([Polygon(points)], transform=transform, invert=True, out_shape=HEATMAP_SIZE)
    heatmap_all += mask.astype(np.int16)


plt.figure(figsize=(10, 8))
plt.imshow(heatmap_all, cmap=custom_cmaps[0], interpolation="nearest")
plt.colorbar(label="Count of Points")
plt.gca().invert_yaxis()
plt.axis('off')
plt.title('finger_L')
# savefig does not create missing folders; make sure the target exists.
os.makedirs('./EDA', exist_ok=True)
plt.savefig('./EDA/heatmap_L_finger.png')

plt.show()




In [None]:

# labels[21:] are the eight carpal bones (Capitate ... Triquetrum), i.e. the
# wrist group.  The figure was previously titled and saved as "arm".
wrist_rows = ann['label'].isin(labels[21:]) & (ann['R/L'] == 'L')
wrist_points = ann.loc[wrist_rows, 'points']

# Upper-left corner at (0, img_height), one unit per pixel.
transform = from_origin(0, img_height, 1, 1)

# Per-pixel count of wrist polygons in left-hand images.
heatmap_all = np.zeros(HEATMAP_SIZE, dtype=np.int16)
for points in tqdm(wrist_points, total=len(wrist_points)):
    # invert=True marks the pixels inside the polygon as True.
    mask = geometry_mask([Polygon(points)], transform=transform, invert=True, out_shape=HEATMAP_SIZE)
    heatmap_all += mask.astype(np.int16)


plt.figure(figsize=(10, 8))
plt.imshow(heatmap_all, cmap=custom_cmaps[1], interpolation="nearest")
plt.colorbar(label="Count of Points")
plt.gca().invert_yaxis()
plt.axis('off')
plt.title('wrist_L')
# savefig does not create missing folders; make sure the target exists.
os.makedirs('./EDA', exist_ok=True)
plt.savefig('./EDA/heatmap_L_wrist.png')

plt.show()




In [None]:

# labels[:2] are Ulna and Radius, i.e. the arm group.  The figure was
# previously titled "wrist".
arm_rows = ann['label'].isin(labels[:2]) & (ann['R/L'] == 'L')
arm_points = ann.loc[arm_rows, 'points']

# Upper-left corner at (0, img_height), one unit per pixel.
transform = from_origin(0, img_height, 1, 1)

# Per-pixel count of arm polygons in left-hand images.
heatmap_all = np.zeros(HEATMAP_SIZE, dtype=np.int16)
for points in tqdm(arm_points, total=len(arm_points)):
    # invert=True marks the pixels inside the polygon as True.
    mask = geometry_mask([Polygon(points)], transform=transform, invert=True, out_shape=HEATMAP_SIZE)
    heatmap_all += mask.astype(np.int16)


plt.figure(figsize=(10, 8))
plt.imshow(heatmap_all, cmap=custom_cmaps[2], interpolation="nearest")
plt.colorbar(label="Count of Points")
plt.gca().invert_yaxis()
plt.axis('off')
plt.title('arm_L')
# plt.savefig(f'./EDA/heatmap_L_arm.png')

plt.show()


