# stage_2_test
* dataframe에 position2값에 기반한 slice_id 추가
* Dicom파일을 png로 변환

### 데이터 읽기

In [1]:
import pandas as pd
from tqdm import tqdm
import os

path = '../dataset/stage_2_test_cls.csv'
# csv 확인
df = pd.read_csv(path)
df.head(5)

Unnamed: 0,filename,epidural,intraparenchymal,intraventricular,subarachnoid,subdural,any,patient_id,study_instance_uid,series_instance_uid,image_position,samples_per_pixel,pixel_spacing,pixel_representation,window_center,window_width,rescale_intercept,rescale_slope
0,ID_57c3c0e68,0,0,0,0,0,0,ID_6be49c67,ID_73f1f42302,ID_9c277b7ad1,"[-126.408875, -126.408875, -231.713654]",1,"[0.494750976563, 0.494750976563]",1,35.000000,135.000000,-1024.0,1.0
1,ID_a10185368,0,0,0,0,0,0,ID_13a98073,ID_52b738ab7b,ID_c7595b5b3f,"[-125.000, -108.000, 62.500]",1,"[0.488281, 0.488281]",1,40,150,-1024.0,1.0
2,ID_15c931500,0,0,0,0,0,0,ID_98b1a444,ID_46850e15b0,ID_16e0e1d2a1,"[-125.000, -141.384, 80.664]",1,"[0.488281, 0.488281]",1,30,80,-1024.0,1.0
3,ID_a3bac3e6b,0,0,0,0,0,0,ID_007e7be5,ID_154e0d735f,ID_091f4b8f2c,"[-125.000, -129.423, 150.908]",1,"[0.488281, 0.488281]",1,30,80,-1024.0,1.0
4,ID_28feed104,0,0,0,0,0,0,ID_ac5d1815,ID_7dd2a00ba1,ID_01f06c7cb6,"[-116.5, 2.5, 157.900024]",1,"[0.455078125, 0.455078125]",0,"[00036, 00036]","[00080, 00080]",-1024.0,1.0


### Dataframe 재구성 (slice id, position2 값 추가)

In [20]:
from tqdm import tqdm

uids = df.study_instance_uid.unique()
header=['filename', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural', 'any', 'patient_id', 'study_instance_uid', 'series_instance_uid', 'image_position', 'samples_per_pixel', 'pixel_spacing', 'pixel_representation', 'window_center', 'window_width', 'rescale_intercept', 'rescale_slope', 'Position2', 'slice_id']
train_df = pd.DataFrame(columns=header)

for uid in tqdm(uids, position=0, leave=True):
    
    # 환자단위 dataframe가져오기
    tmp_df = df[df['study_instance_uid']==uid].copy()
    
    # position2를 기준으로 정렬
    position2 = tmp_df.image_position.apply(lambda x:float(x.split(',')[2][1:-1]))
    tmp_df['Position2'] = position2
    tmp_df = tmp_df.sort_values(by='Position2')
    
    # slice id 추가
    tmp_df['slice_id']=range(len(position2))
    
    train_df = pd.concat([train_df,tmp_df])

100%|██████████| 3518/3518 [03:56<00:00, 14.90it/s]


In [23]:
train_df.head()

Unnamed: 0,filename,epidural,intraparenchymal,intraventricular,subarachnoid,subdural,any,patient_id,study_instance_uid,series_instance_uid,image_position,samples_per_pixel,pixel_spacing,pixel_representation,window_center,window_width,rescale_intercept,rescale_slope,Position2,slice_id
120749,ID_19af5cb54,0,0,0,0,0,0,ID_6be49c67,ID_73f1f42302,ID_9c277b7ad1,"[-126.408875, -126.408875, -296.713654]",1,"[0.494750976563, 0.494750976563]",1,35.0,135.0,-1024.0,1.0,-296.713654,0
66005,ID_2d2d59cab,0,0,0,0,0,0,ID_6be49c67,ID_73f1f42302,ID_9c277b7ad1,"[-126.408875, -126.408875, -291.713654]",1,"[0.494750976563, 0.494750976563]",1,35.0,135.0,-1024.0,1.0,-291.713654,1
75358,ID_ade117c6b,0,0,0,0,0,0,ID_6be49c67,ID_73f1f42302,ID_9c277b7ad1,"[-126.408875, -126.408875, -286.713654]",1,"[0.494750976563, 0.494750976563]",1,35.0,135.0,-1024.0,1.0,-286.713654,2
80454,ID_91530d96c,0,0,0,0,0,0,ID_6be49c67,ID_73f1f42302,ID_9c277b7ad1,"[-126.408875, -126.408875, -281.713654]",1,"[0.494750976563, 0.494750976563]",1,35.0,135.0,-1024.0,1.0,-281.713654,3
6434,ID_5c87b6d54,0,0,0,0,0,0,ID_6be49c67,ID_73f1f42302,ID_9c277b7ad1,"[-126.408875, -126.408875, -276.713654]",1,"[0.494750976563, 0.494750976563]",1,35.0,135.0,-1024.0,1.0,-276.713654,4


In [25]:
# 저장
train_df.to_csv('../dataset/stage_2_test_with_sliceN.csv', index=None)

### 이미지 파일로 저장

In [2]:
# dicom 이미지 변환 및 확인
import pydicom
import matplotlib.pyplot as plt
import numpy as np
import PIL

def save_img(img, save_dir):
    if len(img.shape)==2:
        im = PIL.Image.fromarray((img).astype(np.int8), mode='L')
    else:
        im = PIL.Image.fromarray((img).astype(np.int8), mode='RGB')
    im.save(save_dir)

def get_image_8bits(img, window_center, window_width):

    img_min = window_center - window_width//2
    img_max = window_center + window_width//2
    img[img<img_min] = img_min
    img[img>img_max] = img_max
    img = normalize_minmax(img).astype(np.int)
    
    return img

def normalize_minmax(img):
    mi, ma = img.min(), img.max()
    return (img - mi) / (ma - mi) * 255

def get_image_24bits(img, window_center, window_width):
    
    imgs = []
    for i in range(len(window_center)):
        imgs.append(get_image_8bits(img.copy(), window_center[i], window_width[i]))
    imgs = np.array(imgs)
    
    return imgs.transpose(1,2,0)

def window_image(img, window_center, window_width, intercept, slope):
    
    img = (img * slope + intercept)
    
    if type(window_center) is int:
        return get_image_8bits(img, window_center, window_width)
    
    if len(window_center)==1 and len(window_width)==1:
        return get_image_8bits(img, window_center[0], window_width[0])
        
    elif len(window_center)==3 and len(window_width)==3:
        return get_image_24bits(img, window_center, window_width)
    
    else:
        raise ValueError('Window Center, Width is not supported value [{},{}]'.format(window_center, window_width))

        
    return img 

def get_first_of_dicom_field_as_int(x):
    if type(x) == pydicom.multival.MultiValue:
        return int(x[0])
    else:
        return int(x)

def get_meta_from_dicom(data):
    dicom_fields = [data[('0028','1050')].value, #window center
                    data[('0028','1051')].value, #window width
                    data[('0028','1052')].value, #intercept
                    data[('0028','1053')].value] #slope
    return [get_first_of_dicom_field_as_int(x) for x in dicom_fields]

def get_img_from_dicom_dir(dicom_dir, window=False):
    dicom = pydicom.read_file(dicom_dir)
    meta = get_meta_from_dicom(dicom)
    
    if not window or not window[0]:
        dcm_img = window_image(dicom.pixel_array, *meta)
    else:
        dcm_img = window_image(dicom.pixel_array, window[0], window[1], meta[2], meta[3])
    
    return dcm_img

window_index = 2
windows = [
    ([],[]), # 0번: dicom 내의 window center, window width 사용
    ([40],[80]), # 1번: 1채널로 window center, width를 각각 40, 80으로 사용
    ([40, 80, 600], [80, 200, 2800]) # 2번: 3채널로 (40,80), (80,200), (600,2800)으로 사용
]
window = windows[window_index]

In [3]:
train_df = pd.read_csv('../dataset/stage_2_test_with_sliceN.csv')

img_save_dir = os.path.expanduser('../../dataset/kaggle_rsna_stage_2_test/')
train_dicom_dir = os.path.expanduser('../../rsna-intracranial-hemorrhage-detection/stage_2_test/')

for fn in tqdm(train_df.filename, position=0, leave=True):
    fn_dcm = fn+'.dcm'
    fn_png = fn+'.png'
    img = get_img_from_dicom_dir(train_dicom_dir+fn_dcm, window)
    save_img(img, img_save_dir+fn_png)
    

100%|██████████| 121232/121232 [1:46:32<00:00, 18.96it/s]
