#  sarcopenia preprocess

### 준비 사항
- dcm 파일 혹은 img
- dcm인 경우 몇 번째 dcm이 l3인지 알려주는 정보
- mask 파일
- 

## 포맷

- [segmentation_models_pytorch](https://segmentation-modelspytorch.readthedocs.io/en/latest/) 라이브러리를 채택
- 위 라이브러리의 인풋과 아웃풋을 따름

```md
code_home
├── annotations
│   ├── trimaps
│   └── xmls
└── images

```

- annotations/trimaps: PNG files, 0(배경) or 1(obj)로 전처리하여 사용
- annotations/xmls: 이미지 메타 정보
- images: JPG images


## Training set 처리

```md
train_data_home
├── SarcopeniaCase1
│   ├── 20061220_DCM
│   ├── 20061220_ROI
│   ├── 20061220_RAW.txt
│   ├── [날짜]_DCM
│   ├── [날짜]_ROI
│   └── [날짜]_RAW.txt
├── SarcopeniaCase2
├── SarcopeniaCase3
└── SarcopeniaCase4
```

In [1]:
import os
from glob import glob
from PIL import Image
import pydicom
import numpy as np

### image 처리

#### img 처리 함수들

In [2]:
def norm_img(img):
    norm_image = np.array(img, dtype=np.float64)
    norm_image -= np.min(norm_image)
    norm_image /= np.max(norm_image)
    normalized_image = norm_image * 255
    normalized_image = normalized_image.astype(np.uint8)
    return normalized_image
    
def dcm_windowing(ds, center=40, width=400):
    dcm_array = ds.pixel_array
    hu_image = dcm_array * ds.RescaleSlope + ds.RescaleIntercept
    # try:
    #     hu_image = dcm_array * dcm.RescaleSlope + dcm.RescaleIntercept
    # except AttributeError:
    #     hu_image = dcm_array
    # hu_image[hu_image < -1024] = -1024

    # img_min = dcm.WindowCenter - dcm.WindowWidth // 2
    # img_max = dcm.WindowCenter + dcm.WindowWidth // 2
    img_min = center - width // 2
    img_max = center + width // 2
    
    window_image = hu_image.copy()
    window_image[window_image < img_min] = img_min
    window_image[window_image > img_max] = img_max

    normalized_img = norm_img(window_image)
    p_image = Image.fromarray(normalized_img)
    p_image = p_image.resize((512, 512))
    
    return p_image



#### 원본 이미지 저장(train_data)

In [3]:
train_data2_home = "C:\\Users\\qwe14\\0.code\\sarcopenia\\data_train_raw\\TrainingData2차_raw\\"
train_img_save_path2 = "C:\\Users\\qwe14\\0.code\\sarcopenia\\data_train\\images_2\\"

In [5]:
train_data_dirs = os.listdir(train_data2_home)
data_numbering = 0
relation_table = ""
error_table = ""

for train_dir in train_data_dirs:
    dir_path = os.path.join(train_data2_home, train_dir)
    data_instances_path = glob(f"{dir_path}\\*")
    for data_path in data_instances_path:
        data_abs_path = "\\".join(data_path.split("\\")[-2:])
        data_numbering += 1
        try:
            dcms = glob(f"{data_path}\\*.dcm")
            dcm_path = dcms[0]
        except IndexError as e:
            error_table += f"FileNotFound(*.dcm): {data_path}\n"
        if len(dcms) > 1:
            error_table += f"Many dcms: {data_path}\n"
            
        try:
            ds = pydicom.dcmread(dcm_path)
        except pydicom.errors.InvalidDicomError as e:
            error_table += f"InvalidDicom: {dcm_path}\n"

        jpg_path = os.path.join(train_img_save_path2, f"{data_numbering:05d}.jpg")

        generate_jpg = jpg_path.split("\\")[-1]
        relation_table += f"{data_abs_path}\t{generate_jpg}\n"
        img_array = dcm_windowing(ds)
        img_array.save(jpg_path)
    with open("data_relation.txt", "w", encoding='utf-8') as f:
        f.write(relation_table)
    with open("data_error.txt", "w", encoding='utf-8') as f:
        f.write(error_table)

### annotation 처리

In [4]:
import numpy as np
import copy
from PIL import Image
from glob import glob

In [5]:
data_home2 = "C:\\Users\\qwe14\\0.code\\sarcopenia\\data_train_raw\\TrainingData2차_raw\\"
annot_home2 = "C:\\Users\\qwe14\\0.code\\sarcopenia\\data_train\\annotations_2\\"

In [6]:
with open("data_relation.txt", "r", encoding='utf-8') as f:
    lines = f.readlines()

err_msg = ""
train_annot_data = ""
for line in lines:
    # 200307_L3_External validation_MASK\sarc-10001	00001.jpg
    dcm_dir_path, jpg_path = line.strip().split("\t")
    mask_home = f"{data_home2}\\{dcm_dir_path}\\*.png"
    dcm_mask_paths = glob(mask_home)

    mask_map_list = []
    for dcm_mask_path in dcm_mask_paths:
        image_mask_map = np.array(Image.open(dcm_mask_path))
        np_mask_map = image_mask_map.astype(np.int8)
        mask_map_list.append(np_mask_map)
    try:
        if((mask_map_list[0].shape == mask_map_list[1].shape) and (mask_map_list[1].shape == mask_map_list[2].shape)):
            class_d = ["S", "M", "V"]
            for i, class_init in enumerate(class_d):
                mask_img = np.zeros_like(mask_map_list)
                if i != 2:
                    mask_img = mask_map_list[i] - mask_map_list[i + 1]
                else:
                    mask_img = mask_map_list[i]
                mask_img[mask_img == 100] = 255
                mask_img = mask_img.astype(np.int8)
                print(i, np.unique(mask_img))
                p_image = Image.fromarray(mask_img)
                p_image = p_image.resize((512, 512))
                new_annot_img = f"{annot_home2}{class_init}{jpg_path}"
                p_image = p_image.convert('RGB')
                p_image.save(new_annot_img)
                train_annot_data += f"{class_init}{jpg_path} {i+1} {i+1} {1}\n"
        else:
            err_msg += f"Mask Shape Difference:\t{dcm_dir_path}\n"
    except IndexError:
        err_msg += f"Not Enough Mask:\t{dcm_dir_path}\n"
    
with open("annot_error.txt", "w", encoding='utf-8') as f:
    f.write(err_msg)
with open("annot_train.txt", "w", encoding='utf-8') as f:
    f.write(train_annot_data)

For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired result (the cast overflows).
  mask_img[mask_img == 100] = 255


0 [-1  0]
1 [-100   -1    0]
2 [-1  0]
0 [-1  0]
1 [-100   -1    0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-100   -1    0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-100   -1    0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-1  0]
1 [-1  0]
2 [-1  0]
0 [-

----

----

## 검증

In [31]:
annot_home2 = "C:\\Users\\qwe14\\0.code\\sarcopenia\\data_train\\annotations_2\\"
path = f"{annot_home2}*.jpg"
jpg_list = glob(path)
for jpg in jpg_list:
    image_mask_map = np.array(Image.open(dcm_mask_path))
    np_mask_map = image_mask_map.astype(np.int8)

[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00001.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00002.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00003.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00004.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00005.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00006.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00007.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00008.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00009.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00010.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00011.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_train\annotations_2\M00012.jpg
[  0 100] C:\Users\qwe14\0.code\sarcopenia\data_trai

In [None]:
annot_home2 = "C:\\Users\\qwe14\\0.code\\sarcopenia\\data_train\\annotations_2\\"
path = f"{annot_home2}*.jpg"
jpg_list = glob(path)
for jpg in jpg_list:
    image_mask_map = np.array(Image.open(dcm_mask_path))
    np_mask_map = image_mask_map.astype(np.int8)
    print(np.unique(np_mask_map), jpg)