## The preprocessing of THOR data
>author: Tao He

>Institution: Sichuan University

>email: taohe@stu.scu.edu.cn

>copyright: machine intelligence laboratory

### the source data download from https://ent.normandie-univ.fr/filex/get?k=oZgYIeT5lnbxhtHZ2u8
### the data path organized like: 
```
../data/data_source/Patient_01/GT.nii
../data/data_source/Patient_01/Patient_01.nii
```
### or download from my Baidu Netdisk https://pan.baidu.com/s/1dQHYKIkUd5qCXIvdxSijNg; password: i41q
### using this data, you should reference
>@INPROCEEDINGS{trullo17isbi,

>author = {Roger Trullo and C. Petitjean and Su Ruan and
Bernard Dubray and Dong Nie and Dinggang Shen},

>title = {Segmentation of Organs at Risk in Thoracic {CT} images using a SharpMask Architecture and Conditional Random Fields},

>booktitle = {IEEE 14th International Symposium on Biomedical Imaging (ISBI)},

>pages = {1003--1006},

>year = {2017}
}

### processing the THOR data

In [1]:
import numpy as np
import torch
import os
import time
import cv2
import nibabel as nib
import pdb


def truncated_range(img):
    max_hu = 384
    min_hu = -384
    img[np.where(img > max_hu)] = max_hu
    img[np.where(img < min_hu)] = min_hu
    return (img - min_hu) / (max_hu - min_hu) * 255.


path = '../data/data_source/'
save_path = '../data/data_npy/'

if not os.path.exists(save_path):
    os.makedirs(save_path)

files = os.listdir(path)
count = 0
print('begin processing data')

means = []
stds = []
total_imgs = []

for i, volume in enumerate(files):
    cur_file = os.path.join(path, volume)
    print(i, cur_file)
    cur_save_path = os.path.join(save_path, volume)
    if not os.path.exists(cur_save_path):
        os.makedirs(cur_save_path)
    img = nib.load(os.path.join(cur_file, volume + '.nii'))
    img = np.array(img.get_data())
    label = nib.load(os.path.join(cur_file, 'GT.nii'))
    label = np.array(label.get_data())
    img = truncated_range(img)

    for idx in range(img.shape[2]):
        if idx == 0 or idx == img.shape[2] - 1:
            continue
        # 2.5D data, using adjacent 3 images
        cur_img = img[:, :, idx - 1:idx + 2].astype('uint8')
        total_imgs.append(cur_img)
        cur_label = label[:, :, idx].astype('uint8')
        count += 1
#         np.save(
#             os.path.join(cur_save_path,
#                          volume + '_' + str(idx) + '_image.npy'), cur_img)
#         np.save(
#             os.path.join(cur_save_path,
#                          volume + '_' + str(idx) + '_label.npy'), cur_label)
    
    total_imgs = np.stack(total_imgs, 3) / 255.
    means.append(np.mean(total_imgs))
    stds.append(np.std(total_imgs))
    total_imgs = []

print('data mean is %f' % np.mean(means))
print('data std is %f' % np.std(stds))
print('total data size is %f' % count)
print('processing data end !')

begin processing data
0 ../data/data_source/Patient_19
1 ../data/data_source/Patient_34
2 ../data/data_source/Patient_32
3 ../data/data_source/Patient_35
4 ../data/data_source/Patient_06
5 ../data/data_source/Patient_14
6 ../data/data_source/Patient_10
7 ../data/data_source/Patient_37
8 ../data/data_source/Patient_07
9 ../data/data_source/Patient_01
10 ../data/data_source/Patient_22
11 ../data/data_source/Patient_40
12 ../data/data_source/Patient_20
13 ../data/data_source/Patient_11
14 ../data/data_source/Patient_13
15 ../data/data_source/Patient_26
16 ../data/data_source/Patient_28
17 ../data/data_source/Patient_21
18 ../data/data_source/Patient_02
19 ../data/data_source/Patient_36
20 ../data/data_source/Patient_15
21 ../data/data_source/Patient_12
22 ../data/data_source/Patient_33
23 ../data/data_source/Patient_38
24 ../data/data_source/Patient_16
25 ../data/data_source/Patient_08
26 ../data/data_source/Patient_24
27 ../data/data_source/Patient_05
28 ../data/data_source/Patient_31
29