In [1]:
import numpy as np
import cv2
import nibabel as nib
import pydicom
import os
import glob

# Folder Structure Before Processing

Get the Dataset folder from https://github.com/ngaggion/HybridGNet, which contains the txt files and the ground-truth segmentations in graph format. Register at the Japanese Society of Radiological Technology website (http://db.jsrt.or.jp/eng.php) and download the .zip file containing all 247 images. Unzip the images into the Dataset/All247images folder.

```
Dataset
│   test_files.txt
│   train_files.txt
│   val_files.txt
│
└───Test
│   │
│   └───landmarks
│       │   JPCLNXXX.npy
│       │   ...
│   
└───Train
│   │
│   └───landmarks
│       │   JPCLNXXX.npy
│       │   ...
│   
└───Val
│   │
│   └───landmarks
│       │   JPCLNXXX.npy
│       │   ...
│
└───All247images
│   │ JPCLNXXX.IMG
│   │   ...

```

In [2]:
def drawBinary(img, organ, color):
    """Fill the polygon described by ``organ`` on ``img`` with ``color``.

    Parameters:
        img: image array that OpenCV draws on.
        organ: array of landmark coordinates; it is reshaped to (N, 1, 2),
            i.e. one (x, y) pair per contour point.
        color: value used to paint the polygon.

    Returns:
        Whatever ``cv2.drawContours`` returns for ``img``.
    """
    # Coordinates are truncated to integers before drawing.
    polygon = organ.reshape(-1, 1, 2).astype('int')

    # contourIdx=-1 draws every contour in the list; thickness=-1 fills the
    # interior instead of tracing the outline.
    return cv2.drawContours(img, [polygon], -1, color, -1)

In [3]:
def reverseVector(vector):
    """Split a flat landmark vector into one (N, 2) array per organ.

    ``vector`` is consumed front to back in this order: right lung
    (44 points), left lung (50), heart (26), right clavicle (23) and
    finally the left clavicle, which receives every remaining value.
    Each point occupies two consecutive entries of ``vector``.

    Returns:
        A 5-tuple ``(right_lung, left_lung, heart, right_clavicle,
        left_clavicle)`` of arrays reshaped to two columns.
    """
    # Landmark counts of the organs that have a fixed-size slot, in storage
    # order: right lung, left lung, heart, right clavicle.
    fixed_counts = (44, 50, 26, 23)

    pieces = []
    start = 0
    for n_points in fixed_counts:
        stop = start + 2 * n_points  # two values per landmark
        pieces.append(vector[start:stop].reshape(-1, 2))
        start = stop

    # Left clavicle: everything after the right clavicle.
    pieces.append(vector[start:].reshape(-1, 2))

    return tuple(pieces)

In [4]:
def getSeg(landmarks):
    """Rasterise organ landmarks into a single 1024x1024 label image.

    The landmarks are flattened and split by ``reverseVector``, which yields
    the organs in the order right lung, left lung, heart, right clavicle,
    left clavicle. Each organ is filled with its own gray level, in that
    same order, so an organ drawn later overwrites earlier ones where the
    polygons overlap:

        right lung 50, left lung 100, heart 200,
        right clavicle 180, left clavicle 190

    Returns:
        The painted image; background pixels stay 0.
    """
    organs = reverseVector(landmarks.reshape(-1))

    # One gray level per organ, aligned with reverseVector's return order.
    gray_levels = (50, 100, 200, 180, 190)

    canvas = np.zeros([1024,1024])
    for organ, gray in zip(organs, gray_levels):
        canvas = drawBinary(canvas, organ, gray)

    return canvas

In [5]:
def store_output(
    path_input: str,
    img: np.ndarray,
    ext: str,
    str_prefix: str = "seg_",
    organ: str = "",
) -> str:
    """Save ``img`` in the ``segments`` folder that mirrors ``path_input``.

    The output directory is the directory of ``path_input`` with the folder
    ``landmarks`` renamed to ``segments`` and the folder ``Val`` (or
    ``Validation``) renamed to ``Train``, so validation outputs land in the
    training split. The directory is created if needed. The file name is
    ``str_prefix + organ + <input base name> + ext``.

    Parameters:
        path_input: path of the file the image was derived from.
        img: image to store. If its maximum is <= 1 it is scaled by 255
            first; the caller's array is left untouched.
        ext: output extension, one of ``.png``, ``.jpg``, ``.jpeg``,
            ``.dcm`` or ``.nii.gz``.
        str_prefix: text prepended to the output file name.
        organ: text inserted between the prefix and the base name.

    Returns:
        The path of the written file.

    Raises:
        ValueError: if ``ext`` is not one of the recognised extensions.
    """
    recognised_ext = [".png", ".jpg", ".jpeg", ".dcm", ".nii.gz"]
    if ext not in recognised_ext:
        raise ValueError(
            f"{ext} not recognised. Consider using {recognised_ext} for output extensions"
        )

    # Rename whole path components only. A plain str.replace would also hit
    # substrings elsewhere in the path ('Validation' -> 'Trainidation',
    # '/home/Valerie' -> '/home/Trainerie').
    renamed_dirs = {"landmarks": "segments", "Val": "Train", "Validation": "Train"}
    # normpath unifies separators and turns an empty dirname (bare file name)
    # into '.', which os.makedirs accepts.
    parent_path = os.path.normpath(os.path.dirname(path_input))
    parent_path = os.sep.join(
        renamed_dirs.get(part, part) for part in parent_path.split(os.sep)
    )
    os.makedirs(parent_path, exist_ok=True)

    file_name_wo_ext = os.path.splitext(os.path.basename(path_input))[0]
    out_path = os.path.join(parent_path, str_prefix + organ + file_name_wo_ext + ext)

    if img.max() <= 1:
        # Build a new array instead of `img *= 255`: the in-place form would
        # modify the caller's data and fails outright for boolean masks.
        img = img * 255

    if ext in (".png", ".jpg", ".jpeg"):
        cv2.imwrite(out_path, img)

    elif ext == ".dcm":
        # NOTE(review): store_dcmseg is not defined or imported in the visible
        # notebook; this branch raises NameError unless it is provided elsewhere.
        seg_dataset = store_dcmseg(
            source_image=pydicom.dcmread(path_input), seg_img=img, instance_number=1
        )
        seg_dataset.save_as(out_path)

    elif ext == ".nii.gz":
        nifti_img = nib.Nifti1Image(img, np.eye(4))
        nib.save(nifti_img, out_path)

    return out_path


In [6]:
# Root of the JSRT dataset. Edit this single constant to point the notebook
# at another location instead of touching every glob pattern.
DATASET_DIR = "/home/ubuntu/nnUNet/JSRT/Dataset"

# glob returns matches in arbitrary order; sorting makes the processing order
# reproducible between runs and machines.
train_npy_files = sorted(glob.glob(os.path.join(DATASET_DIR, "Train", "landmarks", "*.npy")))
val_npy_files = sorted(glob.glob(os.path.join(DATASET_DIR, "Val", "landmarks", "*.npy")))
test_npy_files = sorted(glob.glob(os.path.join(DATASET_DIR, "Test", "landmarks", "*.npy")))
all_npy_files = train_npy_files + val_npy_files + test_npy_files

In [7]:
# Turn every landmark file into a PNG label image. store_output decides the
# destination folder from the landmark file's own path.
for file in all_npy_files:
    # Landmarks are used as float (x, y) rows, one row per point.
    landmarks = np.load(file).astype('float').reshape(-1, 2)
    segmentation = getSeg(landmarks)
    store_output(file, segmentation, '.png', str_prefix='')

In [8]:
def preprocess(folderpath, flist):
    """Convert raw ``.IMG`` scans from ``All247images`` into 8-bit PNG files.

    Parameters:
        folderpath: output directory; created if it does not exist.
        flist: iterable of raw file names located in ``All247images``
            (relative to the current working directory).

    Each file is read as 2048x2048 big-endian unsigned 16-bit samples,
    divided by 4096 and inverted, resized to 1024x1024, scaled to 0-255 and
    written to ``folderpath`` with ``.IMG`` replaced by ``.png``.
    """
    os.makedirs(folderpath, exist_ok = True)

    raw_side = 2048                 # raw scans are square: raw_side x raw_side
    sample_type = np.dtype('>u2')   # big-endian unsigned 16-bit samples

    for name in flist:
        source = os.path.join('All247images', name)
        with open(source, 'rb') as stream:
            raw = np.fromfile(stream, dtype=sample_type).reshape((raw_side, raw_side))

        # Normalise and invert the intensities (presumably 12-bit data, hence
        # the 4096 divisor - TODO confirm), then halve the resolution.
        inverted = 1 - raw.astype('float')  / 4096
        scaled = cv2.resize(inverted, (1024,1024)) * 255

        target = os.path.join(folderpath, name.replace('.IMG','.png'))
        cv2.imwrite(target, scaled.astype('uint8'))

In [9]:
def _read_file_list(list_path):
    """Return the file names listed one per line in ``list_path``.

    The file is read inside a ``with`` block so the handle is closed right
    away, and blank lines are skipped so they are not mistaken for file names.
    """
    with open(list_path, 'r') as handle:
        return [line for line in handle.read().splitlines() if line.strip()]


trainlist = _read_file_list('train_files.txt')
trainpath = "Train/Images"
preprocess(trainpath, trainlist)

print("Training images preprocessed")

# Validation images are written to the same folder as the training images.
vallist = _read_file_list('val_files.txt')
valpath = "Train/Images"
preprocess(valpath, vallist)

print("Validation images preprocessed")

testlist = _read_file_list('test_files.txt')
testpath = "Test/Images"
preprocess(testpath, testlist)

print("Test images preprocessed")

Training images preprocessed
Validation images preprocessed
Test images preprocessed


# Folder Structure After Processing
```
Dataset
│   test_files.txt
│   train_files.txt
│   val_files.txt
│
└───Test
│   │
│   └───landmarks
│   │   │   JPCLNXXX.npy
│   │   │   ...
│   │ 
│   └───segments
│   │    │   JPCLNXXX.png
│   │    │   ...
│   │ 
│   └───Images
│       │   JPCLNXXX.png
│       │   ...
│   
└───Train
│   │
│   └───landmarks
│   │   │   JPCLNXXX.npy
│   │   │   ...
│   │ 
│   └───segments
│   │    │   JPCLNXXX.png
│   │    │   ...
│   │ 
│   └───Images
│       │   JPCLNXXX.png
│       │   ...
│
└───Val
│   │
│   └───landmarks
│       │   JPCLNXXX.npy
│       │   ...
│
└───All247images
│   │ JPCLNXXX.IMG
│   │   ...

```