# DRAC2022 Task 1 Experiment Notebook
* creator: Jungrae Cho (team: FindDR)
* created: November 25 2022

This notebook trains one sub-task of Task 1 in DRAC2022.

The sub-tasks are intraretinal microvascular abnormalities (IRMAs), non-perfusion areas, and neovascularization.

Therefore, a total of 3 models is expected to be generated, one for each sub-task.

Each trained model is expected to produce one submission file: `1.nii.gz`, `2.nii.gz` and `3.nii.gz`, respectively.

# Prerequisites

In [None]:
!git clone https://github.com/zhuanjiao2222/DRAC2022.git
!git clone https://github.com/MIC-DKFZ/nnUNet.git
    
!cp nnUNetTrainerV2_20epochs.py ./nnUNet/nnunet/training/network_training/nnUNet_variants/benchmarking/
!cp nnUNetTrainerV2_3ConvPerStage_20epochs.py ./content/nnUNet/nnunet/training/network_training/nnUNet_variants/architectural_variants/

!pip install natsort
!pip install -e /content/nnUNet

After installation, restart the notebook.

# Import packages

In [None]:
import shutil
import pathlib
import zipfile
import os
import cv2
import SimpleITK

import matplotlib.pyplot as plt
import numpy as np

from datetime import datetime
from pytz import timezone
from tqdm.notebook import tqdm
from glob import glob
from PIL import Image
from urllib import request
from nnunet.dataset_conversion.utils import generate_dataset_json
from nnunet.utilities.file_conversions import convert_2d_segmentation_nifti_to_img

# Structure data

Download the DRAC2022 dataset and check its directory structure.

Dataset structure should be like this:
```
./DRAC2022_Data_Set/
    ├── DRAC2022_Testing_Set
    │   ├── A. Segmentation
    │   │   └── 1. Original Images
    │   │       └── b. Testing Set
    │   ├── B. Image Quality Assessment
    │   │   └── 1. Original Images
    │   │       └── b. Testing Set
    │   └── C. Diabetic Retinopathy Grading
    │       └── 1. Original Images
    │           └── b. Testing Set
    └── DRAC2022_Training_Set
        ├── A. Segmentation
        │   ├── 1. Original Images
        │   │   └── a. Training Set
        │   └── 2. Groundtruths
        │       └── a. Training Set
        │           ├── 1. Intraretinal Microvascular Abnormalities
        │           ├── 2. Nonperfusion Areas
        │           └── 3. Neovascularization
        ├── B. Image Quality Assessment
        │   ├── 1. Original Images
        │   │   └── a. Training Set
        │   └── 2. Groundtruths
        └── C. Diabetic Retinopathy Grading
            ├── 1. Original Images
            │   └── a. Training Set
            └── 2. Groundtruths
```

In [None]:
data_root = "./DRAC2022_Data_Set/" # download and specify DRAC2022 dataset directory path

# Original images sit four directory levels below the data root,
# segmentation masks one level deeper (inside a per-lesion folder).
img_paths = glob(os.path.join(data_root, "*","*","*","*","*.png")) # path of images
seg_paths = glob(os.path.join(data_root, "*","*","*","*","*","*.png")) # path of segmentation masks

csv_paths = glob(os.path.join(data_root, "*","*","*","*.csv")) # path of CSV label files

print("# of images: ", len(img_paths))
print("# of CSV files: ",len(csv_paths))
print("# of segmentation mask images: ", len(seg_paths))

# Fail with an actionable message instead of a bare IndexError when the
# dataset directory is missing or laid out differently.
if not img_paths:
    raise FileNotFoundError(
        f"No PNG images found under {data_root!r}; "
        "check that `data_root` points to the extracted DRAC2022 dataset."
    )
print("Example of image path: ", img_paths[0])

In [None]:
# Every image path ends with <task>/<image kind>/<data split>/<file>.png,
# so the task is the 4th-from-last component and the split the 2nd-from-last.
data_splits = []
tasks = set()
for img_path in img_paths:
    parser = img_path.split("/")
    task, data_split = parser[-4], parser[-2]
    tasks.add(task)
    data_splits.append(data_split)  # one entry per image, same order as img_paths

# Unique task folder names in alphabetical order
tasks = sorted(tasks)
print("Tasks of DRAC2022:")
print(tasks)

In [None]:
# Select the training split by name instead of by position: `data_splits[0]`
# is the split of whichever image glob() happened to return first, so relying
# on it could silently swap the train and test lists.
# Sorting the unique split names mirrors how `tasks` is built; the training
# folder ("a. ...") sorts before the testing folder ("b. ...").
train_split = sorted(set(data_splits))[0]
segmentation_task = tasks[0]  # first task folder in sorted order (segmentation)

seg_img_paths = {
    "train": [],
    "test": [],
}
for img_path in img_paths:
    # Skip images that belong to the other DRAC2022 tasks
    if segmentation_task not in img_path:
        continue
    split_key = "train" if train_split in img_path else "test"
    seg_img_paths[split_key].append(img_path)

print("# of data for Task 1:")
for k, v in seg_img_paths.items():
    print(k,":", len(v))

# Create nnUNet-style dataset

In [None]:
def arr2nii(data, filename, reference_name=None):
    """Write a NumPy array to disk as an image file via SimpleITK.

    Args:
        data: Array handed to ``SimpleITK.GetImageFromArray``.
        filename: Output path passed to ``SimpleITK.WriteImage``.
        reference_name: Optional path of a reference image. When given, the
            reference is read, sliced with ``[..., :1]`` and its image
            information is copied onto the output image before writing.
    """
    image = SimpleITK.GetImageFromArray(data)
    if reference_name is not None:
        reference = SimpleITK.ReadImage(reference_name)[..., :1]
        image.CopyInformation(reference)
    SimpleITK.WriteImage(image, filename)

def get_sub_paths(dataset_path):
    """Return the nnUNet sub-folder paths of a dataset directory.

    Args:
        dataset_path: Root directory of one nnUNet task.

    Returns:
        A 3-tuple ``(imagesTr, imagesTs, labelsTr)`` of paths located directly
        under ``dataset_path``.
    """
    sub_folder_names = ('imagesTr', 'imagesTs', 'labelsTr')
    return tuple(os.path.join(dataset_path, name) for name in sub_folder_names)

def create_dirs(dataset_path):
    """Create the dataset root and its nnUNet sub-folders if they are missing.

    Args:
        dataset_path: Root directory of one nnUNet task. The folders returned
            by ``get_sub_paths`` are created underneath it.
    """
    os.makedirs(dataset_path, exist_ok=True)

    # imagesTr, imagesTs and labelsTr, in that order
    for sub_dir in get_sub_paths(dataset_path):
        os.makedirs(sub_dir, exist_ok=True)

    print('Directory structure created for dataset')

def create_dataset(dataset_path):
    """Convert the DRAC2022 PNG files of one sub-task into nnUNet NIfTI files.

    Reads the module-level ``seg_paths`` (mask PNGs) and ``seg_img_paths``
    (train/test image PNGs). For every mask of the sub-task that has a
    training image with the same file stem, the image is written to
    ``imagesTr`` and the mask to ``labelsTr``. Every test image is written to
    ``imagesTs``. All arrays are resized to 512x512, given a leading singleton
    axis and transposed with ``(0, 2, 1)`` before being saved.

    Args:
        dataset_path: nnUNet task directory whose name contains
            ``Task70<digit>``; the digit selects the mask folder whose name
            starts with the same digit.
    """
    # Sub-task id = first character after "Task70" in the dataset directory name
    sub_task_id = int(dataset_path.split("Task70")[-1][0])
    # Mask folders are named "<digit>. <lesion name>"; keep the matching one
    mask_paths = [
        seg_path for seg_path in seg_paths
        if int(seg_path.split("/")[-2][0]) == sub_task_id
    ]
    # Order masks by their numeric file stem
    mask_paths = sorted(mask_paths, key=lambda x: int(x.split("/")[-1][:-4]))
    images_train_dir, images_test_dir, labels_train_dir = get_sub_paths(dataset_path)

    # Index training images by file stem once instead of rescanning the whole
    # list for every mask. setdefault keeps the first path seen for a stem.
    train_img_by_stem = {}
    for img_path in seg_img_paths["train"]:
        train_img_by_stem.setdefault(img_path.split("/")[-1][:-4], img_path)

    for mask_path in tqdm(mask_paths):
        mask_number = mask_path.split("/")[-1][:-4]
        img_path = train_img_by_stem.get(mask_number)
        if img_path is None:
            # No training image with this stem: skip the mask
            continue

        img = cv2.imread(img_path, -1)
        mask = cv2.imread(mask_path, -1)

        img = cv2.resize(img, (512,512))
        # Nearest-neighbour keeps the mask strictly binary. The default bilinear
        # interpolation creates intermediate values on lesion borders, which
        # the integer division below would floor to background.
        mask = cv2.resize(mask, (512,512), interpolation=cv2.INTER_NEAREST)

        # assumes foreground is stored as 255 in the PNG masks - TODO confirm
        mask = (mask // 255).astype(np.int32)

        # Add a leading singleton axis, then swap the two in-plane axes.
        # The 3-axis transpose implies single-channel (2-D) inputs.
        img = np.transpose(np.expand_dims(img, 0), (0,2,1))
        mask = np.transpose(np.expand_dims(mask, 0), (0,2,1))

        file_name = f"DRAC2022_{int(mask_number):03}"
        new_img_path = os.path.join(images_train_dir, file_name+"_0000.nii.gz")
        new_mask_path = os.path.join(labels_train_dir, file_name+".nii.gz")

        arr2nii(img, new_img_path)
        arr2nii(mask, new_mask_path)

    for img_path in tqdm(seg_img_paths["test"]):
        img_number = int(img_path.split("/")[-1][:-4])
        file_name = f"DRAC2022_{img_number:03}_0000.nii.gz"
        new_img_path = os.path.join(images_test_dir, file_name)

        img = cv2.imread(img_path, -1)
        img = cv2.resize(img, (512,512))
        img = np.transpose(np.expand_dims(img, 0), (0,2,1))
        arr2nii(img, new_img_path)

In [None]:
# Anchor all nnUNet working directories at the absolute current directory
here = pathlib.Path('.').resolve()

nnUNet_raw_data_base = here / 'nnUNet_raw_data_base'
nnUNet_preprocessed = here / 'nnUNet_preprocessed'
results_folder = here / 'results'

# Only the raw-data folder is created here (together with its parents)
raw_data_dir = nnUNet_raw_data_base / 'nnUNet_raw_data'
raw_data_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# One nnUNet task per lesion type; the leading digit becomes the last digit
# of the "Task70<digit>_<name>" folder name.
seg_tasks = [
    "1_IRMA",
    "2_NonPerfusion",
    "3_Neovascularization",
]

dataset_paths = [os.path.join(raw_data_dir, "Task70" + seg_task) for seg_task in seg_tasks]

print("Sub-tasks of Task 1:")
print(seg_tasks,"\n")

print("Directory path of sub-tasks: ", dataset_paths)

In [None]:
# Build the nnUNet raw-data folder for every sub-task:
# create the directory skeleton first, then convert and write the images/masks.
for dataset_path in dataset_paths:
    create_dirs(dataset_path)
    create_dataset(dataset_path)

# Preprocess data and generate a nnUNet plan

In [None]:
# Index into the sorted task list built in the next cell.
TASK_TO_TRAIN = 0 # 0: 'Task701_IRMA', 1: 'Task702_NonPerfusion', 2: 'Task703_Neovascularization'

In [None]:
# Export the working directories through the environment variables read by nnUNet
os.environ["nnUNet_raw_data_base"] = str(nnUNet_raw_data_base)
os.environ["nnUNet_preprocessed"] = str(nnUNet_preprocessed)
os.environ["RESULTS_FOLDER"] = str(results_folder)

# Keep only the sub-task directories created by this notebook. Any stray entry
# in the raw-data folder (hidden files, other tasks) would otherwise shift the
# position that TASK_TO_TRAIN points to.
task_names = sorted(
    name for name in os.listdir(raw_data_dir)
    if name.startswith("Task70") and os.path.isdir(os.path.join(raw_data_dir, name))
) # ['Task701_IRMA', 'Task702_NonPerfusion', 'Task703_Neovascularization']
task_name = task_names[TASK_TO_TRAIN]
# "Task701_IRMA" -> "701"
task_number = task_name.split("_")[0].split("Task")[-1]
target_base = os.path.join(raw_data_dir, task_name)
target_imagesTr = os.path.join(target_base, "imagesTr")
target_imagesTs = os.path.join(target_base, "imagesTs")
target_labelsTs = os.path.join(target_base, "labelsTs")
target_labelsTr = os.path.join(target_base, "labelsTr")

print("Current sub-task: ", task_name)

In [None]:
# Display the test-image folder of the selected sub-task (used as inference input later)
target_imagesTs

In [None]:
# Write dataset.json for the selected sub-task: one input modality named 'SSOCTA'
# and two labels (0 = background, 1 = abnormality).
generate_dataset_json(os.path.join(target_base, 'dataset.json'), target_imagesTr, target_imagesTs, ('SSOCTA',),
                        labels={0: 'background', 1: 'abnormality'}, dataset_name=task_name, license='hands off!')

In [None]:
# Run nnUNet planning and preprocessing for the selected task id (e.g. 701)
!nnUNet_plan_and_preprocess -t {task_number}

# Train nnUNet

In [None]:
"""
List of models:
1. 
2. nnUNetTrainerV2_20epochs
3. nnUNetTrainerV2_3ConvPerStage_20epochs
"""
# Trainer class name passed to the nnUNet_train command in the next cell
MODEL_TO_TRAIN = "nnUNetTrainerV2_3ConvPerStage_20epochs"

In [None]:
# Train the 2d configuration with the selected trainer on the selected sub-task.
# The trailing 0 is presumably the cross-validation fold - confirm against the nnUNet CLI help.
!nnUNet_train 2d {MODEL_TO_TRAIN} {task_name} 0 

# Infer model for submission

In [None]:
# Predictions of the selected sub-task go to ./test_output/<task_name>
test_output_folder = here / "test_output" / task_name
test_output_folder.mkdir(parents=True, exist_ok=True)

In [None]:
!nnUNet_predict -i {target_imagesTs} -o {test_output_folder} -t {task_number} -m 2d --save_npz

# Postprocess and export (for only single sub-task)

In [None]:
import os
import cv2
import SimpleITK
import numpy as np

def read_nii(nii_path, data_type=np.uint16):
    """Load an image file with SimpleITK and return its voxels as a NumPy array.

    Args:
        nii_path: Path of the image file to read.
        data_type: NumPy dtype of the returned array (default ``np.uint16``).

    Returns:
        A new ``numpy.ndarray`` of dtype ``data_type`` holding the image data.
    """
    image = SimpleITK.ReadImage(nii_path)
    return np.array(SimpleITK.GetArrayFromImage(image), dtype=data_type)

def arr2nii(data, filename, reference_name=None):
    """Save a NumPy array as an image file through SimpleITK.

    Args:
        data: Array converted with ``SimpleITK.GetImageFromArray``.
        filename: Destination path given to ``SimpleITK.WriteImage``.
        reference_name: Optional path of a reference image whose image
            information is copied onto the output before it is written.
    """
    image = SimpleITK.GetImageFromArray(data)
    if reference_name is not None:
        image.CopyInformation(SimpleITK.ReadImage(reference_name))
    SimpleITK.WriteImage(image, filename)

In [None]:
# Submission files of the selected sub-task go to ./submission/<task_name>
submission_folder = here / "submission" / task_name
submission_folder.mkdir(parents=True, exist_ok=True)

In [None]:
# Both folders already end with the selected task name, so the task name is
# neither reset to the first task nor joined into the path a second time.
test_root = test_output_folder
out_root = submission_folder
# Submission file id = the digit after "Task70" (e.g. "Task701_IRMA" -> "1").
# Note: this rebinds `task_number`, which the visualization cell relies on.
task_number = task_name[6]

# Collect the softmax dumps written by nnUNet_predict --save_npz and order them
# by the numeric case id in "DRAC2022_<id>.npz".
# NOTE(review): confirm that ascending image id is the slice order the challenge expects.
file_names = sorted(
    (name for name in os.listdir(test_root) if name.endswith(".npz")),
    key=lambda name: int(os.path.splitext(name)[0].split("_")[-1]),
)
if not file_names:
    raise FileNotFoundError(f"No .npz prediction files found in {test_root}")

mask = []
for file_name in file_names:
    # Context manager closes the underlying archive once the array is read
    with np.load(os.path.join(test_root, file_name)) as npz_file:
        _mask = npz_file['softmax']
    # Reverse the axis order so the class axis comes last, then pick the
    # most probable class per pixel and drop the remaining singleton axis.
    _mask = np.transpose(_mask, (3, 2, 1, 0))
    _mask = np.argmax(_mask, -1)
    _mask = np.squeeze(_mask, -1)
    _mask = cv2.resize(_mask.astype(np.uint8), (1024, 1024))
    mask.append(_mask)
# Stack all cases into one volume and write <task_number>.nii.gz
mask = np.array(mask)
arr2nii(mask, os.path.join(out_root, task_number+".nii.gz"))

# Visualize

In [None]:
# Load the exported submission volume (one predicted mask per test image)
data = read_nii(os.path.join(out_root, task_number+".nii.gz"))
print(data.shape)

# Original test PNGs ordered by numeric image id.
# NOTE(review): this must be the same order in which the masks were stacked
# when the submission volume was written - confirm against the export cell.
png_paths = sorted(
    seg_img_paths["test"],
    key=lambda path: int(os.path.splitext(os.path.basename(path))[0]),
)

# zip stops at the shorter sequence, so a count mismatch cannot raise IndexError
for img_path, pred_mask in zip(png_paths, data):
    img = cv2.imread(img_path, -1)
    # img = cv2.resize(img, (512, 512))

    # Left: original image. Right: same image with the prediction overlaid.
    fig, ax = plt.subplots(1,2,figsize=(10,5))
    ax[0].imshow(img)
    ax[1].imshow(img)
    ax[1].imshow(pred_mask, cmap='jet',alpha=0.4)
    plt.show()

# References
1. [nnUNet with mini-lung.ipynb](https://colab.research.google.com/github/pymedphys/pymedphys/blob/d23b8adda0e4a7/protyping/auto-segmentation/sb/04-mini-data/053-nnUNet-with-mini-lung.ipynb)
2. [nnUNet medium post: nnU-Net : The no-new-UNet for automatic segmentation](https://medium.com/miccai-educational-initiative/nnu-net-the-no-new-unet-for-automatic-segmentation-8d655f3f6d2a)
3. [nnUNet workshop repository](https://github.com/IML-DKFZ/nnunet-workshop)
4. [nnUNet official repository](https://github.com/MIC-DKFZ/nnUNet)
5. [DRAC2022 submission example](https://github.com/zhuanjiao2222/DRAC2022)
6. [DRAC2022 official web site](https://drac22.grand-challenge.org/)