In [1]:
# This file is a modified version of the original:
# https://github.com/dangnh0611/kaggle_rsna_breast_cancer/blob/reproduce/src/tools/prepair_classification_dataset.py

In [2]:
import argparse
import os
import shutil

from utils._prepair_classification_dataset_stage1 import *
from utils._prepair_classification_dataset_stage2 import *

from settings import SETTINGS
from misc import rm_and_mkdir

Using global configuration (SETTINGS.json):
--------------------------------------------------------------------------------
ASSETS_DIR: ./assets/
MODEL_CHECKPOINT_DIR: ./checkpoints/
MODEL_FINAL_SELECTION_DIR: ./assets/reproduce/
PROCESSED_DATA_DIR: ./datasets/processed/
RAW_DATA_DIR: 
SUBMISSION_DIR: ./submissions/
TEMP_DIR: ./tmp/
__JSON_PATH__: /media/na/e0adac50-20ce-4eb4-9c9d-98faf82ddd46/rsna_breast/SETTINGS.json
--------------------------------------------------------------------------------






In [3]:
# Dispatch tables: dataset name -> processing function for that dataset.
# `main` looks up the handler for each dataset by name, once per stage.
# Only the RSNA dataset is enabled; the remaining entries are kept
# commented out so they can be switched back on.

# Stage-1 handlers.
STAGE1_PROCESS_FUNCS = dict([
    ('rsna-breast-cancer-detection', stage1_process_rsna),
    # ('vindr', stage1_process_vindr),
    # ('miniddsm', stage1_process_miniddsm),
    # ('cmmd', stage1_process_cmmd),
    # ('cddcesm', stage1_process_cddcesm),
    # ('bmcd', stage1_process_bmcd),
])

# Stage-2 handlers.
STAGE2_PROCESS_FUNCS = dict([
    ('rsna-breast-cancer-detection', stage2_process_rsna),
    # ('vindr', stage2_process_vindr),
    # ('miniddsm', stage2_process_miniddsm),
    # ('cmmd', stage2_process_cmmd),
    # ('cddcesm', stage2_process_cddcesm),
    # ('bmcd', stage2_process_bmcd),
])

In [4]:
def parse_args():
    """Parse the command-line options of the dataset preparation script.

    Options are read from ``sys.argv``. Arguments that the parser does not
    recognize are ignored rather than treated as an error, so the function
    keeps working when ``sys.argv`` carries extra entries (presumably the
    ones a notebook kernel launcher adds -- confirm for your environment).

    Returns:
        argparse.Namespace: with the attributes
            num_workers (int): number of workers for (dicomsdl + YOLOX)
                decoding; defaults to 4.
            roi_yolox_engine_path (str or None): path to the TensorRT engine
                of the YOLOX ROI detection model; defaults to None.
    """
    # The text must be passed as `description`: the first positional
    # parameter of ArgumentParser is `prog`, which would make this sentence
    # show up as the program name in the usage line.
    parser = argparse.ArgumentParser(
        description='Prepair classification dataset.')
    parser.add_argument(
        '--num-workers',
        type=int,
        default=4,
        help='Number of workers for (dicomsdl + YOLOX) decoding.')
    parser.add_argument(
        '--roi-yolox-engine-path',
        type=str,
        default=None,
        help='Path to TensorRT engine of YOLOX ROI detection model.')
    # parse_known_args (not parse_args) so unrecognized arguments are
    # collected and discarded instead of aborting with a usage error.
    args, _ = parser.parse_known_args()
    return args

In [183]:
def _remove_dir_or_link(path):
    """Delete `path` whether it is a symlink, a regular file or a directory.

    Does nothing if `path` does not exist. A symbolic link is unlinked
    itself (its target is left untouched), including a dangling link whose
    target no longer exists.
    """
    if os.path.islink(path) or os.path.isfile(path):
        # shutil.rmtree refuses symbolic links, and os.path.exists() reports
        # False for dangling ones, so links are detected explicitly.
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)


def main(args):
    """Run the stage-1 and stage-2 processing for every configured dataset.

    For each dataset in the hard-coded list, stage 1 clears the previous
    `stage1_images` entry and the cleaned root directory, then calls the
    dataset's stage-1 function with the raw root, the stage-1 image
    directory and the cleaned label path. Stage 2 recreates the
    `cleaned_images` directory and calls the dataset's stage-2 function with
    the YOLOX engine path and the stage-1 / cleaned locations.

    Args:
        args: namespace providing
            roi_yolox_engine_path: optional override for the YOLOX engine
                path; when falsy the default under
                SETTINGS.MODEL_FINAL_SELECTION_DIR is used.
            num_workers: passed to the stage-2 function as both `n_jobs`
                and `n_chunks`.

    Raises:
        AssertionError: if the cleaned label file does not exist when
            stage 2 starts.
    """
    ROI_YOLOX_ENGINE_PATH = os.path.join(SETTINGS.MODEL_FINAL_SELECTION_DIR,
                                         'yolox_nano_416_roi_trt.pth')
    if args.roi_yolox_engine_path:
        ROI_YOLOX_ENGINE_PATH = args.roi_yolox_engine_path
    print('Using YOLOX engine path:', ROI_YOLOX_ENGINE_PATH)

    DATASETS = [
        'rsna-breast-cancer-detection',# 'vindr', 'miniddsm', 'cmmd', 'cddcesm','bmcd'
    ]
    STAGES = ['stage1', 'stage2']

    for dataset in DATASETS:
        print('Processing', dataset)
        raw_root_dir = os.path.join(SETTINGS.RAW_DATA_DIR, dataset)

        stage1_images_dir = os.path.join(raw_root_dir, 'stage1_images')
        cleaned_root_dir = os.path.join(SETTINGS.PROCESSED_DATA_DIR,
                                        'classification', dataset)
        cleaned_label_path = os.path.join(cleaned_root_dir,
                                          'cleaned_label.csv')
        cleaned_images_dir = os.path.join(cleaned_root_dir, 'cleaned_images')

        if 'stage1' in STAGES:
            # Remove the previous `stage1_images` entry (directory, file or
            # symbolic link -- dangling links included).
            _remove_dir_or_link(stage1_images_dir)
            rm_and_mkdir(cleaned_root_dir)

            stage1_process_func = STAGE1_PROCESS_FUNCS[dataset]
            # raw_root_dir => rsna-breast-cancer-detection
            # stage1_images_dir => rsna-breast-cancer-detection/stage1_images
            # cleaned_root_dir => ./datasets/processed/classification/rsna-breast-cancer-detection

            stage1_process_func(raw_root_dir,
                                stage1_images_dir,
                                cleaned_label_path,
                                force_copy=False)

        if 'stage2' in STAGES:
            rm_and_mkdir(cleaned_images_dir)
            # Stage 2 reads the label file at this path, so it must exist.
            assert os.path.exists(cleaned_label_path), (
                'Missing cleaned label file: ' + cleaned_label_path)

            stage2_process_func = STAGE2_PROCESS_FUNCS[dataset]
            print('Converting to 8-bits png images..')
            stage2_process_func(ROI_YOLOX_ENGINE_PATH,
                                stage1_images_dir,
                                cleaned_label_path,
                                cleaned_images_dir,
                                n_jobs=args.num_workers,
                                n_chunks=args.num_workers)
        print('Done!')
        print('-----------------\n\n')


In [186]:
# Entry point: parse the command-line options, then run the processing
# stages. `args` is left bound at module level after the run.
if __name__ == '__main__':
    args = parse_args()
    main(args)

Using YOLOX engine path: ./assets/reproduce/yolox_nano_416_roi_trt.pth
Processing rsna-breast-cancer-detection


100%|████████████████████████████| 54706/54706 [00:53<00:00, 1026.34it/s]


Converting to 8-bits png images..
Starting 4 jobs with backend `loky`, 4 chunks...
Loading YOLOX from ./assets/reproduce/yolox_nano_416_roi_trt.pth
ROI extractor (YOLOX) loaded!
Loading YOLOX from ./assets/reproduce/yolox_nano_416_roi_trt.pth
ROI extractor (YOLOX) loaded!
Loading YOLOX from ./assets/reproduce/yolox_nano_416_roi_trt.pth
ROI extractor (YOLOX) loaded!
Loading YOLOX from ./assets/reproduce/yolox_nano_416_roi_trt.pth
ROI extractor (YOLOX) loaded!


100%|█████████▉| 13676/13677 [2:22:18<00:00,  1.85it/s]  

Convert done in 8541.229033470154 sec
Done!
-----------------




100%|██████████| 13677/13677 [2:22:19<00:00,  1.76it/s]100%|██████████| 13677/13677 [2:22:19<00:00,  1.60it/s]

ROI detection: using Otsu.
ROI detection: using Otsu.
ROI detection: using Otsu.
ROI detection: using Otsu.
ROI detection: using Otsu.




