In [1]:
# !pip install SimpleITK
# !pip install glob2

Collecting SimpleITK
  Downloading SimpleITK-2.2.1-cp38-cp38-win_amd64.whl (27.0 MB)
Installing collected packages: SimpleITK
Successfully installed SimpleITK-2.2.1


In [1]:
import SimpleITK as sitk
import os
from tqdm import tqdm
import numpy as np
import glob2

In [2]:
# One writer instance is shared by every write_dicom call. KeepOriginalImageUIDOn
# asks the writer to keep the image's existing UID rather than generating a new
# one (see the SimpleITK ImageFileWriter documentation).
WRITER = sitk.ImageFileWriter()
WRITER.KeepOriginalImageUIDOn()
def write_dicom(slide, slides_name, output_path, i):
    """Write one slice to ``output_path`` as ``<stem>_<i>.dcm``.

    Args:
        slide: Image handed to the shared ``WRITER`` (the callers in this
            notebook pass a single 2D slice of a volume).
        slides_name: File name of the source volume; everything from the first
            ``.`` onwards is dropped to build the output stem.
        output_path: Directory that receives the file. It is created when it
            does not exist yet.
        i: Slice index, appended to the stem.

    An existing file with the same name is never overwritten: a notice is
    printed and the slice is skipped.
    """
    file_name = f"{slides_name.split('.')[0]}_{i}.dcm"
    target = os.path.join(output_path, file_name)
    if os.path.isfile(target):
        print('[INFO] Alert Filename Duplicated!____________________________________________')
        return
    # The writer cannot create missing directories itself. An empty
    # output_path means "current directory", which always exists.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    WRITER.SetFileName(target)
    WRITER.Execute(slide)

In [3]:
def load_nii_format_volume(input_path):
    """Read the NIfTI file at ``input_path`` with SimpleITK.

    Progress is reported on stdout (file name before reading, volume size
    after reading).

    Returns:
        A ``(volume, file_name)`` tuple: the image returned by the reader and
        the base name of ``input_path``.
    """
    volume_name = os.path.basename(input_path)
    print("[INFO] Loading ", volume_name)

    # Force the NIfTI reader instead of letting SimpleITK pick an ImageIO.
    nifti_reader = sitk.ImageFileReader()
    nifti_reader.SetFileName(input_path)
    nifti_reader.SetImageIO("NiftiImageIO")
    volume = nifti_reader.Execute()

    print("[INFO] Volume Size: ", volume.GetSize())
    return volume, volume_name

In [4]:
# Shared StatisticsImageFilter instance. The filter computes the minimum,
# maximum, sum, mean, variance and sigma of an image; only the maximum is
# used here.
STATIC_IMAGE_FILTER = sitk.StatisticsImageFilter()
def is_black_mask(slide_mask):
    """Return True when no pixel of ``slide_mask`` is greater than zero."""
    STATIC_IMAGE_FILTER.Execute(slide_mask)  # run the statistics on this image
    brightest = STATIC_IMAGE_FILTER.GetMaximum()
    # Written as a negated ">" so the result matches the if/else form exactly.
    return not (brightest > 0.0)

In [5]:
# Shared threshold filter used to binarise masks. ThresholdImageFilter replaces
# every pixel outside the closed range [Lower, Upper] with OutsideValue, so with
# Lower = Upper = 0 every non-zero pixel becomes 1.0 and zero stays zero.
# The original code called SetLower(0.0) twice and never set the upper bound,
# leaving it at the filter's default; the bound is now set explicitly.
THRESHOLD_FILTER = sitk.ThresholdImageFilter()
THRESHOLD_FILTER.SetLower(0.0)
THRESHOLD_FILTER.SetUpper(0.0)
THRESHOLD_FILTER.SetOutsideValue(1.0)
def normalize_mask(slide_mask):
    """Return a copy of ``slide_mask`` in which every non-zero pixel is 1.0.

    Args:
        slide_mask: Mask image (the callers in this notebook pass whole
            volumes) accepted by ``sitk.ThresholdImageFilter``.

    Returns:
        The thresholded image produced by the shared ``THRESHOLD_FILTER``.
    """
    return THRESHOLD_FILTER.Execute(slide_mask)

In [7]:
def covid19_ct_lung_and_infection_segmentation(
    input_ct_dir=r'D:\YSC2023\Dataset\COVID-19 CT Lung and Infection Segmentation Dataset\COVID-19-CT-Seg_20cases',
    input_lungmask_dir=r'D:\YSC2023\Dataset\COVID-19 CT Lung and Infection Segmentation Dataset\Lung_Mask',
    input_lesionmask_dir=r'D:\YSC2023\Dataset\COVID-19 CT Lung and Infection Segmentation Dataset\Infection_Mask',
    output_covid_ct_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid',
    output_covid_lungmask_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid_LungMask',
    output_covid_lesionmask_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid_LesionMask',
    output_normal_ct_dir=r'D:\YSC2023\Implementation\Dataset\NONCOVID\Normal',
    output_normal_lungmask_dir=r'D:\YSC2023\Implementation\Dataset\NONCOVID\Normal_LungMask'
):
    """Split every volume in ``input_ct_dir`` into per-slice DICOM files.

    For each entry of ``input_ct_dir`` the lung mask and lesion mask with the
    same file name are loaded from their directories. Both masks are passed
    through ``normalize_mask`` and the slices are then sorted:

    * lung mask empty              -> slice is skipped;
    * lung and lesion mask present -> CT, lung mask and lesion mask are
      written to the three ``output_covid_*`` directories;
    * lung mask only               -> CT and lung mask are written to the two
      ``output_normal_*`` directories.

    All files of a slice are named after the CT volume. Processing stops at
    the first case whose three volumes differ in depth.
    """
    for case_file in os.listdir(input_ct_dir):
        ct_volume, ct_name = load_nii_format_volume(os.path.join(input_ct_dir, case_file))
        lung_volume, _ = load_nii_format_volume(os.path.join(input_lungmask_dir, case_file))
        lesion_volume, _ = load_nii_format_volume(os.path.join(input_lesionmask_dir, case_file))

        depth = ct_volume.GetDepth()
        if depth != lung_volume.GetDepth() or depth != lesion_volume.GetDepth():
            # A mismatch ends the whole run; remaining cases are not processed.
            print('[INFO] Invalid Data:', ct_name)
            break

        print('[INFO] Valid Data:', ct_name)
        print('[INFO] Normalizing Mask...')
        lung_volume = normalize_mask(lung_volume)
        lesion_volume = normalize_mask(lesion_volume)
        print('[INFO] Saving...')
        for z in tqdm(range(depth)):
            lung_slice = lung_volume[:, :, z]
            if is_black_mask(lung_slice):
                continue  # no lung tissue annotated on this slice

            ct_slice = ct_volume[:, :, z]
            lesion_slice = lesion_volume[:, :, z]
            if is_black_mask(lesion_slice):
                write_dicom(ct_slice, ct_name, output_normal_ct_dir, z)
                write_dicom(lung_slice, ct_name, output_normal_lungmask_dir, z)
            else:
                write_dicom(ct_slice, ct_name, output_covid_ct_dir, z)
                write_dicom(lung_slice, ct_name, output_covid_lungmask_dir, z)
                write_dicom(lesion_slice, ct_name, output_covid_lesionmask_dir, z)

In [8]:
# Shared cast filter: converts an image to 16-bit signed integer pixels.
CAST_FILTER = sitk.CastImageFilter()
CAST_FILTER.SetOutputPixelType(sitk.sitkInt16)
def covid19_ct_segmentation_dataset(
    input_ct_file=r'D:\YSC2023\Dataset\COVID-19 CT segmentation dataset\tr_im.nii.gz',
    input_lungmask_file=r'D:\YSC2023\Dataset\COVID-19 CT segmentation dataset\tr_lungmasks_updated.nii.gz',
    input_lesionmask_file=r'D:\YSC2023\Dataset\COVID-19 CT segmentation dataset\tr_mask.nii.gz',
    output_covid_ct_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid',
    output_covid_lungmask_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid_LungMask',
    output_covid_lesionmask_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid_LesionMask',
    output_normal_ct_dir=r'D:\YSC2023\Implementation\Dataset\NONCOVID\Normal',
    output_normal_lungmask_dir=r'D:\YSC2023\Implementation\Dataset\NONCOVID\Normal_LungMask'
):
    """Split one CT volume and its two mask volumes into per-slice DICOM files.

    The three input files are loaded, both masks are passed through
    ``normalize_mask`` and all three volumes are cast to int16. Slices are
    then sorted:

    * lung mask empty              -> slice is skipped;
    * lung and lesion mask present -> CT, lung mask and lesion mask are
      written to the three ``output_covid_*`` directories;
    * lung mask only               -> CT and lung mask are written to the two
      ``output_normal_*`` directories.

    All files of a slice are named after the CT volume. Nothing is written
    when the three volumes differ in depth.
    """
    ct_volume, ct_name = load_nii_format_volume(input_ct_file)
    lung_volume, _ = load_nii_format_volume(input_lungmask_file)
    lesion_volume, _ = load_nii_format_volume(input_lesionmask_file)

    depth = ct_volume.GetDepth()
    if depth != lung_volume.GetDepth() or depth != lesion_volume.GetDepth():
        print('[INFO] Invalid Data:', ct_name)
        return

    print('[INFO] Valid Data:', ct_name)
    print('[INFO] Normalizing Mask...')
    lung_volume = normalize_mask(lung_volume)
    lesion_volume = normalize_mask(lesion_volume)
    print('[INFO] Casting to int16 ...')
    ct_volume = CAST_FILTER.Execute(ct_volume)
    lung_volume = CAST_FILTER.Execute(lung_volume)
    lesion_volume = CAST_FILTER.Execute(lesion_volume)
    print('[INFO] Saving...')
    for z in tqdm(range(ct_volume.GetDepth())):
        lung_slice = lung_volume[:, :, z]
        if is_black_mask(lung_slice):
            continue  # no lung tissue annotated on this slice

        ct_slice = ct_volume[:, :, z]
        lesion_slice = lesion_volume[:, :, z]
        if is_black_mask(lesion_slice):
            write_dicom(ct_slice, ct_name, output_normal_ct_dir, z)
            write_dicom(lung_slice, ct_name, output_normal_lungmask_dir, z)
        else:
            write_dicom(ct_slice, ct_name, output_covid_ct_dir, z)
            write_dicom(lung_slice, ct_name, output_covid_lungmask_dir, z)
            write_dicom(lesion_slice, ct_name, output_covid_lesionmask_dir, z)

In [10]:
def radiopaedia(
    input_ct_dir=r'D:\YSC2023\Dataset\Radiopaedia\rp_im',
    input_lungmask_dir=r'D:\YSC2023\Dataset\Radiopaedia\rp_lung_msk',
    input_lesionmask_dir=r'D:\YSC2023\Dataset\Radiopaedia\rp_msk',
    output_covid_ct_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid',
    output_covid_lungmask_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid_LungMask',
    output_covid_lesionmask_dir=r'D:\YSC2023\Implementation\Dataset\COVID\Covid_LesionMask',
    output_normal_ct_dir=r'D:\YSC2023\Implementation\Dataset\NONCOVID\Normal',
    output_normal_lungmask_dir=r'D:\YSC2023\Implementation\Dataset\NONCOVID\Normal_LungMask'
):
    """Split every volume in ``input_ct_dir`` into per-slice int16 DICOM files.

    For each entry of ``input_ct_dir`` the lung mask and lesion mask with the
    same file name are loaded from their directories. Both masks are passed
    through ``normalize_mask``, all three volumes are cast to int16 with the
    module-level ``CAST_FILTER``, and the slices are then sorted:

    * lung mask empty              -> slice is skipped;
    * lung and lesion mask present -> CT, lung mask and lesion mask are
      written to the three ``output_covid_*`` directories;
    * lung mask only               -> CT and lung mask are written to the two
      ``output_normal_*`` directories.

    All files of a slice are named after the CT volume. Processing stops at
    the first case whose three volumes differ in depth.
    """
    for case_file in os.listdir(input_ct_dir):
        ct_volume, ct_name = load_nii_format_volume(os.path.join(input_ct_dir, case_file))
        lung_volume, _ = load_nii_format_volume(os.path.join(input_lungmask_dir, case_file))
        lesion_volume, _ = load_nii_format_volume(os.path.join(input_lesionmask_dir, case_file))

        depth = ct_volume.GetDepth()
        if depth != lung_volume.GetDepth() or depth != lesion_volume.GetDepth():
            # A mismatch ends the whole run; remaining cases are not processed.
            print('[INFO] Invalid Data:', ct_name)
            break

        print('[INFO] Valid Data:', ct_name)
        print('[INFO] Normalizing Mask...')
        lung_volume = normalize_mask(lung_volume)
        lesion_volume = normalize_mask(lesion_volume)
        print('[INFO] Casting to int16 ...')
        ct_volume = CAST_FILTER.Execute(ct_volume)
        lung_volume = CAST_FILTER.Execute(lung_volume)
        lesion_volume = CAST_FILTER.Execute(lesion_volume)
        print('[INFO] Saving...')
        for z in tqdm(range(ct_volume.GetDepth())):
            lung_slice = lung_volume[:, :, z]
            if is_black_mask(lung_slice):
                continue  # no lung tissue annotated on this slice

            ct_slice = ct_volume[:, :, z]
            lesion_slice = lesion_volume[:, :, z]
            if is_black_mask(lesion_slice):
                write_dicom(ct_slice, ct_name, output_normal_ct_dir, z)
                write_dicom(lung_slice, ct_name, output_normal_lungmask_dir, z)
            else:
                write_dicom(ct_slice, ct_name, output_covid_ct_dir, z)
                write_dicom(lung_slice, ct_name, output_covid_lungmask_dir, z)
                write_dicom(lesion_slice, ct_name, output_covid_lesionmask_dir, z)

In [11]:
# Run the three converters with their default arguments. All three share the
# same default COVID / NONCOVID output directories, so their slices end up
# side by side in those folders.
covid19_ct_lung_and_infection_segmentation()
covid19_ct_segmentation_dataset()
radiopaedia()

[INFO] Loading  coronacases_001.nii.gz
[INFO] Volume Size:  (512, 512, 301)
[INFO] Loading  coronacases_001.nii.gz
[INFO] Volume Size:  (512, 512, 301)
[INFO] Loading  coronacases_001.nii.gz
[INFO] Volume Size:  (512, 512, 301)


  0%|                                                                                          | 0/301 [00:00<?, ?it/s]

[INFO] Valid Data: coronacases_001.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 301/301 [00:06<00:00, 43.85it/s]


[INFO] Loading  coronacases_002.nii.gz
[INFO] Volume Size:  (512, 512, 200)
[INFO] Loading  coronacases_002.nii.gz
[INFO] Volume Size:  (512, 512, 200)
[INFO] Loading  coronacases_002.nii.gz
[INFO] Volume Size:  (512, 512, 200)
[INFO] Valid Data: coronacases_002.nii.gz
[INFO] Normalizing Mask...


  5%|████                                                                             | 10/200 [00:00<00:02, 88.87it/s]

[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:05<00:00, 39.12it/s]


[INFO] Loading  coronacases_003.nii.gz
[INFO] Volume Size:  (512, 512, 200)
[INFO] Loading  coronacases_003.nii.gz
[INFO] Volume Size:  (512, 512, 200)
[INFO] Loading  coronacases_003.nii.gz
[INFO] Volume Size:  (512, 512, 200)
[INFO] Valid Data: coronacases_003.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:04<00:00, 42.08it/s]


[INFO] Loading  coronacases_004.nii.gz
[INFO] Volume Size:  (512, 512, 270)
[INFO] Loading  coronacases_004.nii.gz
[INFO] Volume Size:  (512, 512, 270)
[INFO] Loading  coronacases_004.nii.gz
[INFO] Volume Size:  (512, 512, 270)
[INFO] Valid Data: coronacases_004.nii.gz
[INFO] Normalizing Mask...


  2%|█▌                                                                                | 5/270 [00:00<00:06, 41.11it/s]

[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [00:06<00:00, 39.95it/s]


[INFO] Loading  coronacases_005.nii.gz
[INFO] Volume Size:  (512, 512, 290)
[INFO] Loading  coronacases_005.nii.gz
[INFO] Volume Size:  (512, 512, 290)
[INFO] Loading  coronacases_005.nii.gz
[INFO] Volume Size:  (512, 512, 290)
[INFO] Valid Data: coronacases_005.nii.gz
[INFO] Normalizing Mask...


  4%|███▌                                                                            | 13/290 [00:00<00:02, 106.00it/s]

[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 290/290 [00:07<00:00, 40.27it/s]


[INFO] Loading  coronacases_006.nii.gz
[INFO] Volume Size:  (512, 512, 213)
[INFO] Loading  coronacases_006.nii.gz
[INFO] Volume Size:  (512, 512, 213)
[INFO] Loading  coronacases_006.nii.gz
[INFO] Volume Size:  (512, 512, 213)
[INFO] Valid Data: coronacases_006.nii.gz
[INFO] Normalizing Mask...


  8%|██████▍                                                                         | 17/213 [00:00<00:01, 147.64it/s]

[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 213/213 [00:04<00:00, 42.85it/s]


[INFO] Loading  coronacases_007.nii.gz
[INFO] Volume Size:  (512, 512, 249)
[INFO] Loading  coronacases_007.nii.gz
[INFO] Volume Size:  (512, 512, 249)
[INFO] Loading  coronacases_007.nii.gz
[INFO] Volume Size:  (512, 512, 249)
[INFO] Valid Data: coronacases_007.nii.gz
[INFO] Normalizing Mask...


  5%|███▉                                                                             | 12/249 [00:00<00:02, 98.98it/s]

[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 249/249 [00:06<00:00, 41.14it/s]


[INFO] Loading  coronacases_008.nii.gz
[INFO] Volume Size:  (512, 512, 301)
[INFO] Loading  coronacases_008.nii.gz
[INFO] Volume Size:  (512, 512, 301)
[INFO] Loading  coronacases_008.nii.gz
[INFO] Volume Size:  (512, 512, 301)


  6%|█████                                                                           | 19/301 [00:00<00:01, 182.46it/s]

[INFO] Valid Data: coronacases_008.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 301/301 [00:07<00:00, 39.83it/s]


[INFO] Loading  coronacases_009.nii.gz
[INFO] Volume Size:  (512, 512, 256)
[INFO] Loading  coronacases_009.nii.gz
[INFO] Volume Size:  (512, 512, 256)
[INFO] Loading  coronacases_009.nii.gz
[INFO] Volume Size:  (512, 512, 256)
[INFO] Valid Data: coronacases_009.nii.gz
[INFO] Normalizing Mask...


  4%|██▉                                                                               | 9/256 [00:00<00:03, 75.63it/s]

[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 256/256 [00:06<00:00, 41.82it/s]


[INFO] Loading  coronacases_010.nii.gz
[INFO] Volume Size:  (512, 512, 301)
[INFO] Loading  coronacases_010.nii.gz
[INFO] Volume Size:  (512, 512, 301)
[INFO] Loading  coronacases_010.nii.gz
[INFO] Volume Size:  (512, 512, 301)


  6%|█████                                                                           | 19/301 [00:00<00:01, 181.66it/s]

[INFO] Valid Data: coronacases_010.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 301/301 [00:06<00:00, 43.93it/s]


[INFO] Loading  radiopaedia_10_85902_1.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Loading  radiopaedia_10_85902_1.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Loading  radiopaedia_10_85902_1.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Valid Data: radiopaedia_10_85902_1.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 39/39 [00:00<00:00, 41.23it/s]


[INFO] Loading  radiopaedia_10_85902_3.nii.gz
[INFO] Volume Size:  (630, 630, 418)
[INFO] Loading  radiopaedia_10_85902_3.nii.gz
[INFO] Volume Size:  (630, 630, 418)
[INFO] Loading  radiopaedia_10_85902_3.nii.gz
[INFO] Volume Size:  (630, 630, 418)
[INFO] Valid Data: radiopaedia_10_85902_3.nii.gz
[INFO] Normalizing Mask...


  8%|██████▌                                                                         | 34/418 [00:00<00:02, 167.57it/s]

[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 418/418 [00:10<00:00, 39.80it/s]


[INFO] Loading  radiopaedia_14_85914_0.nii.gz
[INFO] Volume Size:  (630, 401, 110)
[INFO] Loading  radiopaedia_14_85914_0.nii.gz
[INFO] Volume Size:  (630, 401, 110)


  0%|                                                                                          | 0/110 [00:00<?, ?it/s]

[INFO] Loading  radiopaedia_14_85914_0.nii.gz
[INFO] Volume Size:  (630, 401, 110)
[INFO] Valid Data: radiopaedia_14_85914_0.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 110/110 [00:03<00:00, 35.92it/s]


[INFO] Loading  radiopaedia_27_86410_0.nii.gz
[INFO] Volume Size:  (630, 630, 66)
[INFO] Loading  radiopaedia_27_86410_0.nii.gz
[INFO] Volume Size:  (630, 630, 66)
[INFO] Loading  radiopaedia_27_86410_0.nii.gz
[INFO] Volume Size:  (630, 630, 66)


 14%|███████████▎                                                                       | 9/66 [00:00<00:00, 69.87it/s]

[INFO] Valid Data: radiopaedia_27_86410_0.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 66/66 [00:01<00:00, 39.11it/s]
  0%|                                                                                           | 0/42 [00:00<?, ?it/s]

[INFO] Loading  radiopaedia_29_86490_1.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Loading  radiopaedia_29_86490_1.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Loading  radiopaedia_29_86490_1.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Valid Data: radiopaedia_29_86490_1.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 42/42 [00:00<00:00, 53.43it/s]
  0%|                                                                                           | 0/42 [00:00<?, ?it/s]

[INFO] Loading  radiopaedia_29_86491_1.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Loading  radiopaedia_29_86491_1.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Loading  radiopaedia_29_86491_1.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Valid Data: radiopaedia_29_86491_1.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 42/42 [00:00<00:00, 42.14it/s]


[INFO] Loading  radiopaedia_36_86526_0.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Loading  radiopaedia_36_86526_0.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Loading  radiopaedia_36_86526_0.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Valid Data: radiopaedia_36_86526_0.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 45/45 [00:01<00:00, 32.36it/s]


[INFO] Loading  radiopaedia_40_86625_0.nii.gz
[INFO] Volume Size:  (630, 630, 93)
[INFO] Loading  radiopaedia_40_86625_0.nii.gz


 12%|█████████▋                                                                        | 11/93 [00:00<00:00, 90.81it/s]

[INFO] Volume Size:  (630, 630, 93)
[INFO] Loading  radiopaedia_40_86625_0.nii.gz
[INFO] Volume Size:  (630, 630, 93)
[INFO] Valid Data: radiopaedia_40_86625_0.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 93/93 [00:02<00:00, 36.97it/s]
  0%|                                                                                           | 0/39 [00:00<?, ?it/s]

[INFO] Loading  radiopaedia_4_85506_1.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Loading  radiopaedia_4_85506_1.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Loading  radiopaedia_4_85506_1.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Valid Data: radiopaedia_4_85506_1.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 39/39 [00:00<00:00, 39.61it/s]
  0%|                                                                                           | 0/45 [00:00<?, ?it/s]

[INFO] Loading  radiopaedia_7_85703_0.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Loading  radiopaedia_7_85703_0.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Loading  radiopaedia_7_85703_0.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Valid Data: radiopaedia_7_85703_0.nii.gz
[INFO] Normalizing Mask...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 45/45 [00:01<00:00, 32.70it/s]


[INFO] Loading  tr_im.nii.gz
[INFO] Volume Size:  (512, 512, 100)
[INFO] Loading  tr_lungmasks_updated.nii.gz
[INFO] Volume Size:  (512, 512, 100)
[INFO] Loading  tr_mask.nii.gz


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

[INFO] Volume Size:  (512, 512, 100)
[INFO] Valid Data: tr_im.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...
[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 33.66it/s]


[INFO] Loading  1.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Loading  1.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Loading  1.nii.gz
[INFO] Volume Size:  (630, 630, 45)


 11%|█████████▏                                                                         | 5/45 [00:00<00:01, 38.89it/s]

[INFO] Valid Data: 1.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 45/45 [00:01<00:00, 31.41it/s]


[INFO] Loading  2.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Loading  2.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Loading  2.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Valid Data: 2.nii.gz
[INFO] Normalizing Mask...


 18%|██████████████▉                                                                    | 7/39 [00:00<00:00, 64.99it/s]

[INFO] Casting to int16 ...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 39/39 [00:00<00:00, 39.89it/s]


[INFO] Loading  3.nii.gz
[INFO] Volume Size:  (630, 630, 418)
[INFO] Loading  3.nii.gz
[INFO] Volume Size:  (630, 630, 418)
[INFO] Loading  3.nii.gz
[INFO] Volume Size:  (630, 630, 418)
[INFO] Valid Data: 3.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...


  4%|██▊                                                                             | 15/418 [00:00<00:02, 145.27it/s]

[INFO] Saving...


100%|████████████████████████████████████████████████████████████████████████████████| 418/418 [00:10<00:00, 39.02it/s]


[INFO] Loading  4.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Loading  4.nii.gz


  0%|                                                                                           | 0/39 [00:00<?, ?it/s]

[INFO] Volume Size:  (630, 630, 39)
[INFO] Loading  4.nii.gz
[INFO] Volume Size:  (630, 630, 39)
[INFO] Valid Data: 4.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 39/39 [00:01<00:00, 36.47it/s]


[INFO] Loading  5.nii.gz


  0%|                                                                                           | 0/66 [00:00<?, ?it/s]

[INFO] Volume Size:  (630, 630, 66)
[INFO] Loading  5.nii.gz
[INFO] Volume Size:  (630, 630, 66)
[INFO] Loading  5.nii.gz
[INFO] Volume Size:  (630, 630, 66)
[INFO] Valid Data: 5.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 66/66 [00:01<00:00, 34.06it/s]


[INFO] Loading  6.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Loading  6.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Loading  6.nii.gz
[INFO] Volume Size:  (630, 630, 42)


 29%|███████████████████████▏                                                         | 12/42 [00:00<00:00, 102.54it/s]

[INFO] Valid Data: 6.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 42/42 [00:00<00:00, 48.54it/s]


[INFO] Loading  7.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Loading  7.nii.gz
[INFO] Volume Size:  (630, 630, 42)


  0%|                                                                                           | 0/42 [00:00<?, ?it/s]

[INFO] Loading  7.nii.gz
[INFO] Volume Size:  (630, 630, 42)
[INFO] Valid Data: 7.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 42/42 [00:01<00:00, 37.76it/s]


[INFO] Loading  8.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Loading  8.nii.gz
[INFO] Volume Size:  (630, 630, 45)


  7%|█████▌                                                                             | 3/45 [00:00<00:01, 29.76it/s]

[INFO] Loading  8.nii.gz
[INFO] Volume Size:  (630, 630, 45)
[INFO] Valid Data: 8.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 45/45 [00:01<00:00, 32.54it/s]


[INFO] Loading  9.nii.gz
[INFO] Volume Size:  (630, 630, 93)
[INFO] Loading  9.nii.gz
[INFO] Volume Size:  (630, 630, 93)
[INFO] Loading  9.nii.gz
[INFO] Volume Size:  (630, 630, 93)
[INFO] Valid Data: 9.nii.gz
[INFO] Normalizing Mask...
[INFO] Casting to int16 ...
[INFO] Saving...


100%|██████████████████████████████████████████████████████████████████████████████████| 93/93 [00:02<00:00, 36.43it/s]


In [17]:
# Default locations used by generate_train_validation_test_set.
datset_path = r"D:\YSC2023\Implementation\Dataset"
covid_data_path = r"D:\YSC2023\Implementation\Dataset\COVID\Covid"
normal_data_path = r"D:\YSC2023\Implementation\Dataset\NONCOVID\Normal"


def _split_bounds(count, train_ratio, validation_ratio):
    """Return the exclusive slice ends ``(train_end, validation_end)`` for ``count`` items."""
    # Slice ends are exclusive, so the item counts are used directly. The
    # previous "- 1" made the training split one item short and the test
    # split one item long.
    train_end = int(count * train_ratio)
    validation_end = int(count * (train_ratio + validation_ratio))
    return train_end, validation_end


def _write_name_list(list_path, image_paths):
    """Write the base name of each path in ``image_paths`` to ``list_path``, one per line."""
    with open(list_path, "w") as f:
        for image_path in image_paths:
            f.write(os.path.basename(image_path) + '\n')


def generate_train_validation_test_set(dataset_dir=None, covid_dir=None, normal_dir=None, seed=None):
    """Write shuffled train/validation/test file lists for both classes.

    Every ``*.dcm`` file in the COVID and normal directories is collected,
    files whose name starts with ``radiopaedia_`` are dropped, the remaining
    files are shuffled and split 65 % / 20 % / remainder. Six text files are
    written to the dataset directory (``covid_train.txt``,
    ``covid_validation.txt``, ``covid_test.txt`` and the three ``normal_*``
    counterparts), each holding one file base name per line.

    Args:
        dataset_dir: Directory receiving the list files; defaults to
            ``datset_path``.
        covid_dir: Directory with COVID slices; defaults to ``covid_data_path``.
        normal_dir: Directory with normal slices; defaults to
            ``normal_data_path``.
        seed: Optional integer seed for a reproducible shuffle. ``None`` keeps
            the previous behaviour of shuffling with NumPy's global state.

    The number of COVID and normal files is printed twice: before and after
    the ``radiopaedia_`` files are removed.
    """
    # Standard-library glob is enough for the flat "*.dcm" pattern used here.
    import glob

    train_ratio = 0.65
    validation_ratio = 0.20
    # The test split receives whatever is left (about 15 % of each class).

    dataset_dir = datset_path if dataset_dir is None else dataset_dir
    covid_dir = covid_data_path if covid_dir is None else covid_dir
    normal_dir = normal_data_path if normal_dir is None else normal_dir

    # Sorted so that a given seed always yields the same split, independent of
    # the order in which the file system lists the directory.
    images_covid = sorted(glob.glob(os.path.join(covid_dir, "*.dcm")))
    images_normal = sorted(glob.glob(os.path.join(normal_dir, "*.dcm")))
    print(len(images_covid), len(images_normal))

    # Drop slices coming from "radiopaedia_*" volumes.
    # NOTE(review): presumably because those volumes are not stored in
    # Hounsfield units - confirm against the dataset description.
    images_covid = [
        path for path in images_covid
        if os.path.basename(path).split('_')[0] != "radiopaedia"
    ]
    images_normal = [
        path for path in images_normal
        if os.path.basename(path).split('_')[0] != "radiopaedia"
    ]
    print(len(images_covid), len(images_normal))

    # Shuffle the data.
    # NOTE(review): shuffling happens per slice, so slices of one volume can
    # end up in different splits - confirm this is acceptable for evaluation.
    shuffle = np.random.shuffle if seed is None else np.random.RandomState(seed).shuffle
    shuffle(images_covid)
    shuffle(images_normal)

    for label, images in (("covid", images_covid), ("normal", images_normal)):
        train_end, validation_end = _split_bounds(len(images), train_ratio, validation_ratio)
        _write_name_list(os.path.join(dataset_dir, f"{label}_train.txt"), images[:train_end])
        _write_name_list(os.path.join(dataset_dir, f"{label}_validation.txt"), images[train_end:validation_end])
        _write_name_list(os.path.join(dataset_dir, f"{label}_test.txt"), images[validation_end:])

In [18]:
# Write the six split lists (covid_/normal_ x train/validation/test .txt).
generate_train_validation_test_set()

2315 1471
1822 1147
