# Initial setup
## Load dependencies

In [30]:
import os
import dicom2nifti
from pathlib import Path
import multiprocessing
import time
from shutil import copyfile

## Define locations for data
We seperate output directories into raw and non-raw/final. This is because we need to do a set of preprocessing stages to the raw data before we make the final dataset.

In [38]:
# inputs
input_image_dir = '/media/jake/1tb_ssd/mctv_analysis/head_scans'
input_labels_dir = '/media/jake/1tb_ssd/mctv_analysis/mctv_resfiles'

# outputs
main_dir = '/media/jake/data/jake/mctnet_data'
raw_image_dir = '/media/jake/data/jake/mctnet_data/raw_images'
raw_labels_dir = '/media/jake/data/jake/mctnet_data/raw_labels'
image_dir = '/media/jake/data/jake/mctnet_data/images'
labels_dir = '/media/jake/data/jake/mctnet_data/labels'

if not os.path.isdir(image_dir):
    os.makedirs(image_dir)

if not os.path.isdir(labels_dir):
    os.makedirs(labels_dir)

# Collate images
Our original images are organised in a messy way. Let's collate them together and fix that up.

First, let's find all the images in the input image directory

In [39]:
nii_files = []
dicom_dirs = []
for root, dirs, files in os.walk(input_image_dir):
    for i, file in enumerate(files):
        if file.endswith('.nii'):
            nii_files.append(root + '/' + file)
        if file.endswith('.dcm'):
            dicom_dirs.append(root)

# remove duplicate dicom_dirs
dicom_dirs = list(set(dicom_dirs))

print('Found ' + str(len(nii_files)) + ' nifti files')
print('Found ' + str(len(dicom_dirs)) + ' dicom directories')

Found 26 nifti files
Found 70 dicom directories


Let's plot the first 10

In [40]:
nii_files[:10]

['/media/jake/1tb_ssd/mctv_analysis/head_scans/Brachyscelus.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Vibilia_01_FEG191211_087_filterted.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Paraphronima_FEG200130_103_head_04.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Scypholanceola_head_02_FEG191022_076.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Platyscelus_02_FEG191112_082.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Phronima_05_FEG200107_090.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Paraphronima_FEG200130_102_head_05.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Hyperia 01_segmented eyes.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Phronima_04_FEG200107_089.nii',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/psyllid_20190906_male_eye/psyllid_20190906_male_eye.nii']

In [41]:
dicom_dirs[:10]

['/media/jake/1tb_ssd/mctv_analysis/head_scans/Phronima_1450842_body_1_slices',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Streetsia_head_sp_4_USNM1450785_2',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Cystisoma_FEG190801_035_brain',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/P_crassipes_FEG200129_099_cropped',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/P_crassipes_FEG190213_002_head_eyesdamaged',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Lanceola_FEG20190212_02b_head_slices',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Paraphronima_head_04_FEG200130_103',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Pronoe_sp6_1450786F',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/P_crassipes_FEG190213_003b_02_head',
 '/media/jake/1tb_ssd/mctv_analysis/head_scans/Vibilia_FEG191112_081_blurry']

# Move nifti to raw images folder
Let's move all the nifti files to the raw_images directory.


In [42]:
for d in nii_files:
    p = Path(d)
    filename = p.stem
    new_path = raw_image_dir + "/" + filename + '.nii'
    if not os.path.isfile(new_path):
        copyfile(d, new_path)
        print(f'Copied nifti {d} to {new_path}\n')
    else:
        print(f'File {new_path} already exists, so skipped this copy\n')


File /media/jake/data/jake/mctnet_data/raw_images/Brachyscelus.nii already exists, so skipped this copy

File /media/jake/data/jake/mctnet_data/raw_images/Vibilia_01_FEG191211_087_filterted.nii already exists, so skipped this copy

File /media/jake/data/jake/mctnet_data/raw_images/Paraphronima_FEG200130_103_head_04.nii already exists, so skipped this copy

File /media/jake/data/jake/mctnet_data/raw_images/Scypholanceola_head_02_FEG191022_076.nii already exists, so skipped this copy

File /media/jake/data/jake/mctnet_data/raw_images/Platyscelus_02_FEG191112_082.nii already exists, so skipped this copy

File /media/jake/data/jake/mctnet_data/raw_images/Phronima_05_FEG200107_090.nii already exists, so skipped this copy

File /media/jake/data/jake/mctnet_data/raw_images/Paraphronima_FEG200130_102_head_05.nii already exists, so skipped this copy

File /media/jake/data/jake/mctnet_data/raw_images/Hyperia 01_segmented eyes.nii already exists, so skipped this copy

File /media/jake/data/jake/m

# Convert dicom to nifti
A bunch of files are in the dicom format. Let's convert these to nifti, so that all our data are consistent and in single files.

In [None]:
print(f'Converting {len(dicom_dirs)} dicom scans to nifti')

def convert_nifti_to_dicom(d):
    print(f'Starting conversion of {d} \n...')
    p = Path(d)
    filename = p.stem
    new_path = raw_image_dir + "/" + filename + '.nii'
    if not os.path.isfile(new_path):
        dicom2nifti.dicom_series_to_nifti(d, new_path, reorient_nifti=True)
        print(f'Converted dicom {d} to {new_path}\n')
    else:
        print(f'File {new_path} already exists, so skipped this conversion\n')


starttime = time.time()
pool = multiprocessing.Pool(processes=3)
pool.map(convert_nifti_to_dicom, dicom_dirs)
pool.close()
# for d in dicom_dirs:
#     convert_nifti_to_dicom(d)
print('That took {} seconds'.format(time.time() - starttime))