# Convert DICOM series to NIfTI

Converts each DICOM series to a `.hdr` / `.img.gz` file pair.

In [1]:
%pip install pyradiomics dicom_numpy pydicom plotly matplotlib scikit-image simpleITK pynrrd dicom2nifti NiBabel NiLearn openpyxl pydicom-seg tqdm dcmstack pandarallel pynrrd

Note: you may need to restart the kernel to use updated packages.


In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import dicom_numpy
import pydicom as dicom

import dicom2nifti
import nibabel as nib
import nilearn as nil
import scipy.ndimage as ndi
import os

import SimpleITK as sitk

import glob
from tqdm import tqdm
import dcmstack

import multiprocessing
from pandarallel import pandarallel

import re

# Register tqdm's progress-bar variants of the pandas apply methods.
tqdm.pandas()

# Use up to 20 workers, but never more than the machine has CPUs, so the
# notebook does not oversubscribe smaller machines.
NB_WORKERS = min(20, multiprocessing.cpu_count())
pandarallel.initialize(progress_bar=True, nb_workers=NB_WORKERS)

INFO: Pandarallel will run on 20 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

https://nalepae.github.io/pandarallel/troubleshooting/


In [3]:
# Input: root folder of the downloaded DICOM manifest.
img_directories = '../../Simple Path Dataset/manifest-1654812109500/'

# Output: folder that receives the converted images.
transfer_directory = '../../Processed NIFTI Dataset/'

# Annotation spreadsheet, indexed by its "Patient ID" column.
annotation_boxes = (
    pd.read_excel("../../Simple Path Dataset/Annotation_Boxes.xlsx")
    .set_index("Patient ID")
)

In [4]:
# The mode must be an octal literal: decimal 777 equals 0o1411, which on POSIX
# sets the sticky bit and leaves the owner without write/search permission.
# exist_ok=True keeps this cell safe to re-run.
os.makedirs(transfer_directory, mode=0o777, exist_ok=True)

In [5]:
# Spreadsheet that maps every DICOM file to its different path layouts.
mapping_workbook = "../../Simple Path Dataset/Breast-Cancer-MRI-filepath_filename-mapping.xlsx"
file_descriptions = pd.read_excel(mapping_workbook)

In [6]:
# Keep only the identifier and the three path columns used below.
mapping_columns = [
    'sop_instance_UID',
    'original_path_and_filename',
    'classic_path',
    'descriptive_path',
]
file_descriptions = file_descriptions[mapping_columns]
display(file_descriptions)

Unnamed: 0,sop_instance_UID,original_path_and_filename,classic_path,descriptive_path
0,1.3.6.1.4.1.14519.5.2.1.1805789812895034139917...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
1,1.3.6.1.4.1.14519.5.2.1.4903237729147735321973...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
2,1.3.6.1.4.1.14519.5.2.1.3061160038794820079325...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
3,1.3.6.1.4.1.14519.5.2.1.1574717199045785031549...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
4,1.3.6.1.4.1.14519.5.2.1.2594404476894572978078...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
...,...,...,...,...
773121,1.3.6.1.4.1.14519.5.2.1.2393425910452664915158...,DICOM_Images/Breast_MRI_922/T1/Breast_MRI_922_...,Duke-Breast-Cancer-MRI/Breast_MRI_922/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI922/01-01-1990...
773122,1.3.6.1.4.1.14519.5.2.1.1921416620775060655567...,DICOM_Images/Breast_MRI_922/T1/Breast_MRI_922_...,Duke-Breast-Cancer-MRI/Breast_MRI_922/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI922/01-01-1990...
773123,1.3.6.1.4.1.14519.5.2.1.1148085546624000943273...,DICOM_Images/Breast_MRI_922/T1/Breast_MRI_922_...,Duke-Breast-Cancer-MRI/Breast_MRI_922/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI922/01-01-1990...
773124,1.3.6.1.4.1.14519.5.2.1.1929308890572593698573...,DICOM_Images/Breast_MRI_922/T1/Breast_MRI_922_...,Duke-Breast-Cancer-MRI/Breast_MRI_922/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI922/01-01-1990...


In [7]:
# Split the path columns on '/' to derive: patient ID, sequence, description,
# and the folder that holds each DICOM file.

def _nth_component(position):
    """Return a function that picks the `position`-th '/'-separated piece of a path."""
    def pick(path):
        return path.split('/')[position]
    return pick


def _parent_folder(path):
    """Return `path` with its last '/'-separated piece removed."""
    return '/'.join(path.split('/')[:-1])


classic_paths = file_descriptions['classic_path']
original_paths = file_descriptions['original_path_and_filename']

file_descriptions['patient'] = classic_paths.apply(_nth_component(1))
file_descriptions['sequence'] = classic_paths.apply(_nth_component(3))
file_descriptions['description'] = original_paths.apply(_nth_component(2))
file_descriptions['dicom_paths'] = classic_paths.apply(_parent_folder)

In [8]:
# List the distinct description labels found in the mapping.
file_descriptions['description'].unique()

array(['post_1', 'post_2', 'post_3', 'post_4', 'pre', 'T1'], dtype=object)

In [9]:
# Keep only the rows whose description is 'post_1' or 'pre'.
wanted_descriptions = ['post_1', 'pre']
is_relevant = file_descriptions['description'].isin(wanted_descriptions)
relevant_file_descriptions = file_descriptions[is_relevant]

In [10]:
# Collapse the per-file rows to one row per distinct
# (patient, sequence, folder, description) combination, then resolve the input
# folder, the per-patient output folder and the output file name for each row.
series_columns = ['patient', 'sequence', 'dicom_paths', 'description']
relevant_data = (
    relevant_file_descriptions[series_columns]
    .drop_duplicates()
    .reset_index(drop=True)
    .assign(
        dicom_paths=lambda frame: frame['dicom_paths'].apply(
            lambda relative: os.path.join(img_directories, relative)
        ),
        patient_dir=lambda frame: frame['patient'].apply(
            lambda patient_id: os.path.join(transfer_directory, patient_id)
        ),
        output_paths=lambda frame: frame.apply(
            lambda record: os.path.join(record['patient_dir'], record['description']) + '.img.gz',
            axis=1,
        ),
    )
)
relevant_data

Unnamed: 0,patient,sequence,dicom_paths,description,patient_dir,output_paths
0,Breast_MRI_001,1.3.6.1.4.1.14519.5.2.1.1514720349923003958578...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../Processed NIFTI Dataset/Breast_MRI_001,../../Processed NIFTI Dataset/Breast_MRI_001\p...
1,Breast_MRI_001,1.3.6.1.4.1.14519.5.2.1.1857778498036652445367...,../../Simple Path Dataset/manifest-16548121095...,pre,../../Processed NIFTI Dataset/Breast_MRI_001,../../Processed NIFTI Dataset/Breast_MRI_001\p...
2,Breast_MRI_002,1.3.6.1.4.1.14519.5.2.1.1702386334115195086234...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../Processed NIFTI Dataset/Breast_MRI_002,../../Processed NIFTI Dataset/Breast_MRI_002\p...
3,Breast_MRI_002,1.3.6.1.4.1.14519.5.2.1.2920879504444133146439...,../../Simple Path Dataset/manifest-16548121095...,pre,../../Processed NIFTI Dataset/Breast_MRI_002,../../Processed NIFTI Dataset/Breast_MRI_002\p...
4,Breast_MRI_003,1.3.6.1.4.1.14519.5.2.1.3339321204011089072051...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../Processed NIFTI Dataset/Breast_MRI_003,../../Processed NIFTI Dataset/Breast_MRI_003\p...
...,...,...,...,...,...,...
1839,Breast_MRI_920,1.3.6.1.4.1.14519.5.2.1.2243370307638913554367...,../../Simple Path Dataset/manifest-16548121095...,pre,../../Processed NIFTI Dataset/Breast_MRI_920,../../Processed NIFTI Dataset/Breast_MRI_920\p...
1840,Breast_MRI_921,1.3.6.1.4.1.14519.5.2.1.2153335720893817815538...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../Processed NIFTI Dataset/Breast_MRI_921,../../Processed NIFTI Dataset/Breast_MRI_921\p...
1841,Breast_MRI_921,1.3.6.1.4.1.14519.5.2.1.2995579324872770731987...,../../Simple Path Dataset/manifest-16548121095...,pre,../../Processed NIFTI Dataset/Breast_MRI_921,../../Processed NIFTI Dataset/Breast_MRI_921\p...
1842,Breast_MRI_922,1.3.6.1.4.1.14519.5.2.1.5039771361051598068760...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../Processed NIFTI Dataset/Breast_MRI_922,../../Processed NIFTI Dataset/Breast_MRI_922\p...


In [29]:
def dcm2series(row):
    """Read one DICOM series and write it to disk as a compressed image.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide:
        - 'patient_dir': output folder, created if it does not exist;
        - 'dicom_paths': folder containing the DICOM files of the series;
        - 'output_paths': destination file name passed to SimpleITK.

    Returns
    -------
    None
        The function is called for its side effect of writing the image file.

    Raises
    ------
    FileNotFoundError
        If no DICOM files are found in ``row['dicom_paths']``.
    ValueError
        If a DICOM file name does not end in ``-<number>.dcm``.
    """
    # Imports are kept inside the function so it is self-contained when it is
    # executed in a worker process (presumably required by pandarallel on some
    # platforms -- see its troubleshooting page).
    import os, re
    import SimpleITK as sitk

    # Octal literal on purpose: decimal 777 would be 0o1411, i.e. a directory
    # the owner cannot write to or traverse on POSIX systems.
    os.makedirs(row['patient_dir'], 0o777, True)

    reader = sitk.ImageSeriesReader()
    dicom_names = reader.GetGDCMSeriesFileNames(row['dicom_paths'])
    if not dicom_names:
        raise FileNotFoundError(f"No DICOM series found in {row['dicom_paths']!r}")

    def slice_number(path):
        # Only the file name is inspected, so a folder whose name happens to
        # contain '-<digits>.dcm' cannot influence the ordering. The dot is
        # escaped so that it matches a literal '.' only.
        match = re.search(r'-(\d+)\.dcm$', os.path.basename(path))
        if match is None:
            raise ValueError(f"Cannot extract a slice number from DICOM file name {path!r}")
        return int(match.group(1))

    # Order the slices by the numeric suffix of their file names.
    dicom_names = sorted(dicom_names, key=slice_number)
    reader.SetFileNames(dicom_names)
    image = reader.Execute()

    sitk.WriteImage(image, row['output_paths'], useCompression=True)


# Convert every series row using the pandarallel workers; the resulting Series
# (one entry per row) is shown as the cell output.
conversion_results = relevant_data.parallel_apply(dcm2series, axis=1)
conversion_results

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=93), Label(value='0 / 93'))), HBox…

0       None
1       None
2       None
3       None
4       None
        ... 
1839    None
1840    None
1841    None
1842    None
1843    None
Length: 1844, dtype: object