In [1]:
%pip install pyradiomics dicom_numpy pydicom plotly matplotlib scikit-image simpleITK pynrrd dicom2nifti NiBabel NiLearn openpyxl pydicom-seg tqdm

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import dicom_numpy
import pydicom as dicom

import dicom2nifti
import nibabel as nib
import nilearn as nil
import scipy.ndimage as ndi
import os

import glob
from tqdm import tqdm



In [3]:
# Directory that the relative DICOM paths from the mapping spreadsheet are resolved against.
img_directories = "../../Simple Path Dataset/manifest-1654812109500/"
# Directory that receives one sub-folder of converted NIfTI files per patient.
transfer_directory = "../../New Dataset/"

In [4]:
# Create the output root if it does not exist yet.
# The permission bits must be an octal literal: decimal 777 equals 0o1411
# (sticky bit, owner read-only), not the intended rwxrwxrwx.
os.makedirs(transfer_directory, mode=0o777, exist_ok=True)

In [5]:
# Load the spreadsheet that lists, per DICOM file, its SOP instance UID and its
# path in each of the dataset's directory layouts.
file_descriptions = pd.read_excel(
    io="../../Simple Path Dataset/Breast-Cancer-MRI-filepath_filename-mapping.xlsx",
)

In [6]:
# Keep only the identifier and path columns. The explicit .copy() makes the
# result an independent frame, so adding columns to it afterwards does not
# raise SettingWithCopyWarning.
file_descriptions = file_descriptions[
    ['sop_instance_UID', 'original_path_and_filename', 'classic_path', 'descriptive_path']
].copy()
display(file_descriptions)

Unnamed: 0,sop_instance_UID,original_path_and_filename,classic_path,descriptive_path
0,1.3.6.1.4.1.14519.5.2.1.1805789812895034139917...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
1,1.3.6.1.4.1.14519.5.2.1.4903237729147735321973...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
2,1.3.6.1.4.1.14519.5.2.1.3061160038794820079325...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
3,1.3.6.1.4.1.14519.5.2.1.1574717199045785031549...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
4,1.3.6.1.4.1.14519.5.2.1.2594404476894572978078...,DICOM_Images/Breast_MRI_001/post_1/Breast_MRI_...,Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI001/01-01-1990...
...,...,...,...,...
773121,1.3.6.1.4.1.14519.5.2.1.2393425910452664915158...,DICOM_Images/Breast_MRI_922/T1/Breast_MRI_922_...,Duke-Breast-Cancer-MRI/Breast_MRI_922/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI922/01-01-1990...
773122,1.3.6.1.4.1.14519.5.2.1.1921416620775060655567...,DICOM_Images/Breast_MRI_922/T1/Breast_MRI_922_...,Duke-Breast-Cancer-MRI/Breast_MRI_922/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI922/01-01-1990...
773123,1.3.6.1.4.1.14519.5.2.1.1148085546624000943273...,DICOM_Images/Breast_MRI_922/T1/Breast_MRI_922_...,Duke-Breast-Cancer-MRI/Breast_MRI_922/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI922/01-01-1990...
773124,1.3.6.1.4.1.14519.5.2.1.1929308890572593698573...,DICOM_Images/Breast_MRI_922/T1/Breast_MRI_922_...,Duke-Breast-Cancer-MRI/Breast_MRI_922/1.3.6.1....,Duke-Breast-Cancer-MRI/BreastMRI922/01-01-1990...


In [7]:
# Process the path strings to get the required info: patient ID, sequence,
# description, and the DICOM directory of each file.
#
# Both path columns are '/'-separated. Each one is split once with the
# vectorized .str accessor and the components are then picked by position;
# rows with a missing or too-short path yield NaN instead of raising.
# .assign() returns a new frame rather than writing columns into a frame that
# may be a slice of the loaded spreadsheet.
classic_parts = file_descriptions['classic_path'].str.split('/')
original_parts = file_descriptions['original_path_and_filename'].str.split('/')

file_descriptions = file_descriptions.assign(
    patient=classic_parts.str[1],                      # e.g. 'Breast_MRI_001'
    sequence=classic_parts.str[3],                     # fourth path component (presumably the series UID; confirm)
    description=original_parts.str[2],                 # e.g. 'post_1', 'pre', 'T1'
    dicom_paths=classic_parts.str[:-1].str.join('/'),  # classic_path without its last component
)

In [9]:
# Keep only the rows whose description is 'post_1' or 'pre'.
relevant_file_descriptions = file_descriptions.loc[
    lambda frame: frame['description'].isin(['post_1', 'pre'])
]

In [17]:
# One row per distinct (patient, sequence, DICOM directory, description), with:
#   dicom_paths  - DICOM directory resolved against img_directories
#   patient_dir  - per-patient output folder under transfer_directory
#   output_paths - '<patient_dir>/<description>.nii.gz' target file
relevant_data = (
    relevant_file_descriptions
    .loc[:, ['patient', 'sequence', 'dicom_paths', 'description']]
    .drop_duplicates()
    .reset_index(drop=True)
    .assign(
        dicom_paths=lambda frame: frame['dicom_paths'].map(
            lambda rel_dir: os.path.join(img_directories, rel_dir)
        ),
        patient_dir=lambda frame: frame['patient'].map(
            lambda patient_id: os.path.join(transfer_directory, patient_id)
        ),
        # Later keyword arguments of assign() can read columns created by earlier ones.
        output_paths=lambda frame: frame.apply(
            lambda rec: os.path.join(rec['patient_dir'], rec['description']) + '.nii.gz',
            axis=1,
        ),
    )
)
relevant_data

Unnamed: 0,patient,sequence,dicom_paths,description,patient_dir,output_paths
0,Breast_MRI_001,1.3.6.1.4.1.14519.5.2.1.1514720349923003958578...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../New Dataset/Breast_MRI_001,../../New Dataset/Breast_MRI_001\post_1.nii.gz
1,Breast_MRI_001,1.3.6.1.4.1.14519.5.2.1.1857778498036652445367...,../../Simple Path Dataset/manifest-16548121095...,pre,../../New Dataset/Breast_MRI_001,../../New Dataset/Breast_MRI_001\pre.nii.gz
2,Breast_MRI_002,1.3.6.1.4.1.14519.5.2.1.1702386334115195086234...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../New Dataset/Breast_MRI_002,../../New Dataset/Breast_MRI_002\post_1.nii.gz
3,Breast_MRI_002,1.3.6.1.4.1.14519.5.2.1.2920879504444133146439...,../../Simple Path Dataset/manifest-16548121095...,pre,../../New Dataset/Breast_MRI_002,../../New Dataset/Breast_MRI_002\pre.nii.gz
4,Breast_MRI_003,1.3.6.1.4.1.14519.5.2.1.3339321204011089072051...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../New Dataset/Breast_MRI_003,../../New Dataset/Breast_MRI_003\post_1.nii.gz
...,...,...,...,...,...,...
1839,Breast_MRI_920,1.3.6.1.4.1.14519.5.2.1.2243370307638913554367...,../../Simple Path Dataset/manifest-16548121095...,pre,../../New Dataset/Breast_MRI_920,../../New Dataset/Breast_MRI_920\pre.nii.gz
1840,Breast_MRI_921,1.3.6.1.4.1.14519.5.2.1.2153335720893817815538...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../New Dataset/Breast_MRI_921,../../New Dataset/Breast_MRI_921\post_1.nii.gz
1841,Breast_MRI_921,1.3.6.1.4.1.14519.5.2.1.2995579324872770731987...,../../Simple Path Dataset/manifest-16548121095...,pre,../../New Dataset/Breast_MRI_921,../../New Dataset/Breast_MRI_921\pre.nii.gz
1842,Breast_MRI_922,1.3.6.1.4.1.14519.5.2.1.5039771361051598068760...,../../Simple Path Dataset/manifest-16548121095...,post_1,../../New Dataset/Breast_MRI_922,../../New Dataset/Breast_MRI_922\post_1.nii.gz


In [12]:
# Show the resolved DICOM directory of the row labelled 0 as a sanity check.
relevant_data.loc[0, 'dicom_paths']

'../../Simple Path Dataset/manifest-1654812109500/Duke-Breast-Cancer-MRI/Breast_MRI_001/1.3.6.1.4.1.14519.5.2.1.186051521067863971269584893740842397538/1.3.6.1.4.1.14519.5.2.1.15147203499230039585785258602759759529'

In [19]:
# Conversion: write every selected DICOM series to its NIfTI output path.
for idx, row in tqdm(relevant_data.iterrows(), total=len(relevant_data)):
    # Make sure the per-patient output folder exists. The mode must be the
    # octal literal 0o777; decimal 777 is 0o1411, which on POSIX sets the sticky
    # bit and leaves the owner without write/search permission on the folder.
    os.makedirs(row['patient_dir'], mode=0o777, exist_ok=True)
    dicom2nifti.dicom_series_to_nifti(row['dicom_paths'], row['output_paths'])

100%|██████████| 1844/1844 [1:48:37<00:00,  3.53s/it]


In [6]:
# Disabled alternative conversion, kept for reference only.
# It walks two directory levels below img_directories, picks in each
# second-level folder the sub-directory holding the most '*.dcm' files, and
# converts that sub-directory with dicom2nifti.convert_directory into
# transfer_directory/<top-level folder name>.
# NOTE(review): if this is ever re-enabled, the makedirs mode below should be
# the octal literal 0o777 rather than decimal 777.
# for dir in tqdm(os.listdir(img_directories)):
#     dir_path = os.path.join(img_directories, dir)
#     if(os.path.isdir(dir_path)):
#         #traverse the directory
#         for patient in os.listdir(dir_path):
#             #dig deeper
#             base_paths = os.path.join(dir_path, patient)
#             max_elements = 0
#             max_count_subdir = ''
#             for scan_dirs in os.listdir(base_paths):
#                 #determine the number of slices in each file
#                 patient_path = os.path.join(base_paths, scan_dirs)
#                 count = len(glob.glob(os.path.join(patient_path, '*.dcm')))
#                 #print(count)
#                 if(count > max_elements):
#                     max_elements = count
#                     max_count_subdir = scan_dirs
#             # convert max count subdir to nifti

#             #make subdirs
#             nifti_dir = os.path.join(transfer_directory, dir)
#             #print(nifti_dir)
#             os.makedirs(nifti_dir, 777, True)

#             #dump files to subdirs
#             dicom2nifti.convert_directory(os.path.join(base_paths, max_count_subdir), nifti_dir)




100%|██████████| 923/923 [52:05<00:00,  3.39s/it] 
