In [20]:
#imports
import numpy as np
import pandas as pd 
import pydicom
import os
import scipy.ndimage
import matplotlib.pyplot as plt

from skimage import measure, morphology
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

In [21]:
# Path constants.
# ROOT_DIR and DATA_DIR are combined with os.path.join wherever this notebook
# reads the training images; both are relative paths, so they resolve against
# the kernel's current working directory.
ROOT_DIR = '../'
DATA_DIR = 'data/train_images/'
# NOTE(review): DEST_DIR is not referenced in the cells visible here --
# presumably an output location used further down; confirm.
DEST_DIR = 'raw_data/'

In [22]:
def determine_orientation(orientation_arr: list):
    '''
    Classify a six-component orientation vector as an anatomical plane.

    Every component is rounded to the nearest integer and the rounded
    sequence is matched against three reference patterns. Anything that
    does not match exactly (including sign-flipped or wrong-length input)
    is reported as 'indeterminate'.

    The reference patterns were found in a comment under
    https://stackoverflow.com/questions/34782409/understanding-dicom-image-attributes-to-get-axial-coronal-sagittal-cuts

    Parameters
    ----------
    orientation_arr : sequence of numbers
        Values to classify, e.g. a DICOM ImageOrientationPatient attribute.

    Returns
    -------
    str
        One of 'coronal', 'sagittal', 'axial' or 'indeterminate'.
    '''
    plane_by_pattern = {
        (1, 0, 0, 0, 0, -1): 'coronal',
        (0, 1, 0, 0, 0, -1): 'sagittal',
        (1, 0, 0, 0, 1, 0): 'axial',
    }

    # Snap near-unit components to exactly -1/0/1 so slightly tilted
    # acquisitions still hit one of the reference patterns.
    snapped = tuple(round(component) for component in orientation_arr)
    return plane_by_pattern.get(snapped, 'indeterminate')

In [23]:
# Alphabetically sorted names of the entries found in the training-image
# directory (presumably one sub-directory per study -- confirm).
studies = sorted(os.listdir(os.path.join(ROOT_DIR, DATA_DIR)))

# Empty metadata frame; the column order matches the order of the metadata
# list returned by load_scan.
metadata_df = pd.DataFrame(columns=[
    'study_id', 'series_id', 'orientation',
    'dim_x', 'dim_y',
    'pixel_dim_x', 'pixel_dim_y',
    'rows', 'columns',
    'num_slices', 'slice_spacing', 'slice_thickness',
    'series_description', 'patient_position',
    'image_position', 'image_orientation',
    'photometric_interpretation',
    'scan_dim_x_mm', 'scan_dim_y_mm', 'scan_dim_z_mm',
])

In [37]:
# Function taken and modified from
# https://www.kaggle.com/code/gzuidhof/full-preprocessing-tutorial
def _stacking_axis(orientation) -> int:
    '''
    Return the patient-coordinate axis (0=x, 1=y, 2=z) along which slices stack.

    The slice normal is the cross product of the row and column direction
    cosines; its largest-magnitude component identifies the axis whose
    ImagePositionPatient coordinate changes from slice to slice. Ties and
    degenerate input fall back to z (axis 2).
    '''
    row = [float(v) for v in orientation[:3]]
    col = [float(v) for v in orientation[3:6]]
    if len(row) != 3 or len(col) != 3:
        return 2

    normal = (row[1] * col[2] - row[2] * col[1],
              row[2] * col[0] - row[0] * col[2],
              row[0] * col[1] - row[1] * col[0])
    # Iterating z first makes max() prefer z whenever magnitudes tie.
    return max((2, 1, 0), key=lambda axis: abs(normal[axis]))


def load_scan(path: str)-> tuple[list, list]:
    '''
    Read every DICOM file in a series directory and summarise the series.

    The slices are sorted by their ImagePositionPatient coordinate along the
    axis the series is stacked on (z for axial, x for sagittal, y for
    coronal acquisitions). Sorting on z alone leaves non-axial series in
    directory-listing order, because all of their slices share the same z.

    Parameters
    ----------
    path : str
        Directory containing the DICOM files of one series. Every entry
        returned by os.listdir(path) is read as a DICOM file.

    Returns
    -------
    tuple[list, list]
        (slices, metadata): the sorted pydicom datasets, and a list of
        values describing the first sorted slice / the series, in the same
        order as the columns of ``metadata_df``.

    Raises
    ------
    IndexError
        If the directory contains no files.
    AttributeError
        If a required DICOM attribute is missing from a slice.
    '''
    # dcmread is the supported reader; read_file is a deprecated alias.
    slices = [pydicom.dcmread(os.path.join(path, s)) for s in os.listdir(path)]
    if not slices:
        raise IndexError(f'no DICOM files found in {path!r}')

    # All slices of a series are assumed to share one orientation, so any
    # slice can be used to determine the stacking axis.
    axis = _stacking_axis(slices[0].ImageOrientationPatient)
    slices.sort(key = lambda x: float(x.ImagePositionPatient[axis]))

    first = slices[0]

    # assumes SeriesInstanceUID has the form '<study_id>.<series_id>...' -- TODO confirm
    uid_parts = str(first.SeriesInstanceUID).split('.')
    study_id = int(uid_parts[0])
    series_id = int(uid_parts[1])
    orientation = determine_orientation(first.ImageOrientationPatient)
    # NOTE(review): dim_x/dim_y are taken from pixel_array.shape[0]/[1] in
    # that order and are paired with PixelSpacing[0]/[1] in the same order.
    pixel_shape = first.pixel_array.shape
    dim_x = pixel_shape[0]
    dim_y = pixel_shape[1]
    pixel_dim_x = float(first.PixelSpacing[0])
    pixel_dim_y = float(first.PixelSpacing[1])
    rows = first.Rows
    columns = first.Columns
    num_slices = len(slices)
    slice_spacing = float(first.SpacingBetweenSlices)
    slice_thickness = float(first.SliceThickness)
    series_description = first.SeriesDescription
    patient_position = first.PatientPosition
    image_position = first.ImagePositionPatient
    image_orientation = first.ImageOrientationPatient
    photometric_interpretation = first.PhotometricInterpretation
    # Physical extent of the scan, from pixel counts and spacings.
    scan_dim_x_mm = dim_x * pixel_dim_x
    scan_dim_y_mm = dim_y * pixel_dim_y
    scan_dim_z_mm = num_slices * slice_spacing

    metadata = [study_id,
                series_id,
                orientation,
                dim_x,
                dim_y,
                pixel_dim_x,
                pixel_dim_y,
                rows,
                columns,
                num_slices,
                slice_spacing,
                slice_thickness,
                series_description,
                patient_position,
                image_position,
                image_orientation,
                photometric_interpretation,
                scan_dim_x_mm,
                scan_dim_y_mm,
                scan_dim_z_mm]

    return slices, metadata

In [39]:
# Example: load a single series to inspect what load_scan returns.
# ex_dir is a two-level path below the training-image directory
# (presumably '<study_id>/<series_id>' -- confirm against the data layout).
ex_dir = '4264413460/975997321'
path = os.path.join(ROOT_DIR, DATA_DIR, ex_dir)
# load_scan returns a (slices, metadata) tuple.
data = load_scan(path)
# Show the first slice in the sorted list as the cell output.
data[0][0]

Dataset.file_meta -------------------------------
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Enhanced MR Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 4264413460.1.21
(0002, 0010) Transfer Syntax UID                 UI: RLE Lossless
(0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
(0002, 0013) Implementation Version Name         SH: 'PYDICOM 2.4.2'
-------------------------------------------------
(0008, 0018) SOP Instance UID                    UI: 4264413460.1.21
(0008, 0023) Content Date                        DA: '20240503'
(0008, 0033) Content Time                        TM: '224023.805276'
(0008, 103e) Series Description                  LO: 'T2'
(0010, 0020) Patient ID                          LO: '4264413460'
(0018, 0050) Slice Thickness                     DS: '3.0'
(0018, 0088) Spacing Between Slices              DS: '3.6'
(0018, 5100) Patient Position        