# Imports and Setup

In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import matplotlib.pyplot as plt

# Project-local imports (kagglerecipes package)
from kagglerecipes.preprocess import VoxelData
from kagglerecipes.utils import get_dicom_metadata, get_patient_id

# Set data paths and read CSV files

In [None]:
# All data directories hang off the 'data' folder that sits two levels
# above the current working directory.
DATA_PATH = Path.cwd().parents[1] / 'data'
# Sample train/test scan folders live under data/sample/.
TRAIN_PATH = DATA_PATH / 'sample' / 'train'
TEST_DATA = DATA_PATH / 'sample' / 'test'

In [None]:
# Load the training labels and the sample submission from the data folder.
train_df = pd.read_csv(DATA_PATH.joinpath('train_labels.csv'))
test_df = pd.read_csv(DATA_PATH.joinpath('sample_submission.csv'))

def get_path(row, path_type):
    """
    Build the scan directory string for the patient in ``row``.

    ``row.BraTS21ID`` is cast to ``int`` and converted with
    ``get_patient_id``; the result is appended to ``path_type`` as
    ``'<path_type>/<patient_id>/'`` (forward slashes, trailing slash).
    """
    brats_id = int(row.BraTS21ID)
    patient_folder = get_patient_id(brats_id)
    return '{}/{}/'.format(path_type, patient_folder)

# Add a 'path' column holding each patient's scan directory; the base
# directory is forwarded to get_path as its second positional argument.
train_df['path'] = train_df.apply(get_path, axis=1, args=(TRAIN_PATH,))
test_df['path'] = test_df.apply(get_path, axis=1, args=(TEST_DATA,))

# Last expression of the cell: preview the first two training rows.
train_df.head(n=2)

Unnamed: 0,BraTS21ID,MGMT_value,path
0,165,0,e:\Kaggle\BrainTumor\kagglerecipes\data\sample...
1,267,0,e:\Kaggle\BrainTumor\kagglerecipes\data\sample...


# Get DICOM metadata

In [None]:
# Names of the DICOM attributes requested from get_dicom_metadata for every
# scan (50 entries; order is preserved in the resulting metadata).
meta_cols = [
    'SpecificCharacterSet', 'ImageType', 'SOPClassUID',
    'SOPInstanceUID', 'AccessionNumber', 'Modality', 'SeriesDescription',
    'PatientID', 'MRAcquisitionType', 'SliceThickness',
    'EchoTime', 'NumberOfAverages', 'ImagingFrequency', 'ImagedNucleus',
    'MagneticFieldStrength', 'SpacingBetweenSlices',
    'EchoTrainLength', 'PercentSampling', 'PercentPhaseFieldOfView',
    'PixelBandwidth', 'TriggerWindow', 'ReconstructionDiameter',
    'AcquisitionMatrix',
    'FlipAngle', 'SAR', 'PatientPosition',
    'StudyInstanceUID', 'SeriesInstanceUID', 'SeriesNumber',
    'InstanceNumber',
    'ImagePositionPatient', 'ImageOrientationPatient', 'Laterality',
    'PositionReferenceIndicator', 'SliceLocation', 'InStackPositionNumber',
    'SamplesPerPixel', 'PhotometricInterpretation', 'Rows', 'Columns',
    'PixelSpacing',
    'BitsAllocated', 'BitsStored', 'HighBit', 'PixelRepresentation',
    'WindowCenter',
    'WindowWidth', 'RescaleIntercept', 'RescaleSlope', 'RescaleType',
]

def get_all_dicom_metadata(df, meta_cols,
                           scan_types=('FLAIR', 'T1w', 'T1wCE', 'T2w')):
    """
    Retrieve metadata for each BraTS21ID and return as a dataframe.

    For every row of ``df`` and every scan type, one DICOM file is read from
    ``<row.path>/<scan_type>/``: the first file name in sorted order, so the
    same file is chosen on every run and platform.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide a ``path`` column (patient directory) and a
        ``BraTS21ID`` column.
    meta_cols : list
        Forwarded unchanged to ``get_dicom_metadata``.
    scan_types : iterable of str, optional
        Sub-directory names to visit inside each patient directory.
        Defaults to the four sequences 'FLAIR', 'T1w', 'T1wCE', 'T2w'.

    Returns
    -------
    pd.DataFrame
        One row per (input row, scan type), holding whatever
        ``get_dicom_metadata`` returned plus ``scan_type`` and ``id``
        (the row's ``BraTS21ID``). Empty if ``df`` or ``scan_types`` is empty.

    Raises
    ------
    IndexError
        If a scan-type directory contains no files.
    """
    records = []
    for _, row in df.iterrows():
        patient_dir = Path(row.path)
        for scan_type in scan_types:
            scan_dir = patient_dir / scan_type
            # os.listdir returns entries in arbitrary order; sort so the
            # representative file does not depend on the filesystem.
            dicom_files = sorted(os.listdir(scan_dir))
            if not dicom_files:
                raise IndexError(f'No DICOM files found in {scan_dir}')
            # NOTE(review): get_dicom_metadata is assumed to return a fresh
            # mutable mapping per call, since keys are added to it below.
            dicom_metadata = get_dicom_metadata(scan_dir / dicom_files[0], meta_cols)
            dicom_metadata['scan_type'] = scan_type
            dicom_metadata['id'] = row.BraTS21ID
            records.append(dicom_metadata)

    return pd.DataFrame(records)

In [None]:
# Collect one metadata row per (patient, scan type) for both splits.
train_meta_df = get_all_dicom_metadata(df=train_df, meta_cols=meta_cols)
test_meta_df = get_all_dicom_metadata(df=test_df, meta_cols=meta_cols)

# Voxel manipulation for a single patient ID

In [None]:
# Use the first entry os.listdir reports under TRAIN_PATH as the example
# patient (listing order is not guaranteed).
folder = os.listdir(TRAIN_PATH)[0]
patient_dir = os.path.join(TRAIN_PATH, folder)

# The VoxelData object is built from the T1w directory; the remaining three
# sequences are then loaded through that same object.
# NOTE(review): presumably T1w serves as the reference volume for the other
# sequences - confirm against kagglerecipes.preprocess.VoxelData.
connect_voxel = VoxelData(os.path.join(patient_dir, 'T1w'))
flair = connect_voxel.get_voxel_data(os.path.join(patient_dir, 'FLAIR'))
t1wce = connect_voxel.get_voxel_data(os.path.join(patient_dir, 'T1wCE'))
t2 = connect_voxel.get_voxel_data(os.path.join(patient_dir, 'T2w'))