# Imports and Setup

In [None]:
import os
import ast
import wandb
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import matplotlib.pyplot as plt

# module based imports
from kagglerecipes.preprocess import VoxelData
from kagglerecipes.utils import *
from kagglerecipes.wandb_utils import log_df_as_tables

# Read CSV files

In [None]:
# Resolve the base directory once (two levels above the current working
# directory) and derive every data location from it.
base_dir = Path.cwd().parents[1]

DATA_PATH = base_dir / 'data'
TRAIN_PATH = base_dir / 'data/sample/train/'
TEST_DATA = base_dir / 'data/sample/test/'

In [None]:
# Training labels: load the table, then add a `path` column computed row by row.
# `args` forwards the split's base directory to the helper as its second
# positional argument, after the row itself.
train_df = pd.read_csv(DATA_PATH / 'train_labels.csv')
train_df['path'] = train_df.apply(get_patient_BraTS21ID_path, axis=1, args=(TRAIN_PATH,))

# Submission template: same treatment, resolved against the test directory.
test_df = pd.read_csv(DATA_PATH / 'sample_submission.csv')
test_df['path'] = test_df.apply(get_patient_BraTS21ID_path, axis=1, args=(TEST_DATA,))

# Preview the first two training rows.
train_df.head(2)

Unnamed: 0,BraTS21ID,MGMT_value,path
0,165,0,e:\Kaggle\BrainTumor\kagglerecipes\data\sample...
1,267,0,e:\Kaggle\BrainTumor\kagglerecipes\data\sample...


# Get DICOM metadata

In [None]:
# Names of the metadata fields to collect. Written as a whitespace-separated
# block and split into a list of strings, so entries are easy to scan and edit.
meta_cols = """
    SpecificCharacterSet ImageType SOPClassUID SOPInstanceUID AccessionNumber
    Modality SeriesDescription PatientID MRAcquisitionType SliceThickness
    EchoTime NumberOfAverages ImagingFrequency ImagedNucleus MagneticFieldStrength
    SpacingBetweenSlices EchoTrainLength PercentSampling PercentPhaseFieldOfView PixelBandwidth
    TriggerWindow ReconstructionDiameter AcquisitionMatrix FlipAngle SAR
    PatientPosition StudyInstanceUID SeriesInstanceUID SeriesNumber InstanceNumber
    ImagePositionPatient ImageOrientationPatient Laterality PositionReferenceIndicator SliceLocation
    InStackPositionNumber SamplesPerPixel PhotometricInterpretation Rows Columns
    PixelSpacing BitsAllocated BitsStored HighBit PixelRepresentation
    WindowCenter WindowWidth RescaleIntercept RescaleSlope RescaleType
""".split()

In [None]:
# Get DICOM metadata for both splits, requesting the same field list each time.
def _collect_metadata(frame):
    """Run `get_all_dicom_metadata` on `frame` with the shared `meta_cols` list."""
    return get_all_dicom_metadata(frame, meta_cols)


train_meta_df = _collect_metadata(train_df)
test_meta_df = _collect_metadata(test_df)

100%|██████████| 10/10 [00:23<00:00,  2.33s/it]
100%|██████████| 10/10 [00:21<00:00,  2.17s/it]


In [None]:
# Get orientation metadata: add an `Orientation` column to each metadata table
# by applying `get_image_plane` to every row (train first, then test).
for meta_frame in (train_meta_df, test_meta_df):
    meta_frame['Orientation'] = meta_frame.apply(get_image_plane, axis=1)

In [None]:
# Row masks for the subset of interest. Rows/Columns are compared against the
# string "256", i.e. the metadata values are matched as text, not numbers.
is_256_square = (train_meta_df.Rows == "256") & (train_meta_df.Columns == "256")
is_t1w = train_meta_df.SeriesDescription == "T1w"

# Count the matching rows per (patient, orientation, series) combination.
(
    train_meta_df.loc[is_256_square & is_t1w]
    .groupby(["PatientID", 'Orientation', "SeriesDescription"])
    .size()
    .reset_index(name='count')
)

Unnamed: 0,PatientID,Orientation,SeriesDescription,count
0,389,axial,T1w,192
1,542,axial,T1w,174
2,623,axial,T1w,176


# Log metadata as W&B Tables

In [None]:
# Log the training metadata table to Weights & Biases under the "raw_train" name.
# The run is used as a context manager so it is always finished, even when
# logging raises; previously a failure skipped `wandb.finish()` and left the
# run open in the kernel.
with wandb.init(entity='ayush-thakur', project='tests') as run:
    log_df_as_tables(run, train_meta_df, "raw_train")

wandb: wandb version 0.12.1 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 5.03MB of 10.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.5003454967…

0,1
_runtime,16
_timestamp,1630024211
_step,0


0,1
_runtime,▁
_timestamp,▁
_step,▁


# Create Dataset

In [None]:
SAVE_PATH = Path(os.getcwd()).parents[1] / 'data/voxel_data/'
os.makedirs(SAVE_PATH, exist_ok=True)

# Training patient whose T1w series is handed to VoxelData at construction.
REFERENCE_PATIENT = '00542'

# MRI sequence sub-directory -> file name of the saved array.
# (T2w is intentionally stored as 't2.npy', matching the existing output layout.)
SEQUENCE_FILES = {
    'T1w': 't1w.npy',
    'FLAIR': 'flair.npy',
    'T1wCE': 't1wce.npy',
    'T2w': 't2.npy',
}

# Initialize voxel manipulator
connect_voxel = VoxelData(os.path.join(TRAIN_PATH / REFERENCE_PATIENT, "T1w"))

# Iterate over the dataframe and resample every MRI sequence according to the reference.
for i in tqdm(range(len(train_df))):
    # Positional lookup: `.loc[i]` would only work while the index is the
    # default 0..n-1 range (e.g. it breaks after filtering or shuffling).
    row = train_df.iloc[i]

    # Load all four sequences before writing anything, so a patient with an
    # unreadable sequence does not leave a partially written output directory.
    volumes = {
        filename: connect_voxel.get_voxel_data(os.path.join(row.path, sequence))
        for sequence, filename in SEQUENCE_FILES.items()
    }

    # Save the resampled MRI sequences into one directory per patient.
    patient_dir = SAVE_PATH / get_patient_id(row.BraTS21ID)
    os.makedirs(patient_dir, exist_ok=True)
    for filename, volume in volumes.items():
        np.save(patient_dir / filename, volume)


100%|██████████| 10/10 [01:17<00:00,  7.73s/it]
