In [None]:
# default_exp utils

# Utils

> Utilities for logging data directories to Weights & Biases as artifacts, extracting DICOM metadata, and formatting patient IDs.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#hide
# %load_ext autoreload
# %autoreload 2

In [None]:
#export
import os
import wandb
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

# pydicom related imports
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
#export
def log_datadir_as_artifact(wandb_run, path_to_dir, artifact_name, artifact_type='dataset'):
    """
    Upload a local data directory to wandb as an artifact.

    Params:
        wandb_run: run object whose `log_artifact` method receives the artifact
        path_to_dir: directory whose contents are added to the artifact
        artifact_name: name given to the artifact
        artifact_type: type given to the artifact (defaults to 'dataset')
    """
    # Build the artifact, attach the directory, then hand it over to the run.
    data_artifact = wandb.Artifact(artifact_name, type=artifact_type)
    data_artifact.add_dir(path_to_dir)
    wandb_run.log_artifact(data_artifact)
    

In [None]:
#hide
# run = wandb.init(entity='wandb_fc', project='rsna-miccai-brain', group='data', job_type='sample_data')
# log_datadir_as_artifact(run, '../data/smaller_sample/', 'sample_data', artifact_type='dataset')
# wandb.finish()

In [None]:
#export
def get_dicom_metadata(path_to_dicom_file, meta_cols):
    """
    Returns the metadata of a single dicom file as a dictionary.

    Every requested attribute is converted with `str`; attributes that the
    file does not carry are reported as the string "NaN".

    Params:
        path_to_dicom_file: path to the dicom file
        meta_cols: list of metadata columns to extract

    Returns:
        dict mapping each entry of `meta_cols` to its string value
    """
    meta_cols = list(meta_cols)

    # Only header elements are normally requested, so skip parsing the
    # (potentially large) pixel data and anything stored after it. Fall back to
    # a full read when the caller explicitly asks for a pixel attribute.
    needs_pixels = any(col in ('PixelData', 'pixel_array') for col in meta_cols)
    dicom_object = pydicom.dcmread(path_to_dicom_file, stop_before_pixels=not needs_pixels)

    metadata = {}
    for col in meta_cols:
        try:
            metadata[col] = str(getattr(dicom_object, col))
        except AttributeError:
            # Attribute absent from this file: keep the key, mark the value as missing.
            metadata[col] = "NaN"

    return metadata


In [None]:
# DICOM attribute names extracted from each file header.
meta_cols = ['SpecificCharacterSet', 'ImageType', 'SOPClassUID',
             'SOPInstanceUID', 'AccessionNumber', 'Modality', 'SeriesDescription',
             'PatientID', 'MRAcquisitionType', 'SliceThickness',
             'EchoTime', 'NumberOfAverages', 'ImagingFrequency', 'ImagedNucleus',
             'MagneticFieldStrength', 'SpacingBetweenSlices',
             'EchoTrainLength', 'PercentSampling', 'PercentPhaseFieldOfView',
             'PixelBandwidth', 'TriggerWindow', 'ReconstructionDiameter', 'AcquisitionMatrix',
             'FlipAngle', 'SAR', 'PatientPosition',
             'StudyInstanceUID', 'SeriesInstanceUID', 'SeriesNumber', 'InstanceNumber',
             'ImagePositionPatient', 'ImageOrientationPatient', 'Laterality',
             'PositionReferenceIndicator', 'SliceLocation', 'InStackPositionNumber',
             'SamplesPerPixel', 'PhotometricInterpretation', 'Rows', 'Columns', 'PixelSpacing',
             'BitsAllocated', 'BitsStored', 'HighBit', 'PixelRepresentation', 'WindowCenter',
             'WindowWidth', 'RescaleIntercept', 'RescaleSlope', 'RescaleType']

# The sample data is looked up in data/sample/train under the parent of the working directory.
TRAIN_PATH = Path.cwd().parent / 'data/sample/train/'

# os.listdir returns entries in arbitrary order; sort so the same sample is
# picked on every run and on every machine.
folder = sorted(os.listdir(TRAIN_PATH))[0]
sample_file = sorted(os.listdir(TRAIN_PATH / folder / 'T1w'))[4]

dicom_metadata = get_dicom_metadata(TRAIN_PATH / folder / 'T1w' / sample_file, meta_cols)
assert isinstance(dicom_metadata, dict)
# Every requested column must be present, whether or not the file carries it.
assert set(dicom_metadata) == set(meta_cols)

In [None]:
#export
def get_all_dicom_metadata(df, meta_cols, scan_types=('FLAIR', 'T1w', 'T1wCE', 'T2w')):
    """
    Retrieve metadata for each BraTS21ID and return as a dataframe.

    For every row of `df` and every scan type, the first file (in sorted
    filename order) of `<row.path>/<scan_type>` is read with
    `get_dicom_metadata`, and the result is tagged with the scan type and id.

    Params:
        df: dataframe with a `path` column (patient directory) and a
            `BraTS21ID` column
        meta_cols: list of metadata columns to extract
        scan_types: names of the scan sub-directories to visit for each row

    Returns:
        dataframe with one row per (BraTS21ID, scan type), holding the
        `meta_cols` values plus the `scan_type` and `id` columns

    Raises:
        IndexError: if a scan directory contains no files
    """
    records = []
    for row in df.itertuples(index=False):
        patient_dir = Path(row.path)
        for scan_type in scan_types:
            scan_dir = patient_dir / scan_type
            # os.listdir order is arbitrary; sort so the same file is chosen on every run.
            dicom_files = sorted(os.listdir(scan_dir))
            if not dicom_files:
                raise IndexError(f"no files found in {scan_dir}")
            dicom_metadata = get_dicom_metadata(scan_dir / dicom_files[0], meta_cols)
            dicom_metadata['scan_type'] = scan_type
            dicom_metadata['id'] = row.BraTS21ID
            records.append(dicom_metadata)

    return pd.DataFrame(records)

In [None]:
#export
def get_patient_id(patient_id):
    """
    Returns the correct patient id of a dicom file: the number zero-padded
    to five digits (e.g. 1 -> '00001', 1234 -> '01234').

    Ids that already have five or more digits are returned without extra
    padding (e.g. 12345 -> '12345').

    Parameters
    ----------
    patient_id: non-negative integer id of the patient; anything accepted by
        `int()` (e.g. a numpy integer or a numeric string) also works

    Returns
    -------
    str: the zero-padded patient id
    """
    # `05d` pads with leading zeros up to width 5 and never adds a zero beyond that.
    return f"{int(patient_id):05d}"

In [None]:
# One check on each side of every padding boundary (1, 2, 3 and 4 digits).
assert get_patient_id(1) == '00001'
assert get_patient_id(9) == '00009'
assert get_patient_id(10) == '00010'
assert get_patient_id(99) == '00099'
assert get_patient_id(100) == '00100'
assert get_patient_id(999) == '00999'
assert get_patient_id(1000) == '01000'
assert get_patient_id(9999) == '09999'

In [None]:
#hide
# Run the nbdev notebook-to-script export (the cell output lists the converted notebooks).
from nbdev.export import notebook2script; notebook2script()

Converted 00_data.ipynb.
Converted 00_preprocess.ipynb.
Converted 00_utils.ipynb.
Converted index.ipynb.
