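### Extract DICOM (.dcm) example files ###

Load DICOM files, export their header metadata, and save monochrome example images as PNG files.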

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import glob
from matplotlib import pyplot as plt

import torch

# Appearance of the Notebook
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
# Widen the numpy and pandas text output so wide arrays and tables are not wrapped
np.set_printoptions(linewidth=110)
for option_name, option_value in {
    'display.max_rows': 500,
    'display.max_columns': 100,
    'display.width': 1000,
}.items():
    pd.set_option(option_name, option_value)

# Pydicom data handlers
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut, apply_modality_lut

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import textdetection as td
from textdetection.fileutils import FileOP
from textdetection.Dicom import DicOP
print(f'Package version: {td.__version__}')

In [None]:
# GPU checks: report the CUDA setup and pick the torch device
is_cuda = torch.cuda.is_available()
print(f'CUDA available: {is_cuda}')
print(f'Number of GPUs found:  {torch.cuda.device_count()}')

# First GPU when CUDA is usable, CPU otherwise
device_str = 'cuda:0' if is_cuda else 'cpu'
if is_cuda:
    print(f'Current device ID:     {torch.cuda.current_device()}')
    print(f'GPU device name:       {torch.cuda.get_device_name(0)}')
    print(f'CUDNN version:         {torch.backends.cudnn.version()}')
    # Release cached GPU memory before any model work starts
    torch.cuda.empty_cache()

device = torch.device(device_str)
print(f'\nDevice for model training/inference: {device}')

In [None]:
# Helper functions. Must be in the calling scripts
def key2val(eval_str):
    """Evaluate an expression string and return its value, or NaN on failure.

    Parameters
    ----------
    eval_str : str
        Python expression to evaluate, e.g. ``'ds.Modality'``. Names used in
        the expression are looked up in this function's scope and in the
        globals of the script/notebook that defines this helper.

    Returns
    -------
    object
        The value of the expression, or ``np.nan`` if evaluating it raised
        an exception (the exception is logged with its traceback).
    """
    # Fetch the logger here rather than relying on a module-level `logger`
    # being defined; a missing name would raise NameError on the failure path.
    import logging

    try:
        # NOTE(security): eval() executes arbitrary code. Only pass strings
        # built from trusted names, never text that comes from files or users.
        return eval(eval_str)
    except Exception:
        logging.getLogger(__name__).exception(f'"{eval_str}" failed.')
        return np.nan

def remove_keys_from_list(key_list, remove_key_list):
    """Remove the entries of ``remove_key_list`` from ``key_list`` in place.

    For every entry of ``remove_key_list`` the first matching element of
    ``key_list`` is dropped; entries that are not present are skipped.
    The same (modified) ``key_list`` object is returned.
    """
    for unwanted in remove_key_list:
        try:
            key_list.remove(unwanted)
        except ValueError:
            # Not in the list: nothing to remove for this entry
            continue
    return key_list
    
def get_meta(ds, exclude_key_list=None):
    """Collect the elements of a dataset into a flat ``{keyword: value}`` dict.

    Parameters
    ----------
    ds : object
        Dataset-like object (e.g. a pydicom dataset) that offers ``dir()``
        returning its keywords and exposes every keyword as an attribute.
    exclude_key_list : list of str, optional
        Keywords to leave out of the result. The exclusion is applied to
        keywords without 'UID' in their name only.

    Returns
    -------
    dict
        The non-UID keywords (in ``ds.dir()`` order) mapped to their values,
        followed by the UID keywords (sorted) mapped to the ``.name`` of their
        value. Entries that cannot be read are set to ``np.nan`` and the
        failure is logged.
    """
    import logging

    log = logging.getLogger(__name__)
    excluded = () if exclude_key_list is None else exclude_key_list

    def read_value(keyword, use_uid_name=False):
        # Read from the `ds` argument itself, not from a global of the same name
        try:
            value = getattr(ds, keyword)
            return value.name if use_uid_name else value
        except Exception:
            suffix = '.name' if use_uid_name else ''
            log.exception(f'"ds.{keyword}{suffix}" failed.')
            return np.nan

    all_keys = ds.dir()

    # Key value pairs for the data set
    k_list = [k for k in all_keys if 'UID' not in k and k not in excluded]

    # Need to treat the unique identifiers UIs differently: export their name
    k_uid_list = sorted(k for k in all_keys if 'UID' in k)

    # Create dictionary with key: value pairs
    ds_dict = {k: read_value(k) for k in k_list}
    ds_dict.update({k: read_value(k, use_uid_name=True) for k in k_uid_list})

    return ds_dict

In [None]:
# Data directories
# DATA_ROOT must point to the directory that contains the 'dcmdata' folder.
data_dir = os.environ.get('DATA_ROOT')
if data_dir is None:
    # Fail with a clear message instead of a TypeError from os.path.join(None, ...)
    raise EnvironmentError(
        'Environment variable DATA_ROOT is not set. '
        'Point it to the directory that contains the "dcmdata" folder.'
    )
dcm_dir = os.path.join(data_dir, 'dcmdata')
output_dir = os.path.join(data_dir, 'dcmdata_textdetection')
# The file list and the exported images are written here, so make sure it exists
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Collect files: reuse the cached file list when present, otherwise scan the tree
ext = '.dcm'
dcm_file_list = os.path.join(output_dir, 'dcm_files.parquet')
if not os.path.exists(dcm_file_list):
    # No cache yet: search the DICOM directory and store the result
    print(f'Generating file list.')
    file_df = FileOP().search_file_tree(top_dir=dcm_dir, file_pattern=f'*{ext}')
    file_df.to_parquet(dcm_file_list)
    print(f'Done. File list saved to: {dcm_file_list}')
else:
    file_df = pd.read_parquet(dcm_file_list)

# Unique file paths, then a short preview of the first entries
file_list = [*file_df['file'].unique()]
print(len(file_list))
print(*file_list[:5], sep='\n')

In [None]:
idx = 20
file = file_list[idx]
dcm = FileOP().load_file(file, kind='dicom')

# Export header
# 1) File meta information
ds = dcm.file_meta
ds_meta_dict = get_meta(ds)
# 2) Main dataset. Rebind `ds` to the full dataset so that this call exports
#    the dataset header instead of repeating the file meta information.
#    The (large) pixel data is left out of the header table.
ds = dcm
ds_dcm_dict = get_meta(ds=ds, exclude_key_list=['PixelData'])
ds_meta_dict.update(ds_dcm_dict)

# Create data frame (one row, one column per header keyword, sorted by keyword)
ds_meta_dict = {key: [value] for key, value in ds_meta_dict.items()}
ds_meta_df = pd.DataFrame(ds_meta_dict)
ds_meta_df_sorted_cols = sorted(ds_meta_df.columns)
ds_meta_df = ds_meta_df[ds_meta_df_sorted_cols]

display(ds_meta_df.iloc[0])

In [None]:
# Export the first few files as 8-bit PNG images (monochrome images only)
n_examples = 3
example_files = file_list[:n_examples]
monochrome_list = ['MONOCHROME1', 'MONOCHROME2']

for idx, file in enumerate(example_files):
    dcm = FileOP().load_file(file, kind='dicom')
    phot = dcm.PhotometricInterpretation
    mod = dcm.Modality

    file_number = str(idx + 1).zfill(3)
    file_name = f'dicom_{mod}_{file_number}.png'
    file_idx = os.path.join(output_dir, file_name)

    # Progress is reported against the number of files actually processed
    print(f'File: {idx + 1} / {len(example_files)}')
    print(f'Name: {file_name}')
    print(f'PhotometricInterpretation: {phot}')
    print(f'MODALITY: {mod}')

    if phot not in monochrome_list:
        # Color images are not exported by this pipeline
        continue

    # Pipeline: raw pixels -> modality LUT / rescale -> VOI LUT / windowing.
    # The VOI step has to operate on the output of the modality step.
    pixel_array = dcm.pixel_array
    pixel_mod = apply_modality_lut(pixel_array, dcm)
    pixel_voi = apply_voi_lut(pixel_mod, dcm)

    # Scale to [0, 1]; a constant image has a zero range and stays all zeros
    # instead of triggering a division by zero.
    im_array = (pixel_voi - np.amin(pixel_voi)).astype(float)
    im_max = np.amax(im_array)
    if im_max > 0:
        im_array /= im_max

    # MONOCHROME1 stores the minimum value as white, so invert it to match
    # the 'gray' colormap (minimum = black) used for display.
    if phot == 'MONOCHROME1':
        im_array = 1.0 - im_array

    im_array = (im_array * 255).astype(np.uint8)

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.imshow(im_array, cmap='gray')
    ax.set(xticks=[], yticks=[])
    ax.set_title(f'Imaging modality: {mod}')
    plt.savefig(file_idx, bbox_inches='tight')
    plt.show()
    # Release the figure so figures do not accumulate while looping
    plt.close(fig)