# Load 🤖 and convert DICOM series to 3D volumes

In [1]:
# Download the DICOM-related dependencies into `frozen_packages` (binary wheels preferred).
!pip download -q "python-gdcm" pydicom pylibjpeg "opencv-python-headless" --dest frozen_packages --prefer-binary
# Build wheels for the helper package (taken from its GitHub archive) into the same folder.
!pip wheel -q https://github.com/Borda/kaggle_vol-3D-classify/archive/refs/heads/main.zip --wheel-dir frozen_packages --prefer-binary
# Remove any torch wheel from the folder — presumably because the target environment
# already provides PyTorch; TODO confirm. NOTE(review): `rm` errors if nothing matches.
!rm frozen_packages/torch-*
# Show what ended up in the folder.
!ls -lh frozen_packages

total 128M
-rw-r--r-- 1 root root  92K Oct 26 02:12 Markdown-3.4.1-py3-none-any.whl
-rw-r--r-- 1 root root  25K Oct 26 02:12 MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-rw-r--r-- 1 root root 3.1M Oct 26 02:12 Pillow-9.2.0-cp37-cp37m-manylinux_2_28_x86_64.whl
-rw-r--r-- 1 root root 622K Oct 26 02:12 PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl
-rw-r--r-- 1 root root 228K Oct 26 02:12 Werkzeug-2.2.2-py3-none-any.whl
-rw-r--r-- 1 root root 122K Oct 26 02:12 absl_py-1.3.0-py3-none-any.whl
-rw-r--r-- 1 root root 926K Oct 26 02:12 aiohttp-3.8.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-rw-r--r-- 1 root root 8.1K Oct 26 02:12 aiosignal-1.2.0-py3-none-any.whl
-rw-r--r-- 1 root root 5.7K Oct 26 02:12 async_timeout-4.0.2-py3-none-any.whl
-rw-r--r-- 1 root root  26K Oct 26 02:12 asynctest-0.13.0-py3-none-any.whl
-rw-r--r-- 1 root root  58K Oct 26 02:12 attrs-22.1.0-py2.py3-none-any.whl
-rw-r--r-- 1 root root 9.1K Oct 26 02:12 cachetool

In [2]:
# Install strictly from the local `frozen_packages` folder (`--no-index` disables PyPI look-ups).
# NOTE(review): the distribution is installed as `kaggle_vol3d_classify` while the code below
# imports `kaggle_volclassif` — presumably the distribution's import name; verify.
!pip install -qU "python-gdcm" pydicom pylibjpeg kaggle_vol3d_classify --find-links frozen_packages --no-index

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
dask-cudf 21.10.1 requires cupy-cuda114, which is not installed.
flax 0.5.2 requires rich~=11.1, but you have rich 12.1.0 which is incompatible.
dask-cudf 21.10.1 requires dask==2021.09.1, but you have dask 2022.2.0 which is incompatible.
dask-cudf 21.10.1 requires distributed==2021.09.1, but you have distributed 2022.2.0 which is incompatible.
allennlp 2.10.0 requires protobuf==3.20.0, but you have protobuf 3.19.4 which is incompatible.[0m[31m
[0m

In [3]:
%matplotlib inline

import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Root folder of the competition dataset; the cells below read `train_images` from it.
PATH_DATASET = "/kaggle/input/rsna-2022-cervical-spine-fracture-detection"

### Loading DICOM slices & saving them as a 3D volume

In [4]:
import cv2
import pydicom
import torch
from PIL import Image
from dipy.denoise.nlmeans import nlmeans
from dipy.denoise.noise_estimate import estimate_sigma
from pydicom.pixel_data_handlers import apply_voi_lut
from kaggle_volclassif.utils import interpolate_volume
from skimage import exposure
    

def convert_volume(dir_path: str, out_dir: str = "train_volumes", size = (100, 100, 40)) -> None:
    """Convert one directory of DICOM slices into a ``uint8`` volume stored as a ``.pt`` file.

    Pipeline: collect ``*.dcm`` files, order them by their integer file stem, run each
    slice through ``apply_voi_lut``, resize it in-plane, stack the slices, min-max
    normalise the stack, resample it with ``interpolate_volume(vol, size)``, apply
    adaptive histogram equalisation and save the result, scaled to 0..255, with
    ``torch.save``.

    Args:
        dir_path: Directory holding the slices; every file stem must parse as an integer.
        out_dir: Destination directory; created when it does not exist yet.
        size: Target size. Its first two entries are passed to ``cv2.resize`` as ``dsize``
            and the full tuple is forwarded to ``interpolate_volume``.

    Raises:
        RuntimeError: If ``dir_path`` contains no ``*.dcm`` file.
        ValueError: If a file stem is not an integer (raised by ``int``).
    """
    ls_imgs = glob.glob(os.path.join(dir_path, "*.dcm"))
    if not ls_imgs:
        # fail early with a readable message instead of an opaque reduction error later on
        raise RuntimeError(f"no DICOM slices (*.dcm) found in '{dir_path}'")
    # numeric sort: a plain string sort would place "10.dcm" before "2.dcm"
    ls_imgs = sorted(ls_imgs, key=lambda p: int(os.path.splitext(os.path.basename(p))[0]))

    imgs = []
    for p_img in ls_imgs:
        dicom = pydicom.dcmread(p_img)
        img = apply_voi_lut(dicom.pixel_array, dicom)
        img = cv2.resize(img, size[:2], interpolation=cv2.INTER_LINEAR)
        imgs.append(img)
    # stack in NumPy and cast there: avoids the round-trip through nested Python lists
    vol = torch.from_numpy(np.stack(imgs).astype(np.float32))

    # min-max normalisation to [0, 1]; a constant volume would divide by zero and yield NaNs
    v_min, v_max = vol.min(), vol.max()
    span = float(v_max - v_min)
    vol = (vol - v_min) / span if span > 0 else torch.zeros_like(vol)
    vol = interpolate_volume(vol, size).numpy()

    # https://scikit-image.org/docs/stable/auto_examples/color_exposure/plot_adapt_hist_eq_3d.html
    vol = exposure.equalize_adapthist(vol, kernel_size=np.array([64, 64, 64]), clip_limit=0.01)
    # vol = exposure.equalize_hist(vol)
    vol = np.clip(vol * 255, 0, 255).astype(np.uint8)

    # exist_ok keeps this safe when several parallel workers reach this line together
    os.makedirs(out_dir, exist_ok=True)
    # normpath drops a trailing separator, which would otherwise make basename() empty
    vol_name = os.path.basename(os.path.normpath(dir_path))
    path_pt = os.path.join(out_dir, f"{vol_name}.pt")
    torch.save(torch.tensor(vol), path_pt)

### Process all images 🤖

In [5]:
from pprint import pprint
from joblib import Parallel, delayed
from tqdm.auto import tqdm

! rm -rf train_volumes
! mkdir train_volumes

ls_dirs = [p for p in glob.glob(os.path.join(PATH_DATASET, "train_images", "*")) if os.path.isdir(p)]
print(f"volumes: {len(ls_dirs)}")

_= Parallel(n_jobs=4)(delayed(convert_volume)(p_dir) for p_dir in tqdm(ls_dirs))

! ls -lh train_volumes

volumes: 2019


  0%|          | 0/2019 [00:00<?, ?it/s]

total 773M
-rw-r--r-- 1 root root 392K Oct 26 02:19 1.2.826.0.1.3680043.10001.pt
-rw-r--r-- 1 root root 392K Oct 26 03:44 1.2.826.0.1.3680043.10005.pt
-rw-r--r-- 1 root root 392K Oct 26 04:03 1.2.826.0.1.3680043.10014.pt
-rw-r--r-- 1 root root 392K Oct 26 03:19 1.2.826.0.1.3680043.10016.pt
-rw-r--r-- 1 root root 392K Oct 26 02:50 1.2.826.0.1.3680043.10032.pt
-rw-r--r-- 1 root root 392K Oct 26 02:51 1.2.826.0.1.3680043.10041.pt
-rw-r--r-- 1 root root 392K Oct 26 02:49 1.2.826.0.1.3680043.10051.pt
-rw-r--r-- 1 root root 392K Oct 26 02:16 1.2.826.0.1.3680043.10058.pt
-rw-r--r-- 1 root root 392K Oct 26 03:24 1.2.826.0.1.3680043.10062.pt
-rw-r--r-- 1 root root 392K Oct 26 02:19 1.2.826.0.1.3680043.1010.pt
-rw-r--r-- 1 root root 392K Oct 26 03:03 1.2.826.0.1.3680043.10136.pt
-rw-r--r-- 1 root root 392K Oct 26 02:40 1.2.826.0.1.3680043.1016.pt
-rw-r--r-- 1 root root 392K Oct 26 03:02 1.2.826.0.1.3680043.10179.pt
-rw-r--r-- 1 root root 392K Oct 26 04:00 1.2.826.0.1.3680043.10204.