# EDA for Spleen dataset

## Setup Environment

In [10]:
# Mount Google Drive and switch the working directory to the Spleen EDA folder
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/CardiacSeg/eda/Spleen

# Install dependencies: MONAI weekly build with the nibabel, tqdm and einops extras
# NOTE(review): monai-weekly is not pinned, so the installed version can change
# between runs — consider pinning a version for reproducibility.
!pip install -q "monai-weekly[nibabel, tqdm, einops]"
# Install matplotlib only if it cannot already be imported
!python -c "import matplotlib" || pip install -q matplotlib
%matplotlib inline

# Automatically reload edited Python modules (e.g. the project's data_utils)
%load_ext autoreload
%autoreload 2

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/.shortcut-targets-by-id/1jz_DGnICBmKWCr_JL904PDQdIEK0_EQG/CardiacSeg/eda/Spleen
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# download dataset (one-time setup; uncomment the lines below to fetch and unpack Task09_Spleen)
# %cd /content/drive/MyDrive/CardiacSeg/dataset
# !wget https://msd-for-monai.s3-us-west-2.amazonaws.com/Task09_Spleen.tar
# !tar xvf Task09_Spleen.tar Task09_Spleen
# %cd /content/drive/MyDrive/CardiacSeg/eda/Spleen

## Import Library

In [6]:
import sys

# Make the project-local packages (e.g. data_utils) importable from this notebook.
# The membership check keeps the cell idempotent: re-running it does not keep
# appending the same entry to sys.path.
PROJECT_ROOT = "/content/drive/MyDrive/CardiacSeg"
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

import os

from monai.data import CacheDataset, DatasetSummary
from monai.transforms import (
    Compose,
    LoadImaged,
    AddChanneld,
    Orientationd,
    Spacingd,
    ScaleIntensityRanged,
    NormalizeIntensityd
)
from monai.utils import first

import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

from data_utils.spleen_dataset import get_data_dicts
from data_utils.visualization import show_img_lbl
from data_utils.utils import get_data_info

## Prepare data dicts

In [7]:
# Location of the Task09_Spleen dataset on the mounted Google Drive
data_dir = '/content/drive/MyDrive/CardiacSeg/dataset/Task09_Spleen'

In [11]:
# Build the list of cases found under data_dir.
# presumably one dict per case with 'image' and 'label' entries (those are the
# keys the transforms below operate on) — verify against data_utils.spleen_dataset
data_dicts = get_data_dicts(data_dir)

## Show data info

In [None]:
# Collect per-case metadata into a DataFrame; the table displayed in the next
# cell has the columns pid, img_shape, img_space, lbl_shape, lbl_space, lbl_ids.
data_info_df = get_data_info(data_dicts)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Show the per-case metadata table as the cell's rich output
data_info_df

[autoreload of data_utils.spleen_dataset failed: Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/usr/local/lib/python3.7/dist-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/usr/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/usr/lib/python3.7/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/content/drive/MyDrive/CardiacSeg/data_utils/spleen_dataset.py", line 7, in <module>
    from transforms.spleen_transform import (
ModuleNotFoundError: No module named 'transforms.spleen_transform'
]


Unnamed: 0,pid,img_shape,img_space,lbl_shape,lbl_space,lbl_ids
0,spleen_10,"[512, 512, 55]","[0.976562, 0.976562, 5.0]","[512, 512, 55]","[0.976562, 0.976562, 5.0]","[0.0, 1.0]"
1,spleen_12,"[512, 512, 168]","[0.753906, 0.753906, 1.5]","[512, 512, 168]","[0.753906, 0.753906, 1.5]","[0.0, 1.0]"
2,spleen_13,"[512, 512, 77]","[0.742188, 0.742188, 2.5]","[512, 512, 77]","[0.742188, 0.742188, 2.5]","[0.0, 1.0]"
3,spleen_14,"[512, 512, 54]","[0.851562, 0.851562, 5.0]","[512, 512, 54]","[0.851562, 0.851562, 5.0]","[0.0, 1.0]"
4,spleen_16,"[512, 512, 61]","[0.792969, 0.792969, 8.0]","[512, 512, 61]","[0.792969, 0.792969, 8.0]","[0.0, 1.0]"
5,spleen_17,"[512, 512, 95]","[0.613281, 0.613281, 2.5]","[512, 512, 95]","[0.613281, 0.613281, 2.5]","[0.0, 1.0]"
6,spleen_18,"[512, 512, 164]","[0.966797, 0.966797, 1.5]","[512, 512, 164]","[0.966797, 0.966797, 1.5]","[0.0, 1.0]"
7,spleen_19,"[512, 512, 51]","[0.796875, 0.796875, 5.0]","[512, 512, 51]","[0.796875, 0.796875, 5.0]","[0.0, 1.0]"
8,spleen_2,"[512, 512, 90]","[0.794922, 0.794922, 5.0]","[512, 512, 90]","[0.794922, 0.794922, 5.0]","[0.0, 1.0]"
9,spleen_20,"[512, 512, 168]","[0.933594, 0.933594, 1.5]","[512, 512, 168]","[0.933594, 0.933594, 1.5]","[0.0, 1.0]"


In [None]:
# Spread each case's 3-element img_shape list over the columns w, h, s,
# then report the mean of each column.
shape_df = pd.DataFrame(
    data=data_info_df['img_shape'].tolist(),
    columns=list('whs'),
)
shape_df.mean()

w    512.00000
h    512.00000
s     89.02439
dtype: float64

In [None]:
# Spread each case's 3-element img_space list over the columns x, y, z,
# then report the mean of each column.
space_df = pd.DataFrame(
    data=data_info_df['img_space'].tolist(),
    columns=list('xyz'),
)
space_df.mean()

x    0.812405
y    0.812405
z    4.368292
dtype: float32

## Statistics

In [None]:
# Minimal preprocessing pipeline applied to both the 'image' and 'label' entries:
# load them, then add a channel dimension.
# NOTE(review): AddChanneld is deprecated in newer MONAI releases in favour of
# EnsureChannelFirstd — confirm against the installed monai-weekly version.
base_tf = Compose(
    transforms=[
        LoadImaged(keys=['image', 'label']),
        AddChanneld(keys=['image', 'label']),
    ]
)

# Dataset over all cases in data_dicts, with base_tf as its transform
ds = CacheDataset(data=data_dicts, transform=base_tf)

In [None]:
# ref: https://arxiv.org/pdf/1904.08128.pdf
s = DatasetSummary(ds)
s.calculate_percentiles()
s.calculate_statistics()
data_target_space = s.get_target_spacing()

In [None]:
# Print a label, then show the target spacing as the cell's result
print('data target space:')
data_target_space

In [None]:
# Fixed the misspelled label in the printed heading ("statcics" -> "statistics").
print('data intensities statistics:')

# One-row table of the intensity statistics held by the DatasetSummary `s`.
# Each column label is written next to the attribute that fills it, so labels
# and values cannot drift out of alignment; column order follows the mapping.
# The variable keeps its original (misspelled) name so that any later cell
# referring to it still works.
# NOTE(review): the (0.5%) / (99.5%) labels assume calculate_percentiles() was
# called with its default percentiles — confirm if arguments are ever passed.
data_intensities_statcics = pd.DataFrame([{
    'min': s.data_min,
    'med': s.data_median,
    'max': s.data_max,
    'mean': s.data_mean,
    'std': s.data_std,
    'min_percentile (0.5%)': s.data_min_percentile,
    'max_percentile (99.5%)': s.data_max_percentile,
}])
data_intensities_statcics