In [None]:
import zipfile
import os
from pathlib import Path
import nrrd
import pandas as pd

In [None]:
# Create the extraction target directory (and any missing parents);
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs('../dataset/raw', exist_ok=True)

In [None]:
# Unpack every member of the dataset archive into the raw-data directory.
# The archive is opened read-only and closed when the `with` block exits.
with zipfile.ZipFile('../dataset/ASOCA2020Data.zip', mode='r') as archive:
    archive.extractall(path='../dataset/raw')

In [None]:
# Show the entries of the dataset directory as the cell output
# (the directory is expected to exist once the archive has been extracted).
os.listdir('../dataset/raw/ASOCA2020Data/')

In [None]:
# Directory whose entries are read with nrrd.read as the training masks.
y_train_path = '../dataset/raw/ASOCA2020Data/Train_Masks'

In [None]:
# One summary row per file in the mask directory:
# [integer id parsed from the file name, voxel count, voxel sum, voxel mean].
stats = []
for mask_name in os.listdir(y_train_path):
    mask_file = Path(y_train_path) / mask_name
    mask_vol, mask_header = nrrd.read(mask_file, index_order='C')
    # The id is whatever precedes the first '.' in the file name; it must parse as an int.
    vol_id = int(mask_name.split('.')[0])
    stats.append([vol_id, mask_vol.size, mask_vol.sum(), mask_vol.mean()])

In [None]:
# Order the rows by their first field (the volume id), ascending.
stats.sort(key=lambda row: row[0])

In [None]:
# Turn the `stats` rows into a table with one named column per row field.
# NOTE(review): the fourth field of each row is appended as `vol.mean()` in the
# stats loop, i.e. a mean over all voxels rather than an explicit
# foreground/background quotient -- confirm that the `fg_bg_ratio` column name
# matches the intended quantity.
df = pd.DataFrame(stats, columns=['vol_id', 'n_voxels', 'n_fg', 'fg_bg_ratio'])

In [None]:
# Index the table by volume id.
# Guarded and non-inplace so that re-running this cell is a no-op: after the
# first run 'vol_id' is the index and no longer a column, so an unconditional
# set_index would raise KeyError.
if df.index.name != 'vol_id':
    df = df.set_index('vol_id')

In [None]:
# Write the table to CSV. The path is relative, so the file lands in the
# kernel's current working directory.
df.to_csv('train_label_stats.csv')

In [None]:
# Directory whose entries are read with nrrd.read as the training volumes.
x_train_path = '../dataset/raw/ASOCA2020Data/Train'

In [None]:
# One summary row per file in the training-volume directory:
# [integer id parsed from the file name, voxel count, voxel sum, voxel mean].
# NOTE(review): this rebinds `stats`; no later cell visible in this notebook
# reads the rows collected here -- confirm whether they are still needed.
stats = []
for entry in os.listdir(x_train_path):
    image, image_header = nrrd.read(Path(x_train_path) / entry, index_order='C')
    # The id is whatever precedes the first '.' in the file name; it must parse as an int.
    stem = entry.split('.')[0]
    stats.append([int(stem), image.size, image.sum(), image.mean()])

In [None]:
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
import numpy as np

In [None]:
def get_foreground(paths, step=10):
    """Return a strided sample of the foreground intensities of one case.

    The previous implementation read the mask but never used it, so it
    returned every 10th voxel of the *whole* volume. The mask is now applied
    before subsampling.

    Args:
        paths: ``(vol_path, mask_path)`` pair; both files are read with
            ``nrrd.read(..., index_order='C')``.
        step: keep every ``step``-th selected voxel (default 10, matching the
            stride that was previously hard-coded).

    Returns:
        1-D array holding the values of ``vol`` at positions where
        ``mask > 0``, in C order, subsampled by ``step``.

    Raises:
        ValueError: if the volume and the mask do not have the same shape.
    """
    vol_path, mask_path = paths
    mask, _ = nrrd.read(mask_path, index_order='C')
    vol, _ = nrrd.read(vol_path, index_order='C')
    if mask.shape != vol.shape:
        raise ValueError(
            f'shape mismatch: volume {vol_path} has shape {vol.shape}, '
            f'mask {mask_path} has shape {mask.shape}'
        )
    # Boolean indexing already yields a flat 1-D array of the selected voxels,
    # so no explicit flatten() is needed before striding.
    return vol[mask > 0][::step]

In [None]:
# Pair every training volume with its mask by file name.
# os.listdir returns entries in arbitrary order, so zipping two independent
# listings could silently pair a volume with another case's mask (and zip
# would silently drop entries if the directories differ in size). Both
# listings are therefore sorted and required to contain the same names.
volume_names = sorted(os.listdir(x_train_path))
mask_names = sorted(os.listdir(y_train_path))
if volume_names != mask_names:
    unmatched = sorted(set(volume_names) ^ set(mask_names))
    raise ValueError(
        f'volume and mask directories do not contain the same files; unmatched: {unmatched}'
    )
# List of (volume path, mask path) tuples, the input format of get_foreground.
train_paths = [
    (Path(x_train_path, name), Path(y_train_path, name))
    for name in volume_names
]

In [None]:
# Run get_foreground over all (volume, mask) pairs in up to 10 worker
# processes, with a tqdm progress bar over the results.
# The executor is bound to `pool` rather than `exec`: at notebook top level the
# latter would shadow the builtin `exec` for the rest of the session.
with ProcessPoolExecutor(max_workers=10) as pool:
    fg_chunks = list(tqdm(pool.map(get_foreground, train_paths), total=len(train_paths)))

# Aggregate once the pool has shut down; nothing below needs the workers.
# `fg_voxels` is only ever the concatenated 1-D array (the per-case list keeps
# its own name) so the variable does not change type mid-cell.
fg_voxels = np.concatenate(fg_chunks)
vol_stats = {
    'mean': np.mean(fg_voxels),
    'std': np.std(fg_voxels),
    'percentile_00_5': np.percentile(fg_voxels, 0.5),
    'percentile_99_5': np.percentile(fg_voxels, 99.5),
}

In [None]:
# Display the dictionary of intensity statistics as the cell output.
vol_stats