## Imports 

In [1]:
import time
from pathlib import Path
from datetime import datetime

import torch
from torch.utils.data import random_split, DataLoader
import pandas as pd
import torchio as tio
import pytorch_lightning as pl
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
from tqdm import tqdm

from IPython import display
from tqdm.notebook import tqdm

# Default size (width, height) for every figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (12, 6)

# Stamp the output with the time of this run.
print('Last run on', time.ctime())

Last run on Mon Oct 17 15:03:13 2022


In [2]:
import os

# Fix plotting bug: set before any figure is drawn.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

## Histogram Standardization

We standardize image intensities across samples acquired on different scanners. To do so, we first learn the histogram landmarks from the training and test images.

In [3]:
# Root folder of the original 3D dataset, relative to this notebook.
dataset_dir = Path('..') / 'data' / 'original' / '3D'

In [4]:
# Folders whose volumes are used to learn the histogram landmarks.
train_dir = dataset_dir.joinpath('images')
test_dir = dataset_dir.joinpath('test')

# Sorted so the file order is the same on every run.
input_paths, test_paths = (
    sorted(folder.glob('*.nii.gz')) for folder in (train_dir, test_dir)
)

In [5]:
# All volumes (training first, then test) used for landmark training.
paths = [*input_paths, *test_paths]

In [6]:
# Set to True to force the landmarks to be re-learned from `paths`.
compute_landmarks = False
landmarks_file = Path('landmarks.npy')

# The preprocessing transforms read 'landmarks.npy' from disk, so learn the
# landmarks whenever the file is missing instead of failing further down.
if compute_landmarks or not landmarks_file.exists():
    landmarks = tio.HistogramStandardization.train(
        paths,
        output_path=landmarks_file,
    )
    # A bare `landmarks` expression nested inside an `if` is never echoed by
    # the notebook, so show the result explicitly.
    print(landmarks)

## Set up preprocessing transforms

In [8]:
def get_niis(d):
    """Return the sorted entries of directory ``d`` whose names match ``*.nii.gz*``.

    Args:
        d: A ``pathlib.Path`` pointing at the directory to search
            (non-recursive).

    Returns:
        A list of ``Path`` objects in ascending order; empty when nothing
        matches.
    """
    matches = list(d.glob('*.nii.gz*'))
    matches.sort()
    return matches

In [9]:
# One sorted file list per dataset sub-folder, in the order unpacked below.
image_training_paths, label_paths, mask_paths, image_test_paths = (
    get_niis(dataset_dir / folder)
    for folder in ('images', 'labels', 'masks', 'reference')
)

In [10]:
# Take each name from the path it is later zipped with. `os.listdir` returns
# entries in arbitrary order and includes every file in the folder, so its
# result does not line up with the sorted, glob-filtered path lists and would
# attach the wrong file name to a subject.
train_names = [path.name for path in image_training_paths]
test_names = [path.name for path in image_test_paths]

In [11]:
# `zip` stops at the shortest input, so lists of different lengths would
# silently drop subjects (and usually mean the files are mispaired).
# Fail loudly instead.
train_counts = {
    'images': len(image_training_paths),
    'labels': len(label_paths),
    'masks': len(mask_paths),
    'names': len(train_names),
}
if len(set(train_counts.values())) != 1:
    raise ValueError(f'Training inputs have different lengths: {train_counts}')
if len(image_test_paths) != len(test_names):
    raise ValueError(
        'Test inputs have different lengths: '
        f'{len(image_test_paths)} images vs {len(test_names)} names'
    )

# Training subjects: intensity image, label (loaded as a scalar image),
# label-map mask, and the file name used later when saving the results.
subjects = []
for image_path, label_path, mask_path, train_name in zip(image_training_paths, label_paths, mask_paths, train_names):
    subject = tio.Subject(
        image=tio.ScalarImage(image_path),
        label=tio.ScalarImage(label_path),
        mask=tio.LabelMap(mask_path),
        name=train_name
    )
    subjects.append(subject)

# Test subjects only have an image and a name.
test_subjects = []
for image_test_path, test_name in zip(image_test_paths, test_names):
    subject = tio.Subject(image=tio.ScalarImage(image_test_path), name=test_name)
    test_subjects.append(subject)

In [12]:
# Display the list of test subjects (each holds an image and a name).
test_subjects

[Subject(Keys: ('image', 'name'); images: 1),
 Subject(Keys: ('image', 'name'); images: 1)]

In [None]:
# Preprocessing applied to every training subject, in order.
train_preprocess = tio.Compose([
    # Map intensities of 'image' and 'label' onto the landmarks stored on disk.
    tio.HistogramStandardization({'image': 'landmarks.npy', 'label': 'landmarks.npy'}),
    tio.Clamp(out_min=0, out_max=255),
    tio.CropOrPad([256, 256, 256]),
    tio.Resample(2),
    tio.ZNormalization(masking_method=tio.ZNormalization.mean),
    # Invert the label-typed images before masking.
    # NOTE(review): presumably only 'mask' (the sole LabelMap) is affected - confirm.
    tio.Lambda(lambda x: 1 - x, types_to_apply=[tio.LABEL]),
    # `exclude` must be a sequence of keys: a bare string is rejected by
    # recent TorchIO releases and only worked through substring matching
    # in older ones.
    tio.Mask(masking_method='mask', exclude=['label']),
    # Undo the inversion so the saved mask keeps its original polarity.
    tio.Lambda(lambda x: 1 - x, types_to_apply=[tio.LABEL]),
])

In [48]:
# Preprocessing for test subjects: same intensity and geometry steps as for
# training, without any label or mask handling.
test_steps = [
    tio.HistogramStandardization({'image': 'landmarks.npy'}),
    tio.Clamp(out_min=0, out_max=255),
    tio.CropOrPad([256, 256, 256]),
    tio.Resample(2),
    tio.ZNormalization(masking_method=tio.ZNormalization.mean),
]
test_preprocess = tio.Compose(test_steps)

In [50]:
# Wrap each subject list in a dataset that applies its preprocessing pipeline
# whenever an item is requested.
train_set, test_set = (
    tio.SubjectsDataset(group, transform=pipeline)
    for group, pipeline in (
        (subjects, train_preprocess),
        (test_subjects, test_preprocess),
    )
)

In [47]:
# Output folders for the preprocessed training data; create them up front so
# saving does not fail on a fresh checkout.
train_out_dir = Path('../data/processed/preprocessed/train')
for folder in ('images', 'labels', 'masks'):
    (train_out_dir / folder).mkdir(parents=True, exist_ok=True)

for i in tqdm(range(len(train_set))):
    # Index the dataset once per subject: every `train_set[i]` access loads
    # the volumes and re-runs the whole preprocessing pipeline.
    processed = train_set[i]
    name = processed.name
    processed.image.save(train_out_dir / 'images' / f'preprocessed_{name}')
    processed.label.save(train_out_dir / 'labels' / f'preprocessed_blended_{name}')
    processed.mask.save(train_out_dir / 'masks' / f'preprocessed_mask_{name}')

  0%|          | 0/598 [00:00<?, ?it/s]

In [48]:
# Output folder for the preprocessed test images; create it up front so saving
# does not fail on a fresh checkout.
test_out_dir = Path('../data/processed/preprocessed/test/images')
test_out_dir.mkdir(parents=True, exist_ok=True)

for i in tqdm(range(len(test_set))):
    # Index the dataset once per subject: every `test_set[i]` access loads the
    # volume and re-runs the whole preprocessing pipeline.
    processed = test_set[i]
    processed.image.save(test_out_dir / f'preprocessed_{processed.name}')

  0%|          | 0/300 [00:00<?, ?it/s]