# Brain Shift - Data Summarization
Tyler Spears, Dr. Tom Fletcher

Part of the Pain in the Net (PITN) project

In [None]:
# imports
import pathlib
from pathlib import Path
import json
import collections

import numpy as np
import pandas as pd
import csv
import natsort
from pprint import pprint
import nibabel

# visualization libraries
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

# Notebook-wide figure defaults, applied in a single update: turn on
# matplotlib's automatic figure layout and use an opaque white (RGBA)
# figure background.
plt.rcParams.update(
    {
        'figure.autolayout': True,
        'figure.facecolor': [1.0, 1.0, 1.0, 1.0],
    }
)

In [None]:
# Data setup

# Root of the brain-shift dataset and its two per-stage sub-directories.
data_dir = Path("/media/tyler/data/pitn/brain_shift")
pre_dir = data_dir / "pre"
intra_dir = data_dir / "during"

# Fail fast, and say which path is the problem. An explicit check (rather than
# a bare `assert`) still runs under `python -O` and also rejects a path that
# exists but is not a directory.
_missing_dirs = [str(d) for d in (data_dir, pre_dir, intra_dir) if not d.is_dir()]
if _missing_dirs:
    raise FileNotFoundError(
        f"Expected data directories not found: {', '.join(_missing_dirs)}"
    )

## Extract image names

In [None]:
# Pre-Op
# Base names (extension removed) of every NIfTI volume in the pre-op directory.
pre_img_names = {
    nii_path.name.replace('.nii.gz', '') for nii_path in pre_dir.glob('*.nii.gz')
}
pprint(natsort.natsorted(pre_img_names))

# Base names of every JSON sidecar in the same directory.
pre_json_names = {
    sidecar_path.name.replace('.json', '') for sidecar_path in pre_dir.glob("*.json")
}
# pprint(natsort.natsorted(pre_json_names))

# Images with no sidecar. Per the author's note, all .nii.gz images have an
# associated .json, so this is expected to print an empty set.
pprint(pre_img_names - pre_json_names)

# Keep names that have both an image and a sidecar, dropping any whose
# case-folded name mentions a localizer or scout sequence.
pre_names = {
    base_name
    for base_name in pre_img_names & pre_json_names
    if not any(tag in base_name.casefold() for tag in ('localizer', 'scout'))
}

In [None]:
# Intra-Op
# Base names (extension removed) of every NIfTI volume in the intra-op directory.
intra_img_names = {
    nii_path.name.replace('.nii.gz', '') for nii_path in intra_dir.glob('*.nii.gz')
}
pprint(natsort.natsorted(intra_img_names))

# Base names of every JSON sidecar in the same directory.
intra_json_names = {
    sidecar_path.name.replace('.json', '') for sidecar_path in intra_dir.glob("*.json")
}
# pprint(natsort.natsorted(intra_json_names))

# A couple of image files do not have a .json file. Incomplete upload?
pprint(intra_img_names.difference(intra_json_names))
# NOTE(review): intersecting the difference with intra_img_names cannot remove
# anything, so this prints the same set as the line above. Possibly a different
# right-hand set was intended -- confirm with the author.
pprint(intra_img_names.difference(intra_json_names) & intra_img_names)

# Keep names that have both an image and a sidecar, dropping any whose
# case-folded name mentions a localizer or scout sequence.
intra_names = {
    base_name
    for base_name in intra_img_names & intra_json_names
    if not any(tag in base_name.casefold() for tag in ('localizer', 'scout'))
}

## Metadata Parsing

### Pre-Op

In [None]:
pre_df: pd.DataFrame
pre_df_unique: pd.DataFrame

# Read the JSON sidecar of every selected pre-op image.
# Names are visited in natural-sort order so the row order of the resulting
# table (and of the CSV written from it) is reproducible; iterating the set
# directly gives an order that can change from one interpreter session to the
# next.
pre_meta = list()
for name in natsort.natsorted(pre_names):
    meta_file = pre_dir / (name + '.json')
    with open(meta_file, 'r') as f:
        meta = json.load(f)
    meta['file_name'] = name
    # These two fields are excluded from the summary. A default is passed so a
    # sidecar that lacks either field is tolerated instead of raising KeyError.
    meta.pop('ImageOrientationPatientDICOM', None)
    meta.pop('ImageType', None)
    pre_meta.append(meta)

# Fields present in every sidecar. Computed as a direct intersection so the
# result does not depend on the running set staying non-empty.
meta_keys = (
    set.intersection(*(set(record) for record in pre_meta)) if pre_meta else set()
)

# Sub-select each metadata record according to the fields present in all of them.
pre_meta = [
    {key: value for key, value in record.items() if key in meta_keys}
    for record in pre_meta
]

pre_df = pd.DataFrame.from_dict(pre_meta).set_index('file_name')

# Further sub-select fields that have more than one value throughout the table.
# Values are compared by their string form.
pre_df_unique = pre_df.loc[
    :,
    [column.astype(str).nunique(dropna=False) > 1 for _, column in pre_df.items()],
]

In [None]:
# Load corresponding images for additional metadata.


def _pre_volume_summary(name):
    """Return the NIfTI-header summary record for the pre-op image `name`.

    Loads `<pre_dir>/<name>.nii.gz` and reports the image shape, the header's
    `descrip` field, and the first three zoom values, each suffixed with the
    header's spatial unit.
    """
    img = nibabel.load(pre_dir / (name + '.nii.gz'))
    header = img.header
    # First entry of the (spatial, temporal) unit pair.
    unit = header.get_xyzt_units()[0]
    zooms = header.get_zooms()
    return {
        'file_name': name,
        'shape': str(img.shape),
        # `descrip` is stored as bytes; convert to unicode before stringifying.
        'description': str(header['descrip'].astype('U')),
        'x_size': str(zooms[0]) + unit,
        'y_size': str(zooms[1]) + unit,
        'z_size': str(zooms[2]) + unit,
    }


# One record per row of pre_df, in the same order as its index.
nifti_meta = [_pre_volume_summary(name) for name in pre_df.index]

meta_df = pd.DataFrame.from_dict(nifti_meta).set_index('file_name')

In [None]:
# Combine the varying JSON fields with the NIfTI-header summary, matching rows
# on 'file_name', then save the table with every CSV field quoted.
merge_pre_df = pd.merge(pre_df_unique, meta_df, on='file_name')
merge_pre_df.to_csv('pre_op_meta.csv', quoting=csv.QUOTE_ALL)
# Bare expression: displays the table as the notebook cell output.
merge_pre_df

### Intra-Op

In [None]:
intra_df: pd.DataFrame
intra_df_unique: pd.DataFrame

# Read the JSON sidecar of every selected intra-op image.
# Names are visited in natural-sort order so the row order of the resulting
# table (and of the CSV written from it) is reproducible; iterating the set
# directly gives an order that can change from one interpreter session to the
# next.
intra_meta = list()
for name in natsort.natsorted(intra_names):
    meta_file = intra_dir / (name + '.json')
    with open(meta_file, 'r') as f:
        meta = json.load(f)
    meta['file_name'] = name
    # These two fields are excluded from the summary; a sidecar that lacks
    # either one is tolerated.
    meta.pop('ImageOrientationPatientDICOM', None)
    meta.pop('ImageType', None)
    intra_meta.append(meta)

# Fields present in every sidecar. Computed as a direct intersection so the
# result does not depend on the running set staying non-empty.
meta_keys = (
    set.intersection(*(set(record) for record in intra_meta)) if intra_meta else set()
)

# Sub-select each metadata record according to the fields present in all of them.
intra_meta = [
    {key: value for key, value in record.items() if key in meta_keys}
    for record in intra_meta
]

intra_df = pd.DataFrame.from_dict(intra_meta).set_index('file_name')

# Further sub-select fields that have more than one value throughout the table.
# Values are compared by their string form.
intra_df_unique = intra_df.loc[
    :,
    [column.astype(str).nunique(dropna=False) > 1 for _, column in intra_df.items()],
]

In [None]:
# Load corresponding images for additional metadata.
nifti_meta = list()

for name in intra_df.index:
    img = nibabel.load(intra_dir / (name + '.nii.gz'))
    header = img.header
    # First entry of the (spatial, temporal) unit pair.
    spatial_unit = header.get_xyzt_units()[0]
    zooms = header.get_zooms()
    x_size, y_size, z_size = zooms[0], zooms[1], zooms[2]
    # Column names and value types follow the pre-op summary ('x_size' etc.,
    # shape stored as a string) so both CSVs share one schema.
    nifti_meta.append(
        {
            'file_name': name,
            'shape': str(img.shape),
            # `descrip` is stored as bytes; convert to unicode before stringifying.
            'description': str(header['descrip'].astype('U')),
            'x_size': str(x_size) + spatial_unit,
            'y_size': str(y_size) + spatial_unit,
            'z_size': str(z_size) + spatial_unit,
        }
    )

meta_df = pd.DataFrame.from_dict(nifti_meta).set_index('file_name')

In [None]:
# Combine the varying JSON fields with the NIfTI-header summary, matching rows
# on 'file_name', then save the table with every CSV field quoted.
merge_intra_df = pd.merge(intra_df_unique, meta_df, on='file_name')
merge_intra_df.to_csv('intra_op_meta.csv', quoting=csv.QUOTE_ALL)
# Bare expression: displays the table as the notebook cell output.
merge_intra_df