# Brain Shift - Data Summarization
Tyler Spears, Dr. Tom Fletcher

Part of the Pain in the Net (PITN) project

In [246]:
# imports
import pathlib
from pathlib import Path
import json
import collections

import numpy as np
import pandas as pd
import csv
import natsort
from pprint import pprint
import nibabel

# visualization libraries
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.autolayout': True})
plt.rcParams.update({'figure.facecolor': [1.0, 1.0, 1.0, 1.0]})

In [3]:
# Data setup

data_dir = Path("/media/tyler/data/pitn/brain_shift")
assert data_dir.exists()
pre_dir = data_dir / "pre"
intra_dir = data_dir / "during"
assert pre_dir.exists() and intra_dir.exists()

## Extract image names

In [172]:
# Pre-Op
pre_img_names = set(img_file.name.replace('.nii.gz', '') for img_file in pre_dir.glob('*.nii.gz'))
pprint(natsort.natsorted(pre_img_names))
pre_json_names = set(json_file.name.replace('.json', '') for json_file in pre_dir.glob("*.json"))
# pprint(natsort.natsorted(pre_json_names))
# All .nii.gz images have an associated .json.
pprint(pre_img_names - pre_json_names)

# Filter out some sequences.
pre_names = set(
    filter(
        lambda s: 'localizer' not in s.casefold() and 'scout' not in s.casefold(), 
        pre_img_names.intersection(pre_json_names)
    )
)

['SE000001_AAHead_Scout_20200520115951_1',
 'SE000001_AAHead_Scout_20200520115951_1a',
 'SE000001_AAHead_Scout_20200520115951_1b',
 'SE000002_AAHead_Scout_20200520115951_2_i00001',
 'SE000002_AAHead_Scout_20200520115951_2_i00002',
 'SE000002_AAHead_Scout_20200520115951_2_i00003',
 'SE000002_AAHead_Scout_20200520115951_2_i00004',
 'SE000003_AAHead_Scout_20200520115951_3_i00001',
 'SE000003_AAHead_Scout_20200520115951_3_i00002',
 'SE000003_AAHead_Scout_20200520115951_3_i00003',
 'SE000004_AAHead_Scout_20200520115951_4_i00001',
 'SE000004_AAHead_Scout_20200520115951_4_i00002',
 'SE000004_AAHead_Scout_20200520115951_4_i00003',
 'SE000005_t1_mprage_sag_p2_iso_20200520115951_5',
 'SE000006_t1_mprage_sag_p2_iso_MPR_Cor_20200520115951_6',
 'SE000007_t1_mprage_sag_p2_iso_MPR_Tra_20200520115951_7',
 'SE000008_Ax_diffusion_20200520115951_8',
 'SE000009_Ax_diffusion_20200520115951_9',
 'SE000010_t2_spc_flair_sag_p2_iso_20200520115951_10',
 'SE000011_t2_spc_flair_sag_p2_iso_MPR_Cor_20200520115951_1

In [5]:
# Intra-Op
intra_img_names = set(img_file.name.replace('.nii.gz', '') for img_file in intra_dir.glob('*.nii.gz'))
pprint(natsort.natsorted(intra_img_names))
intra_json_names = set(json_file.name.replace('.json', '') for json_file in intra_dir.glob("*.json"))
# pprint(natsort.natsorted(intra_json_names))

# A couple of image files do not have a .json file. Incomplete upload?
pprint(intra_img_names - intra_json_names)
pprint((intra_img_names - intra_json_names).intersection(intra_img_names))

# Filter out some sequences.
intra_names = set(
    filter(
        lambda s: 'localizer' not in s.casefold() and 'scout' not in s.casefold(), 
        intra_img_names.intersection(intra_json_names)
    )
)

['ST000002_T1_SE_AXIAL_20200610090639_11',
 'ST000002_T1_SE_SAG_20200610090639_10',
 'ST000002_localizer_20200610090639_2_i00001',
 'ST000002_localizer_20200610090639_2_i00002',
 'ST000002_localizer_20200610090639_2_i00003',
 'ST000002_localizer_20200610090639_5_i00001',
 'ST000002_localizer_20200610090639_5_i00002',
 'ST000002_localizer_20200610090639_5_i00003',
 'ST000002_t1_fl3d_sag_Fiber_20200610090639_7',
 'ST000002_t1_fl3d_sag_Fiber_20200610090639_101',
 'ST000002_t1_fl3d_sag_Fiber_20200610090639_101_ROI1',
 'ST000002_t1_fl3d_sag_Fiber_20200610090639_102',
 'ST000002_t1_fl3d_sag_Fiber_20200610090639_102_ROI1',
 'ST000002_t1_fl3d_sag_Pre_Ablation_20200610090639_4',
 'ST000002_t2_flair_cor_EC_20200610090639_17',
 'ST000002_t2_flair_cor_EC_Post_Ablation_20200610090639_24',
 'ST000002_t2_flair_tra_EC_20200610090639_16']
{'ST000002_t1_fl3d_sag_Fiber_20200610090639_101_ROI1',
 'ST000002_t1_fl3d_sag_Fiber_20200610090639_102_ROI1'}
{'ST000002_t1_fl3d_sag_Fiber_20200610090639_101_ROI1',
 

## Metadata Parsing

### Pre-Op

In [241]:
pre_df: pd.DataFrame
pre_df_unique: pd.DataFrame

# Read json files.
meta_keys = set()
pre_meta = list()
for name in pre_names:
    meta_file = pre_dir / (name + '.json')
    with open(meta_file, 'r') as f:
        meta = json.load(f)
    meta['file_name'] = name
    meta.pop('ImageOrientationPatientDICOM')
    meta.pop('ImageType')
    pre_meta.append(meta)
    if not meta_keys:
        meta_keys = meta_keys.union(meta.keys())
    else:
        meta_keys = meta_keys.intersection(meta.keys())

# Sub-select each metadata according to the fields present in all metadata.
pre_meta = [
    dict(
        filter(
            lambda kv: kv[0] in meta_keys,
            super_meta.items()
        )
    ) for super_meta in pre_meta
]

pre_df = pd.DataFrame.from_dict(pre_meta).set_index('file_name')

# Further sub-select fields that have more than one value throughout the table.
pre_df_unique = pre_df.loc[
    :,
    list(
        len(pre_subj[1].astype(str).unique()) > 1 for pre_subj in pre_df.items()
    )
]

In [244]:
# Load corresponding images for additional metadata.
nifti_meta = list()

for name in pre_df.index:
    img = nibabel.load(pre_dir / (name + '.nii.gz'))
    header = img.header
    header_d = dict(header)
    spatial_unit = header.get_xyzt_units()[0]
    x_size = header.get_zooms()[0]
    y_size = header.get_zooms()[1]
    z_size = header.get_zooms()[2]
    meta = {
        'file_name': name,
        'shape': str(img.shape),#.replace(',', 'x').replace(' ', ''),
        'description': str(header_d['descrip'].astype('U')),#.replace(';', '|').replace(' ', ''),
        'x_size': str(x_size) + spatial_unit,
        'y_size': str(y_size) + spatial_unit,
        'z_size': str(z_size) + spatial_unit
        }
    
    nifti_meta.append(meta)
    
meta_df = pd.DataFrame.from_dict(nifti_meta).set_index('file_name')

In [245]:
merge_pre_df = pre_df_unique.merge(meta_df, on='file_name')
merge_pre_df.to_csv('pre_op_meta.csv', quoting=csv.QUOTE_ALL)
merge_pre_df

Unnamed: 0_level_0,ImagingFrequency,MRAcquisitionType,SeriesDescription,ProtocolName,ScanningSequence,SequenceVariant,SequenceName,SeriesNumber,AcquisitionTime,SliceThickness,...,PercentSampling,PhaseEncodingSteps,AcquisitionMatrixPE,ReconMatrixPE,PixelBandwidth,shape,description,x_size,y_size,z_size
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SE000015_t2_tse_tra_320_p2_20200520115951_15,123.248,2D,t2_tse_tra_320_p2,t2_tse_tra_320_p2,SE,SK\SP\OSP,*tse2d1_19,15,12:31:51.965000,3.0,...,75,304,279,372,215,"(372, 384, 51)",TE=95;Time=123151.965;phase=1,0.5729167mm,0.5729167mm,3.0mm
SE000021_t1_mprage_sag_iso_post_MPR_Cor_20200520115951_21,123.248,3D,t1_mprage_sag_iso post_MPR_Cor,t1_mprage_sag_iso post_MPR_Cor,GR\IR,SK\SP\MP,*tfl3d1_16ns,21,12:43:14.452500,1.0,...,100,255,256,256,210,"(465, 271, 448)",TE=2.2;Time=124314.452,0.5mm,1.0mm,0.5mm
SE000006_t1_mprage_sag_p2_iso_MPR_Cor_20200520115951_6,123.248,3D,t1_mprage_sag_p2_iso_MPR_Cor,t1_mprage_sag_p2_iso_MPR_Cor,GR\IR,SK\SP\MP,*tfl3d1_16ns,6,12:07:30.385000,1.0,...,100,255,256,256,220,"(465, 148, 448)",TE=3.2;Time=120730.385,0.5mm,1.0mm,0.5mm
SE000024_t2_tse_tra_320_p2_20200520115951_24,123.248,2D,KEY_IMAGES,t2_tse_tra_320_p2,SE,SK\SP\OSP,*tse2d1_19,24,12:31:47.235000,3.0,...,75,304,279,1024,215,"(1024, 1024, 1)",TE=95;Time=123147.235,0.33436584mm,0.33436584mm,3.0mm
SE000022_t1_mprage_sag_iso_post_MPR_Tra_20200520115951_22,123.248,3D,t1_mprage_sag_iso post_MPR_Tra,t1_mprage_sag_iso post_MPR_Tra,GR\IR,SK\SP\MP,*tfl3d1_16ns,22,12:43:14.452500,1.0,...,100,255,256,512,210,"(512, 512, 238)",TE=2.2;Time=124314.452,0.5mm,0.5mm,1.0mm
SE000007_t1_mprage_sag_p2_iso_MPR_Tra_20200520115951_7,123.248,3D,t1_mprage_sag_p2_iso_MPR_Tra,t1_mprage_sag_p2_iso_MPR_Tra,GR\IR,SK\SP\MP,*tfl3d1_16ns,7,12:07:30.385000,1.0,...,100,255,256,512,220,"(512, 512, 238)",TE=3.2;Time=120730.385,0.5mm,0.5mm,1.0mm
SE000013_t2_tse_cor_oblique_hippo_20200520115951_13,123.248,2D,t2_tse_cor_oblique_hippo,t2_tse_cor_oblique_hippo,SE,SK\SP\OSP,*tse2d1_18,13,12:21:33.992500,3.0,...,80,396,307,384,220,"(384, 384, 41)",TE=1e+02;Time=122133.992;phase=1,0.44270834mm,0.44270834mm,3.0mm
SE000012_t2_spc_flair_sag_p2_iso_MPR_Tra_20200520115951_12,123.249,3D,t2_spc_flair_sag_p2_iso_MPR_Tra,t2_spc_flair_sag_p2_iso_MPR_Tra,SE\IR,SK\SP\MP,*spcir_278ns,12,12:15:14.202500,1.0,...,100,223,256,524,750,"(524, 524, 238)",TE=3.8e+02;Time=121514.202,0.48828125mm,0.48828125mm,1.0mm
SE000017_ax-SWI_20200520115951_17_ph,123.249,3D,Pha_Images,ax-SWI,GR,SP\OSP,*swi3d1r,17,12:35:9.132500,1.5,...,90,202,202,224,120,"(224, 256, 96)",TE=20;Time=123509.133;phase=1,0.8984375mm,0.8984375mm,1.5mm
SE000011_t2_spc_flair_sag_p2_iso_MPR_Cor_20200520115951_11,123.249,3D,t2_spc_flair_sag_p2_iso_MPR_Cor,t2_spc_flair_sag_p2_iso_MPR_Cor,SE\IR,SK\SP\MP,*spcir_278ns,11,12:15:14.202500,1.0,...,100,223,256,256,750,"(476, 271, 459)",TE=3.8e+02;Time=121514.202,0.48828125mm,1.0mm,0.48828125mm


### Intra-Op

In [247]:
intra_df: pd.DataFrame
intra_df_unique: pd.DataFrame

# Read json files.
meta_keys = set()
intra_meta = list()
for name in intra_names:
    meta_file = intra_dir / (name + '.json')
    with open(meta_file, 'r') as f:
        meta = json.load(f)
    meta['file_name'] = name
    meta.pop('ImageOrientationPatientDICOM', None)
    meta.pop('ImageType', None)
    intra_meta.append(meta)
    if not meta_keys:
        meta_keys = meta_keys.union(meta.keys())
    else:
        meta_keys = meta_keys.intersection(meta.keys())

# Sub-select each metadata according to the fields present in all metadata.
intra_meta = [
    dict(
        filter(
            lambda kv: kv[0] in meta_keys,
            super_meta.items()
        )
    ) for super_meta in intra_meta
]

intra_df = pd.DataFrame.from_dict(intra_meta).set_index('file_name')

# Further sub-select fields that have more than one value throughout the table.
intra_df_unique = intra_df.loc[
    :,
    list(
        len(intra_subj[1].astype(str).unique()) > 1 for intra_subj in intra_df.items()
    )
]

In [248]:
# Load corresponding images for additional metadata.
nifti_meta = list()

for name in intra_df.index:
    img = nibabel.load(intra_dir / (name + '.nii.gz'))
    header = img.header
    header_d = dict(header)
    spatial_unit = header.get_xyzt_units()[0]
    x_size = header.get_zooms()[0]
    y_size = header.get_zooms()[1]
    z_size = header.get_zooms()[2]
    meta = {
        'file_name': name,
        'shape': img.shape,
        'description': str(header_d['descrip'].astype('U')),
        'x size': str(x_size) + spatial_unit,
        'y size': str(y_size) + spatial_unit,
        'z size': str(z_size) + spatial_unit
        }
    
    nifti_meta.append(meta)
    
meta_df = pd.DataFrame.from_dict(nifti_meta).set_index('file_name')

In [249]:
merge_intra_df = intra_df_unique.merge(meta_df, on='file_name')
merge_intra_df.to_csv('intra_op_meta.csv', quoting=csv.QUOTE_ALL)
merge_intra_df

Unnamed: 0_level_0,ImagingFrequency,MRAcquisitionType,SeriesDescription,ProtocolName,ScanningSequence,SequenceVariant,SequenceName,SeriesNumber,AcquisitionTime,SliceThickness,...,PercentSampling,PhaseEncodingSteps,AcquisitionMatrixPE,ReconMatrixPE,PixelBandwidth,shape,description,x size,y size,z size
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ST000002_t1_fl3d_sag_Fiber_20200610090639_7,63.5976,3D,t1_fl3d_sag_Fiber,t1_fl3d_sag_Fiber,GR,SP,*fl3d1,7,11:42:30.962500,1,...,100,256,256,256,160,"(240, 256, 256)",TE=5.7;Time=114230.962;phase=1,1.0mm,1.015625mm,1.015625mm
ST000002_T1_SE_SAG_20200610090639_10,63.5976,2D,T1 SE SAG,T1 SE SAG,SE,SP,*se2d1,10,11:57:48.982500,3,...,100,256,256,256,129,"(256, 256, 7)",TE=9.5;Time=115748.982;phase=1,1.015625mm,1.015625mm,3.0mm
ST000002_T1_SE_AXIAL_20200610090639_11,63.5976,2D,T1 SE AXIAL,T1 SE AXIAL,SE,SP,*se2d1,11,12:00:33.422500,3,...,100,256,256,256,129,"(256, 256, 7)",TE=9.5;Time=120033.423;phase=1,1.015625mm,1.015625mm,3.0mm
ST000002_t1_fl3d_sag_Fiber_20200610090639_102,63.5976,3D,sag mpr 2<MPR Collection>,t1_fl3d_sag_Fiber,GR,SP,*fl3d1,102,11:42:30.962500,1,...,100,256,256,484,160,"(484, 484, 1)",TE=5.7;Time=114230.962,0.5371901mm,0.5371901mm,1.0mm
ST000002_t1_fl3d_sag_Pre_Ablation_20200610090639_4,63.5977,3D,t1_fl3d_sag_Pre Ablation,t1_fl3d_sag_Pre Ablation,GR,SP,*fl3d1,4,09:09:23.285000,1,...,100,256,256,256,160,"(240, 256, 256)",TE=5.7;Time=90923.285;phase=1,1.0mm,1.015625mm,1.015625mm
ST000002_t2_flair_cor_EC_Post_Ablation_20200610090639_24,63.5976,2D,t2_flair_cor_EC_Post Ablation,t2_flair_cor_EC_Post Ablation,SE\IR,SK\SP\MP\OSP,*tir2d1_19,24,13:20:41.917500,2,...,75,209,192,256,130,"(256, 256, 54)",TE=1.2e+02;Time=132041.918;phase=1,1.015625mm,1.015625mm,2.0mm
ST000002_t2_flair_cor_EC_20200610090639_17,63.5976,2D,t2_flair_cor_EC,t2_flair_cor_EC,SE\IR,SK\SP\MP\OSP,*tir2d1_19,17,12:51:26.452500,2,...,75,209,192,256,130,"(256, 256, 54)",TE=1.2e+02;Time=125126.452;phase=1,1.015625mm,1.015625mm,2.0mm
ST000002_t2_flair_tra_EC_20200610090639_16,63.5976,2D,t2_flair_tra_EC,t2_flair_tra_EC,SE\IR,SK\SP\MP\OSP,*tir2d1_19,16,12:43:47.925000,2,...,75,209,192,256,130,"(256, 256, 54)",TE=1.2e+02;Time=124347.925;phase=1,1.015625mm,1.015625mm,2.0mm
ST000002_t1_fl3d_sag_Fiber_20200610090639_101,63.5976,3D,axial mpr<MPR Collection>,t1_fl3d_sag_Fiber,GR,SP,*fl3d1,101,11:42:30.962500,1,...,100,256,256,484,160,"(484, 484, 1)",TE=5.7;Time=114230.962,0.5371901mm,0.5371901mm,1.0mm
