In [2]:
import numpy as np
import pandas as pd
import nibabel as nib

Load a pre-made 4D image containing binary lesion masks from multiple patients.

In [3]:
mask_stack_path = '/home/despoB/lesion/anat_preproc/lesion_mask_mni_stack.nii.gz'

In [4]:
mask_stack_img = nib.load(mask_stack_path)

In [5]:
mask_stack_data = mask_stack_img.get_data()

In [6]:
mask_stack_data.shape

(91, 109, 91, 64)

Get the inverse affine transform for the image, so we can go from image-space coordinates to voxel-data coordiantes.

In [7]:
mask_stack_img.affine

array([[  -2.,   -0.,    0.,   90.],
       [  -0.,    2.,   -0., -126.],
       [   0.,    0.,    2.,  -72.],
       [   0.,    0.,    0.,    1.]])

In [8]:
inverse_affine = np.linalg.inv(mask_stack_img.affine)

Confirm that our coordinate translation works by checking against a known relationship (from FSLView).

MNI coordinate (28, 36, 2) = Voxel coordinate (31, 81, 37)

In [9]:
nib.affines.apply_affine(inverse_affine, [28, 36, 2])

array([ 31.,  81.,  37.])

To be double sure, test our transformed coordinates on real data. 

The patient 191 has a lesioned voxel at MNI coordinate (-36, 2, -2), but moving more than 2mm (1 voxel) in any direction puts you in empty space.

In [12]:
mask_path_191 = '/home/despoB/lesion/anat_preproc/191/191_mask_mni.nii.gz'

In [13]:
mask_img_191 = nib.load(mask_path_191)

In [14]:
mask_data_191 = mask_img_191.get_data()

There should be damage here..

In [15]:
nib.affines.apply_affine(inverse_affine, [-36, 2, -2])

array([ 63.,  64.,  35.])

In [16]:
mask_data_191[63, 64, 35]

1.0

But not in any of these places...

In [17]:
nib.affines.apply_affine(inverse_affine, [-40, 2, -2])

array([ 65.,  64.,  35.])

In [18]:
mask_data_191[65, 64, 35]

0.0

In [19]:
nib.affines.apply_affine(inverse_affine, [-36, -2, -2])

array([ 63.,  62.,  35.])

In [20]:
mask_data_191[63, 62, 35]

0.0

In [21]:
nib.affines.apply_affine(inverse_affine, [-36, 2, 2])

array([ 63.,  64.,  37.])

In [22]:
mask_data_191[63, 64, 37]

0.0

Looks like our coordinate conversion is working. These were lazy checks, but lazy is better than nothing.

Next, we need to map 3D voxel-data coordinates to 1D (flattened) coordinates.

First, create a small 3D array to test things on.

In [23]:
test_data = np.random.rand(3, 3, 3)

In [24]:
test_data.shape

(3, 3, 3)

In [25]:
test_data

array([[[ 0.28608054,  0.96612575,  0.0491338 ],
        [ 0.4174178 ,  0.78697586,  0.40856223],
        [ 0.67486548,  0.0242388 ,  0.83784137]],

       [[ 0.86366276,  0.33605138,  0.10813308],
        [ 0.20377529,  0.59884482,  0.95976346],
        [ 0.22732127,  0.62403217,  0.06885925]],

       [[ 0.03200667,  0.33162971,  0.49775652],
        [ 0.81039875,  0.8548744 ,  0.86193231],
        [ 0.71003148,  0.22703014,  0.6580786 ]]])

Pull an arbitrary item using 3D indexing.

In [26]:
test_data[0,2,1]

0.024238799862456029

The `ravel_multi_index` function translates a multi-dimensional index into the equivalent 1D index of a raveled array.

In [27]:
np.ravel_multi_index([0,2,1], (3,3,3))

7

In [28]:
test_data.ravel()[7]

0.024238799862456029

Now let's check this works on our image data.

In [29]:
mask_data_191[63, 64, 35]

1.0

In [30]:
np.ravel_multi_index([63, 64, 35], mask_data_191.shape)

630756

In [31]:
mask_data_191.ravel()[630756]

1.0

Now that we've got indexing working using NumPy arrays, let's translate things into Pandas so we can have labeled rows.

Construct a test DF with image vectors from 10 patients. We do it iteratively, adding one patient at a time. 

We'll also do some code profiling to see how much of a hit the system will take when doing these operations.

In [36]:
%load_ext memory_profiler

In [37]:
%%memit
for i in range(10):
    pdata = mask_stack_data[...,i].ravel()
    try:
        mask_data_df['10'+str(i)] = pdata
    except:
        mask_data_df = pd.DataFrame({'10'+str(i):pdata})

peak memory: 621.07 MiB, increment: 82.59 MiB


In [38]:
mask_data_df.head()

Unnamed: 0,100,101,102,103,104,105,106,107,108,109
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Now that we have the DF, let's search it.

In [39]:
test_results = mask_data_df.loc[630756,:] == 1
list(test_results[test_results == True].index)

['102', '103', '104', '105', '106']

In [40]:
%%timeit
test_results = mask_data_df.loc[630756,:] == 1
list(test_results[test_results == True].index)

1000 loops, best of 3: 1.01 ms per loop


In [41]:
%memit
test_results = mask_data_df.loc[630756,:] == 1
list(test_results[test_results == True].index)

peak memory: 721.48 MiB, increment: 0.00 MiB


['102', '103', '104', '105', '106']

Sure enough, visual inspection confirms that those patients have lesions at that coordinate.

Let's see if it is much slower when we tweak things so the outputs a list of patients directly.

In [42]:
%%timeit
mask_data_df.T[mask_data_df.T[630756] == 1].index

10 loops, best of 3: 35 ms per loop


Okay, so it looks like we definitely don't want to just transpose the DF on the fly. What if we do that beforehand.

In [43]:
mask_data_t = mask_data_df.T

In [44]:
%%timeit
mask_data_t[mask_data_t[630756] == 1].index

100 loops, best of 3: 17 ms per loop


Hmm, okay. It seems like the slowdown comes from indexing over columns instead of rows. Back to our original plan.

Now that we've got things working, we'll put everything into functions.