In [None]:
######## LESSON 5: USING PYTHON FOR BASIC NEUROIMAGING PROCESSES AND ANALYSES ########

In [None]:
'''In this lesson, you will learn how to interact with neuroimages using Python. You will also 
learn tools to efficiently analyze and modify images. Finally, you'll learn how to use python 
to interact with already existing neuroimaging software.'''

In [None]:
# Note -- in order to complete this lesson, you will have to ensure all of the proper libraries
# are installed. You may need to install the following libraries

# nibabel
# nilearn
# scikit-learn (this is the package name to install; it is imported as sklearn)

# For nilearn, you will need to install it directly from a certain channel called conda-forge.
# You can do so like this:

# conda install -c conda-forge nilearn

In [1]:
import os
import pandas
import numpy as np
import scipy.stats as stat
import nibabel as ni
from copy import deepcopy

In [None]:
# The first thing we should do is open up an image so we can get familiar with its anatomy.
# For this, we will use the library nibabel. Nibabel's purpose is to help you take neuroimages
# of various formats and load them into your python workspace.

# A very handy utility is ni.load(). This function is pretty good about detecting what kind of
# image you're loading. It will accept Niftis, Analyze images (i.e. hdr/img files), Minc images
# and more.

cwd = os.getcwd()
img = ni.load(
        os.path.join(cwd,'stuff/nan_snorm_002-S-4229_18F-AV1451_2016-02-10_P4_I635352.nii.gz'))


In [None]:
# Now that the image has been loaded, we can interrogate several of its properties. For 
# example, here is the image affine

img.affine

In [None]:
# This particular image does not have a header, but if it did, we can access it easily:
img.header

In [None]:
# Here is its shape. The shape represents the number of voxels in each dimension. This is a 3D
# image
img.shape

In [None]:
# Finally there's the image data itself. An image is simply a matrix of values. In this case, 
# each value is represented as a voxel -- a 3D cube containing information about the image. 
# The values within the image depends on what type of image it is. This is a PET image, which
# has been intensity-normalized. Therefore, each value (voxel) in the matrix represents whats
# called an SUVR -- its a primitive ratio of PET signal to noise signal. Let's have a look
# at the data

# get_fdata() returns the image values as a floating-point np.array. The older get_data()
# accessor is deprecated and is no longer available in recent nibabel releases.
dat = img.get_fdata()
dat

In [None]:
# We're only gettings a small snapshot of the image here -- Python will not display all of 
# the values. Why not? Well, lets think about how many values there really are:
a,b,c = img.shape
a*b*c

In [None]:
# Over 7 million!! There's no reason for Python to display all of these values because it would
# not be human readable (ahem, matlab...). We'll come back to this conundrum later. First, lets
# examine what kind of object this image data is
type(dat)

In [None]:
# This is an object we haven't run into yet. Luckily, its not very different from objects we 
# have encountered. A numpy n-dimensional array behaves quite similarly to a list, and you can
# think of it as an optimized lists with many features. Lets have a look:
lst = [1,2,3]
arr = np.array(lst)
print(lst,'\n',arr)

In [None]:
# You can already see the difference in presentation between a list and an np.array -- the 
# values are not comma-separated. However, you cannot build an np.array from scratch like you
# can a list
new_arr = [1 2 3]

In [None]:
# Python won't recognize it. Instead, you need to convert an existing sequences as we did above,
# or, use a numpy function to build it. There are a few ways to do this

# create a 2 x 2 array of uninitialized values (whatever happens to be in memory -- not true random numbers)
new_arr = np.ndarray((2,2),dtype=int)
print(new_arr,'\n')

# create an ordered sequence of integers with a length 8
new_arr = np.arange((8))
print(new_arr,'\n')

# same as above but shaped into a 2x4 matrix
new_arr = np.arange((8)).reshape((2,4)) # notice this takes a tuple containing the shape as
                                        # an argument
print(new_arr,'\n')

# create a 3x2x2 array of 0s
new_arr = np.zeros((3,2,2))

print(new_arr,'\n')

# create an uninitialized 3x3 array (np.empty does not set the values to anything in particular)
new_arr = np.empty((3,3))
print(new_arr,'\n')

# create an len(10) array of 9s
new_arr = np.full(10,9,int)
print(new_arr,'\n')

In [None]:
# You can also use most of these commands to generate an array that has the same shape as an
# existing array. For example, lets say we wanted to make an array the same sizes as our image
# data, but we wanted it filled with 0s...
new_arr = np.zeros_like(dat)
new_arr.shape
print(new_arr[75,75,:])

In [None]:
# np.arrays can be indexed just like lists.
new_arr = np.arange(10)
print(new_arr)
print(new_arr[4])
print(new_arr[1:3])

In [None]:
# for arrays with multiple dimensions, its still quite easy to index and slice. Lets take our
# image data for instance. Lets say we wanted to look at every value in x plane at y,z
# coordinates 70,75

dat[:,70,75]

In [None]:
# But you have to be careful -- like lists, arrays are mutable

a = np.array([1,2,3])
b = a
print('here is a',a)
print('here is b, which is set as equal to a',b)
b[0] = 10
print('here is a after changing b',a)
print('here is b after changing b',b)

In [None]:
# However, like Series', np.arrays have all sorts of tremendously useful methods and attributes
# that help to perform efficient operations on arrays and matrices. This is very important 
# given that, as we have already demonstrated, neuroimages are not small.

# Lets make an arbitrary 4x4 matrix and show off just a few of these features.

mtx = np.arange(16).reshape((4,4))
mtx

In [None]:
# You can easily manipulate matrices with regular arithmetic
print(mtx + 3,'\n')
print(mtx / 2, '\n')

# Or element-wise math with broadcasting (each row is multiplied by [1,2,3,4]; this is not a matrix product)
print(mtx * [1,2,3,4])

In [None]:
# You can also get quick descriptives
print('mean:',mtx.mean())
print('std:',mtx.std())
print('sum:',mtx.sum())
print('\n')

# Or get information about the mins and maxs and their index
print('min:',mtx.min())
print('index of min:',mtx.argmin())
print('max:',mtx.max())
print('index of max:',mtx.argmax())

In [None]:
# You can easily coerce np.arrays to different data types
print(mtx.astype(int),'\n')
print(mtx.astype(str))

In [None]:
# For square matrices, there are also plenty of useful commands for extracting certain
# parts of the matrix.

# Get just the diagonal
print(mtx.diagonal())
# change diagonal to ones
newmtx = deepcopy(mtx)
diag_ix = np.diag_indices_from(mtx)
newmtx[diag_ix] = np.ones(4)
newmtx

In [None]:
# Lets quickly break down how that worked, because its shows off some very nice features of
# these arrays, namely their indexing

# We first obtained the indices of each value within the diagonal of the matrix with a
# specialized function. Because the matrix (np.array) is 2d, the indices are also 2D. Have a
# look
diag_ix = np.diag_indices_from(mtx)
diag_ix

In [None]:
# So the indices come in the form of two arrays. One array carries the x-indices and the other
# array carries the y-indices. These two arrays are contained within a tuple.

# A nice feature of np.ndarrays is that you can index with tuples. When I index using this 
# tuple of arrays, Python will return the value at the corresponding coordinates, which in this
# case are 0,0  1,1  2,2  and 3,3

# Have a look. 

print('here is the matrix: \n',mtx,'\n')
print('and here are the values at the indices we passed:',mtx[diag_ix])

In [None]:
# For example, if I wanted to view just the values at coordinates 0,3 and 2,2, I could do
# so like this:
mtx[([0,2],[3,2])]

In [None]:
# And changing the values is simple. We can change only the values at the index we pass. In
# the case above, we changed them to ones. Because there are four values to change, we passed
# an array of four 1s. 

newmtx = deepcopy(mtx)
ones = np.ones(4)
print('here is the input',ones)
newmtx[diag_ix] = ones
print('and here is the result \n', newmtx)

In [None]:
# Using a similar approach, we can easily extract just the lower triangle of a matrix
a,b = mtx.shape
lo_tri = np.tril_indices(a)
print('here is the matrix \n',mtx,'\n')
print('here are the indices of the triangle \n',lo_tri)
print('here are the values at that index',mtx[lo_tri],'\n')



In [None]:
# and if we want to change the lower triangle to 0s (but then retain the diagonal as ones)...

# exchange values at the indices of the lower triangle with an array of zeros with the same
# length as the number of values in the lower triangle
newmtx[lo_tri] = np.zeros_like(lo_tri[0])
# add back the ones at the diagonal
newmtx[diag_ix] = ones
print('et voila! Our new matrix: \n',newmtx)

In [None]:
# Matrices can be easily reshaped to other shapes, as long as the dimensions are
# interchangeable with the current matrix shape. 

# Here are a few ways we could reshape our current array
print(mtx.reshape(8,2),'\n')
print(mtx.reshape(2,2,2,2),'\n')
print(mtx.reshape(16))

In [None]:
# And matrices can be very easily transposed
print('original: \n',mtx,'\n')
print('tranposed: \n',mtx.transpose())

In [None]:
# And conveniently, we can always convert an array back to a list if needed. Although notice
# what the output looks like
mtx.tolist()

# Our np.array was a 4x4 matrix. So rather than being a list of 16 values, we have a list of 
# four lists, each containing four values. If you wanted to create a 2D np.array from a list,
# this is how you would construct the list.

In [None]:
# Or, if you'd rather work with pandas, you can easily convert a 2D matrix to a pandas 
# DataFrame
pandas.DataFrame(mtx)

In [None]:
# There are other neat tricks with indexing using booleans
newmtx = deepcopy(mtx)
print(mtx)

In [None]:
# First of all, np.arrays respond to boolean statements. Have a look
newmtx>7

In [None]:
# Here's another example
newmtx == 5

In [None]:
# Now, look what happens when we index the matrix with a boolean. If we wanted to, for example,
# return only items from the matrix greater than 7, we would index the matrix with a boolean
# matrix. We just saw how we can make a boolean matrix by just creating a boolean statement
# that includes a matrix in it. Now we can use that to index!
newmtx[newmtx>7]

In [None]:
# Notice how the numbers that are returned are only those that are True in the output three
# cells up!

# We can also change matrix values in this way
newmtx[newmtx>7] = 0
newmtx

In [None]:
# WAAAAIIIIIIITTTTTT!!!!!!!!!!!!!!!!!!
# I know what you're thinking. "Hey, I thought this Lesson was supposed to be about 
# Neuroimages." Well, ultimately, in Python, neuroimages are just matrices of values in the 
# form of np.arrays! So everything you're learning here can be applied easily to an image. And 
# we'll be doing just that shortly. However, if you want to be able to manipulate these images, 
# you must be comfortable with np.arrays.

# So, let's complete a couple of basic exercises to make sure you're warmed up.

In [None]:
######## EXERCISES I ###########

## PART A
# Create a 3x3x4 matrix of ascending integers and print it

## PART B
# Take the matrix from part A and modify it so that all odd numbers are set to 0

## PART C 
# Create a 5x5 matrix of 0s. Set all values in the upper triangle of the matrix to 5, and set 
# the diagonal to ones. Then transpose it.

## PART D
# Create a large matrix of 0s that spells the word "Hi" in ones. Convert the matrix dtype to
# int

In [None]:
# ANSWERS ARE BELOW
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#

In [None]:
##### ANSWERS TO EXERCISE I ######

## PART A
# Create a 3x3x4 matrix of ascending integers and print it
x_mtx = np.arange(3*3*4).reshape(3,3,4)
print(x_mtx,'\n')

## PART B
# Take the matrix from part A and modify it so that all odd numbers are set to 0
x_mtx[x_mtx%2>0] = 0
print(x_mtx,'\n')

## PART C 
# Create a 5x5 matrix of 0s. Set all values in the upper triangle of the matrix to 5, and set 
# the diagonal to ones.
x2m = np.zeros((5,5))
utri = np.triu_indices_from(x2m)
diag = np.diag_indices_from(x2m)
x2m[utri] = 5
x2m[diag] = np.ones(len(diag[0]))
# Then transpose
print(x2m.transpose(),'\n')

## PART D
# Create a large matrix of 0s that spells the word "Hi" in ones. Convert the matrix dtype to
# int
hmtx = np.zeros((5,7))
hind = ([0,1,2,3,4,2,0,1,2,3,4,0,4,0,1,2,3,4,0,4],[0,0,0,0,0,1,2,2,2,2,2,4,4,5,5,5,5,5,6,6])
hmtx[hind] = 1
print(hmtx.astype(int))

In [None]:
# Okay, so now we're comfortable with numpy arrays. But we still have a lingering issue -- 
# neuroimages are very large arrays. We already showed that an image with 1mm resolution 
# contains over 7 million values.

# Iterating over such a large matrix is very computationally and time intensive. However, there
# are little tricks that can help drastically speed up computation of large neuroimages. Let's
# explore this a bit


In [None]:
# We can use the timeit magic to track the speed of different operations. Its very easy to use
# it looks like this:

# %timeit operation to time

# As an example...
%timeit 50/2

In [None]:
# Lets start with a simple example. We'll create a matrix of ones that is the exact size of our
# neuroimage
ni_1 = np.ones_like(dat)
x,y,z = ni_1.shape
# Now lets say we want to execute a simple operation -- we want to add +1 to every value. How
# would you go about doing that?

In [None]:
# If you didn't know about some of the nifty tricks with numpy, you might try iterating through
# all three indices. 

def iter1(in_mtx):
    """Add 1 to every element of a 3D array, in place, one element at a time.

    This is the deliberately slow, pure-Python way of doing ``in_mtx += 1``; it
    exists only so it can be timed against the vectorized numpy version.

    Parameters
    ----------
    in_mtx : np.ndarray
        A 3-dimensional array. It is modified in place.

    Returns
    -------
    None
    """
    # Take the loop bounds from the array itself rather than from the notebook-level
    # x, y, z variables, which get reassigned by later cells.
    n_x, n_y, n_z = in_mtx.shape
    for i in range(n_x):
        for j in range(n_y):
            for k in range(n_z):
                in_mtx[i,j,k] +=1

 

In [None]:
# Now lets use timeit to time the speed of this function
%timeit iter1(ni_1)

In [None]:
# Holy hell that was SLOW!! over 30s on my machine. Lets compare it to letting numpy do its
# own iteration by using built-in numpy functionality
%timeit ni_1+1

In [None]:
# On my computer, the average speed for this operation was 12.2 ms, compared to 33.5 seconds. 
# So.. how much faster was it?
print('this was a speed up of %s times!!!'%(round(33500/12.2)))

In [None]:
# Hopefully you see my point. Using python-friendly language can save you a TON of time. When
# you're doing very small operations, you won't notice the speed-up, but when dealing with 
# much larger computation (such as something related to a neuroimage), the speed up could save
# hours!

# But of course, sometimes you will need to iterate through values, and when you do,
# you should know there are many tools to do it. Some tools are a bit advanced, like numba and
# cython, and I will hold off on explaining those tools for now. Instead, I will teach some 
# more basic tools that will still give you considerable speed advantages. Let's start with
# list comprehension

In [None]:
# List comprehension is a method of iterating through a list, but doing it in one line. Here's
# a short example. I will use a traditional For loop and list comprehension to do the same
# thing. We will print the squares of integers from 0 to 9

# First, the "traditional way", using a For loop 
def lc1(rang):
    """Return the squares of 0, 1, ..., rang-1 as a list, built with an explicit for loop."""
    squares = []
    for n in range(rang):
        # grow the result one item at a time -- this is the step list comprehension removes
        squares.append(n**2)
    return squares

jnk = lc1(10)
print(jnk)

In [None]:
# Now I'll do the same thing with list comprehension
def lc2(rang):
    """Return the squares of 0, 1, ..., rang-1 as a list, built with a list comprehension."""
    squares = [n**2 for n in range(rang)]
    return squares

jnk = lc2(10)
print(jnk)

In [None]:
# Same output. Lets examine how that works. 

# Here's the code again
[x**2 for x in range(10)]

# 1) Notice the entire command is surrounded by square brackets. This is essential.
# 2) Lets start at the end. Notice how the end of the line is this

# for x in range(10)

# Looks exactly like a for loop doesn't it? In a for loop, the For statement goes at the 
# beginning, and everything else underneath and indented. In list comprehension, the for loop
# comes after the operation.
# 3) Instead of the operation being underneath the for loop, the operation precedes the for 
# loop. So,

# [x**2 for x in range(10)]

# is identical to

# for x in range(10):
#    x**2

# Except its all in one line, and the output is automatically made into a list.


In [None]:
# With small iterables, list comprehension only saves a bit of time
# Here is the original way
%timeit lc1(100)

In [None]:
# Here is the with list comprehension
%timeit lc2(100)

In [None]:
# But its important to know why. List comprehension usually allows for cleaner, simpler code 
# with less operations, which are themselves optimized. The speed-up above probably comes from 
# the fact that you are doing one less operation (creating and storing the variable jnk), and 
# indeed, a clever list comprehension might save even more time by eliminating more superfluous 
# operations

# However, as we'll see below, as your iterables become larger, the speed-up you get from list
# comprehension improves dramatically

In [None]:
# List comprehension can get quite complicated, as you'll see below. If you're uncomfortable 
# with the syntax of list comprehension, perhaps you would prefer the map function, which 
# can accomplish the same thing with different inputs.

# The map function works a bit differently. Map takes two arguments, a function, and an
# iterable. An iterable is any object you can loop over (list, tuple, range, np.array,
# etc) -- it does not have to be subscriptable. And a function is any python function,
# including a function you define. To accomplish the same operation as we did with list
# comprehension above, it would look like this:

def j_sqr(x):
    """Return x squared."""
    squared = x**2
    return squared

list(map(j_sqr,range(10)))

# So here, for each item in range(10), we are performing the function j_square using the item
# as the argument for j_sqr. Then we are converting the output into a list. So, its doing the
# exact same thing as what we did above with a For Loop and list comprehension.

In [None]:
# However, an advantage to using map is that its FAST! 
%timeit list(map(j_sqr,range(10)))

In [None]:
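# On my computer, I got a nice speed up. (Caveat: the figures below appear to come from
# timing map on range(10) against list comprehension on 100 items, i.e. different input
# sizes, so re-measure both on the same input size before trusting the ratio.)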
print('operation using map was %s times faster than list comprehension'%(42.2/6.57))

In [None]:
# Just remember -- a speed up of six times could be very considerable! If you have a process
# that took an hour with list comprehension, replacing with map in this case would reduce it to
# <10 minutes

# By the way, you might be thinking map() is inconvenient because you must pass a function as 
# the first argument. It may seem silly to create a new function just to do a simple operation.
# Luckily, the lambda function exists exactly for this purpose -- it lets you create a very
# quick and simple one-off function. In other words, its a handy way of performing an operation
# on an object without having to create a new function. 

# Here, I'll do the exact same thing as I did above, except rather than defining the j_sqr 
# function, I'll achieve the same functionality using lambda (I'll explain it after)
list(map(lambda x: x**2,range(10)))

In [None]:
# In the above example, lambda x: x**2 just means, for every x in an iterable (in this case, 
# range(10), square x. This is almost like combining list comprehension with map(). It works
# like this: lambda [arbitrary variable name]: operation on variable

In [None]:
# List comprehension, map and lambda can all be made to be much more complicated. They may be
# intimidating at first, but they allow for very clean and very efficient code, which is often
# what is needed when dealing with neuroimaging data. Importantly, these functions can deal
# with multiple arguments or iterables. See below for some examples:

In [None]:
# First, say you have two lists of values and you want to multiply the values together at each
# index. I will demonstrate this using For loops, map, and list comprehension below

lst1 = range(10)
lst2 = range(10,20)

# For Loop
nlst = []
for i in range(len(lst1)):
    x,y = lst1[i],lst2[i]
    nlst.append(x*y)
print(nlst)

# Map
nlst = list(map(lambda x,y: x*y, lst1, lst2))
print(nlst)

# List comprehension (we'll need a new tool for this one)
nlst = [x*y for x,y in zip(lst1,lst2)]
print(nlst)

In [None]:
# First, notice how much more effort and space was necessary for the For Loop in the first
# example. Second, notice how list comprehension and map were able to accept multiple arguments
# and multiple iterables.

# For map, we used lambda to create mini function with two arguments (x and y) and the 
# operation of that function was to multiply x and y together. Then, using map, we applied that
# function to every x,y pair between the two iterables (lst1 and lst2).

# For the list comprehension, we define the operation (x*y) first, then we define the arbitrary 
# variables (x and y), and then explain to which iterables x and y belong to (lst1 and lst2). 
# However, we had to use a new function, zip, to do this. Zip simply combines the two iterables
# so that they are joint by a single index


In [None]:
# You can take things even further with list comprehension by adding conditionals at the end.
# Lets say you only wanted to perform the operation on iterations where x is greater than 4.
# This is simple and intuitive with list comprehension
[x*y for x,y in zip(lst1,lst2) if x>4]

In [None]:
# Now that you've been introduced to these concepts, I will show you how they can be useful for
# neuroimages. Let's say you wanted to z-transform an image. You have a mean and SD for every
# voxel, and you want to apply those to your image. There are of course many ways to do this
# but lets use our new tricks. First, so its easier to see what's going on, I'll create small
# 3D matrices to practice on

img_x = 2.5 * np.random.rand(4,4,4)
img_x

In [None]:
# And now we'll create some fake means and SDs...
means = 2.5 * np.random.rand(4,4,4)
sds = 1.5 * np.random.rand(4,4,4)
print(means,'\n \n \n',sds)

In [None]:
# For the purposes of demonstration, lets first do this using a For Loop
def zscore_forloop(mat,means,sds):
    """Z-score a 3D array voxel by voxel using three nested for loops.

    Each output value is (mat - means) / sds at the same 3D index. This is the
    slow reference implementation that the other approaches are timed against.

    Parameters
    ----------
    mat, means, sds : np.ndarray
        3-dimensional arrays; means and sds are indexed at every position of mat.

    Returns
    -------
    np.ndarray
        A new array with the same shape as mat holding the z-scores.
    """
    n_i, n_j, n_k = mat.shape          # size of each axis of the input
    zmat = np.empty((n_i, n_j, n_k))   # output array; every element is filled below
    for a in range(n_i):
        for b in range(n_j):
            for c in range(n_k):
                voxel = (a, b, c)
                zmat[voxel] = (mat[voxel] - means[voxel]) / sds[voxel]

    return zmat

jnk1 = zscore_forloop(img_x,means,sds)
# print the z-scored matrix we just computed (jnk is the unrelated list from the earlier
# list comprehension demo)
print(jnk1)

In [None]:
# It works, but, like an ogre, its big, ugly and slow! Let's try our new tricks out instead
# Here's the same thing with map
jnk2 = list(map(lambda i,j,k: (i-j)/k,img_x,means,sds))
jnk2

In [None]:
# And now again with list comprehension
jnk3 = [(i-j)/k for i,j,k in zip(img_x,means,sds)]
jnk3

In [None]:
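# List comprehension and map were able to accomplish in one line what required six lines with
# a for loop. (Note that iterating over a 3D np.array yields 2D slices along the first axis,
# so each step above is itself a vectorized operation on a whole slice, and the result is a
# list of 2D arrays.) What about speed advantages? You betcha!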

In [None]:
%timeit zscore_forloop(img_x,means,sds)

In [None]:
%timeit list(map(lambda i,j,k: (i-j)/k,img_x,means,sds))

In [None]:
%timeit[(i-j)/k for i,j,k in zip(img_x,means,sds)]

In [None]:
# Alrighty, now lets try it on our neuroimage data and see how these different methods perform.
# First, generate the data...

x,y,z = dat.shape
means = 2.5 * np.random.rand(x,y,z)
sds = 1.5 * np.random.rand(x,y,z)

In [None]:
# And now we test it!

In [None]:
%timeit zscore_forloop(dat,means,sds)

In [None]:
%timeit list(map(lambda i,j,k: (i-j)/k,dat,means,sds))

In [None]:
%timeit[(i-j)/k for i,j,k in zip(dat,means,sds)]

In [None]:
# Wow!! With more computation, the speed-up delivered by map and list comprehension is even
# greater!! 
print('on my computer, list comprehension increased the speed by %s times, and map by %s times'%
          (30000/129,30000/138))

In [None]:
# Just imagine, by using map, we can z-score over 200 images before the For loop z-scores one!
# However, if you're going for performance, the best solution is to try to avoid iterating all
# together and instead, vectorize. In other words, if you're working with matrices anyway,
# just use math! This is how to obtain the optimal performance in numpy. Have a look:

In [None]:
%timeit (dat-means)/sds

In [None]:
# Again, there are still some scenarios when iteration is necessary. Another trick to enhance
# performance is the .flat attribute of np.arrays. This "flattens" an np.array, such that it
# becomes 1-dimensional. So a 3x3 matrix for example would become a 9x1 array. However, the 
# result is not actually an array, but an "iterator object". This is sort of a one-time array
# that is made for very rapid iteration, and which is not stored in memory. In other words,
# once you iterate over it, its "used up" and you'll have to create a new iterator object to 
# use it again. Still, this method often provides a nice speed up. Have a look:

def zscore_iter(mat,means,sds):
    """Z-score an array element by element using a single loop over its flat index.

    Each output value is (mat - means) / sds at the same flattened position.

    Parameters
    ----------
    mat, means, sds : np.ndarray
        Arrays of any dimensionality; means and sds are indexed at every
        flattened position of mat.

    Returns
    -------
    np.ndarray
        A 1-dimensional array with mat.size z-scores, in flattened order. Note that
        it is NOT reshaped back to mat.shape.
    """
    # Fetch each .flat object once instead of re-creating all three on every iteration.
    mat_flat, means_flat, sds_flat = mat.flat, means.flat, sds.flat
    n_values = mat.size # total number of elements, whatever the number of dimensions
    newmat = np.empty(n_values) # flat (1D) output with one slot per element of mat
    for i in range(n_values):
        newmat[i] = (mat_flat[i] - means_flat[i]) / sds_flat[i]

    return newmat

means = 2.5 * np.random.rand(4,4,4)
sds = 1.5 * np.random.rand(4,4,4)

In [None]:
%timeit zscore_forloop(img_x,means,sds)

In [None]:
%timeit zscore_iter(img_x,means,sds)

In [None]:
# Only a slight speed-up, but a speed-up nonetheless. Using iterables into different
# situations can often speed up your code, especially once you get the hang of using them. 

In [None]:
# One last tool I'll show-off is one that can be found in itertools. I recommend you browse the
# itertools documentation, which has tons of very nice.. well.. iteration tools.

# https://docs.python.org/3/library/itertools.html

# Right now, I will show you "combinatorial generators". Much like "zip", these generators can
# take multiple lists and generate every combination of them. This is very useful with neuro-
# images because you often wish to iterate through indices on each axis of a multi-dimensional
# array. These tools make this process fast, clean and easy

import itertools
x = np.arange(4)

In [None]:
# First we'll use zip, which is a basic functionality of assembling the values at the same 
# index across the two lists into tuples. This is great for getting the indices of the diagonal
# of a matrix
list(zip(x,x))

In [None]:
# Now let's look at some of the combinatorial generators from itertools. Here, you can see
# every possible combination between the two lists is generated. This is perfect for iteration
# across multiple axes of differing dimensions
list(itertools.product(x,x))

In [None]:
# Perhaps you are working with a correlation matrix, where the values are exactly the same on
# both sides of the diagonal. In this case, 3,2 would be the same value as 2,3. So if you want
# to avoid these redundant axes, you could use combinations if you don't want the diagonal...
list(itertools.combinations(x,2))

In [None]:
# Or use combinations_with_replacement to include the diagonal
list(itertools.combinations_with_replacement(x,2))

In [None]:
# Finally, there are some situations where you might want every value except the diagonal, and
# in this case you can utilize permutation. In this case, you get all values except 1,1 2,2 etc
list(itertools.permutations(x,2))

In [None]:
# How might this work in practice? Lets once again modify our

In [None]:
# Another useful tool for neuroimages is the masked array. Often, you will wish to ignore
# certain parts of the image that do not contain data -- after all, the field of view is often 
# far larger than the brain itself. To do this, one can utilize "masked" arrays. These are nice 
# datatypes that store a copy of the data itself as well as information as to which items 
# should be masked out. So you dont lose any information or change the shape of your data, 
# but at the same time, you tell functions to ignore (i.e. not operate on) certain data points!

In [None]:
# Lets have a look at the docstring 
np.ma.masked_array?
# I admit its a bit obtuse, but its important because it lets us see all of the arguments one 
# can pass. Basically, its telling us it accepts data and a mask. The mask is an array of
# boolean values (could be True/False or 1/0) that is the same shape as the data. The masked
# array will "mask out" (i.e. not operate on during computations) values in data for which
# the value in the same index of the mask is True.

# I'll demonstrate with a simple example below

In [None]:
# Here is a small matrix
ex_mat = np.arange(16).reshape(4,4)
print(ex_mat,'\n')

# Now lets make a mask. Lets mask out all values <=3 and >=13 (i.e. keep only 4 through 12)
ex_msk = (ex_mat>3) & (ex_mat<13)
print(ex_msk,'\n')

# However, we read that a masked_array will mask out values that are True, not values that are
# False. How can we invert our mask? Easy, by using the function np.logical_not
ex_msk = np.logical_not(ex_msk)
print(ex_msk)

In [None]:
# Now we're ready to create our masked array. All we need to do is enter our data and mask as
# arguments
ex_ma = np.ma.masked_array(ex_mat,ex_msk)
ex_ma

In [None]:
# Above, you can see the data we want masked is replaced by --, indicating this data will not
# be used during computation. But don't worry, the data isn't lost, and you can always access
# it like this
ex_ma.data
# Masked arrays are nice because they retain all of the functionality of an np.array, plus a
# few other features

In [None]:
# So when would this be helpful in actual analyses?. Since we're using PET data and we're
# looking at SUVR values, let's say we don't care about looking at values < 0.3, since such
# values are likely outside the brain

# Let's regenerate our data first
# get_fdata() replaces the deprecated get_data() accessor and returns a floating-point array
dat = img.get_fdata()
x,y,z = dat.shape

# And now we'll make our mask
dat_mask = np.logical_not(dat>0.3)

# And finally we'll make our masked array
mdat = np.ma.masked_array(dat,dat_mask)
print(mdat)

In [None]:
# Good! The masked_array has masked out values at the edges of the image -- thats what we
# wanted! 

# But just to be sure there are some values there...
mdat[75,75,:]


In [None]:
# However, somewhat counter-intuitively, using a masked_array will substantially slow
# down your computations. You would think by *not* operating over certain values, you would
# save time, but thats not the case
means = 2.5 * np.random.rand(x,y,z)
sds = 1.5 * np.random.rand(x,y,z)

In [None]:
%timeit list(map(lambda i,j,k: (i-j)/k,mdat,means,sds))

In [None]:
%timeit[(i-j)/k for i,j,k in zip(mdat,means,sds)]

In [None]:
# Still pretty fast but, on my computer, it was about a 7x slow-down. Still, masked_arrays can
# be convenient. 

In [None]:
# Lets do a few exercises to become more comfortable with these concepts. Afterwards, I'll
# demonstrate some actual applications

In [None]:
#### EXERCISES PART B ####

## Part A
# Generate a 10 x 10 matrix of random numbers between 1000 and 10000. Then, find the square
# root of each value. Do this using a For Loop, List Comprehension, Map, and vectors (matrix
# math). Then, prove all matrices you've created are equal, and time each method to see which 
# is fastest 

# HINT: You will need to use separate cells to test the speeds
# HINT: You can use np.allclose to check if the matrices are equal

## Part B
# Using a copy of our image data above, create a masked array that masks out all values below
# 1. Then, multiply the masked matrix by 4, and print a slice of the new data to prove that
# the computations only occurred on unmasked values

## Part C
# Using a copy of our image data above, find the 3D index of the peak value at each slice along 
# the z-axis, and save it into a list. You can use whatever method you prefer, but make sure
# to use itertools for the iteration over the axes.

# HINT: np.argmax can help you here



In [None]:
# ANSWERS BELOW
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#

In [None]:
#### ANSWERS TO EXERCISES B #######

## Part A
# Generate a 10 x 10 matrix of random numbers between 1000 and 10000. 
ex_mtx = np.random.randint(1000,10000,(10,10))

# Then, find the square root of each value. 
# For Loop 
def x_forloop(ex_mtx):
    """Return the element-wise square root of a 2D matrix using nested for loops.

    Parameters
    ----------
    ex_mtx : np.ndarray
        A 2-dimensional array of non-negative numbers. It is left unmodified.

    Returns
    -------
    np.ndarray
        A new float array with the same shape as ex_mtx.
    """
    x,y = ex_mtx.shape
    # Write into a separate float array: assigning the roots back into the integer input
    # would truncate them and would also corrupt the input for the other methods.
    roots = np.empty((x,y), dtype=float)
    for i in range(x):
        for j in range(y):
            roots[i,j] = np.sqrt(ex_mtx[i,j])
    
    return roots

# List Comprehension 
def x_lc(ex_mtx):
    """Return the element-wise square root of a 2D matrix using a list comprehension.

    The values are visited in flattened order and the resulting list is reshaped
    back to the shape of ex_mtx. The input is not modified.
    """
    x,y = ex_mtx.shape
    # np.sqrt is used because a bare sqrt is never imported in this notebook
    return np.array([np.sqrt(i) for i in ex_mtx.flat]).reshape(x,y)

# Map
def x_map(ex_mtx):
    """Return the element-wise square root of a 2D matrix using map.

    The values are visited in flattened order and the mapped results are reshaped
    back to the shape of ex_mtx. The input is not modified.
    """
    x,y = ex_mtx.shape
    # np.sqrt is passed to map directly: a bare sqrt is never imported in this notebook,
    # and wrapping it in a lambda would only add an extra call per element
    return np.array(list(map(np.sqrt, ex_mtx.flat))).reshape(x,y)

# Vectors (matrix math). 
def x_vec(ex_mtx):
    """Return the element-wise square root of a matrix with one vectorized numpy call."""
    roots = np.sqrt(ex_mtx)
    return roots

# prove all matrices you've created are equal
x1 = x_forloop(ex_mtx)
x2 = x_lc(ex_mtx)
x3 = x_map(ex_mtx)
x4 = x_vec(ex_mtx)

#print(type(x1),type(x2),type(x3),type(x4))
# np.allclose compares exactly two arrays (its 3rd and 4th positional arguments are the
# tolerances rtol and atol), so compare each of the other results against x1 in turn
print('all the matrices equal?',all(np.allclose(x1,other) for other in (x2,x3,x4)))

# Then, time each to see which is fastest...

In [None]:
%timeit x_forloop(ex_mtx)

In [None]:
%timeit x_lc(ex_mtx)

In [None]:
%timeit x_map(ex_mtx)

In [None]:
%timeit x_vec(ex_mtx)

In [None]:
## Part B
# Using a copy of our image data above, create a masked array that masks out all values below
# 1. 

xdat = deepcopy(dat)
msk = xdat>1
x_ma = np.ma.masked_array(xdat,np.logical_not(msk))
# Then, multiply the masked matrix by 4
x_ma = x_ma * 4

# and print a slice of the new data to prove that the computations only occured on 
# unmasked values
x_ma.data[50,50,:] == xdat[50,50,:]

In [None]:
## Part C
# Using our a copy of image data above, find the 3D index of the peak value at each slice along 
# the z-axis, and save it into a list
xdat = deepcopy(dat)
x,y,z = xdat.shape
# For every (i,j) position, np.argmax gives the position along the z-axis of the largest
# (peak) value; np.argmin would return the position of the smallest value instead.
lc_idx = [[i,j,np.argmax(xdat[i,j,:])] for i,j in itertools.product(range(x),range(y))]
lc_idx