Merge pull request #63 from tcchenbtx/master

Thanks for migrating the files over and testing them. Everything seems in place!
berkeley-stat159 · Nov 25, 2015 · acf7851 · acf7851
2 parents ed93c5b + acddc7b
commit acf7851
Show file tree

Hide file tree

Showing 5 changed files with 264 additions and 0 deletions.
diff --git a/code/utils/diagnostics.py b/code/utils/diagnostics.py
@@ -0,0 +1,127 @@
+""" Diagnostics.py
+
+A collection of utility functions for diagnostics on FMRI data
+
+See test_* functions in this directory for nose tests
+"""
+# import important library
+
+from __future__ import division, print_function, absolute_import
+import numpy as np
+
+def vol_std(data):
+    """ Return standard deviation across voxels for 4D array `data`
+
+    Parameters
+    ----------
+    data : 4D array
+        4D array from FMRI run with last axis indexing volumes.  Call the shape
+        of this array (M, N, P, T) where T is the number of volumes.
+
+    Returns
+    -------
+    std_values : array shape (T,)
+        One dimensonal array where ``std_values[i]`` gives the standard
+        deviation of all voxels contained in ``data[..., i]``.
+    """
+
+    result = [0] * data.shape[-1]
+    for i in range(len(result)):
+        result[i] = np.std(data[:,:,:,i])
+    return result
+
+
+def iqr_outliers(arr_1d, iqr_scale=1.5):
+    """ Return indices of outliers identified by interquartile range
+
+    Parameters
+    ----------
+    arr_1d : 1D array
+        One-dimensional numpy array, from which we will identify outlier
+        values.
+    iqr_scale : float, optional
+        Scaling for IQR to set low and high thresholds.  Low threshold is given
+        by 25th centile value minus ``iqr_scale * IQR``, and high threshold id
+        given by 75 centile value plus ``iqr_scale * IQR``.
+
+    Returns
+    -------
+    outlier_indices : array
+        Array containing indices in `arr_1d` that contain outlier values.
+    lo_hi_thresh : tuple
+        Tuple containing 2 values (low threshold, high thresold) as described
+        above.
+    """
+    # Hint : np.lookfor('centile')
+    # Hint : np.lookfor('nonzero')
+
+    # get parameters
+    IQR = np.percentile(arr_1d, 75) - np.percentile(arr_1d, 25)
+    lo_thresh = np.percentile(arr_1d, 25) - IQR * iqr_scale
+    hi_thresh = np.percentile(arr_1d, 75) + IQR * iqr_scale
+
+    # find outlier_indeces
+    outlier_indices = []
+    for index in range(len(arr_1d)):
+        if arr_1d[index] < lo_thresh or arr_1d[index] > hi_thresh:
+            outlier_indices.append(index)
+    return (outlier_indices, (lo_thresh, hi_thresh))
+
+
+def vol_rms_diff(arr_4d):
+    """ Return root mean square of differences between sequential volumes
+
+    Parameters
+    ----------
+    data : 4D array
+        4D array from FMRI run with last axis indexing volumes.  Call the shape
+        of this array (M, N, P, T) where T is the number of volumes.
+
+    Returns
+    -------
+    rms_values : array shape (T-1,)
+        One dimensonal array where ``rms_values[i]`` gives the square root of
+        the mean (across voxels) of the squared difference between volume i and
+        volume i + 1.
+    """
+
+    time_length = arr_4d.shape[-1]
+    diff_vol = []
+    for item in range(time_length -1):
+        diff_vol.append(arr_4d[...,item+1] - arr_4d[...,item])
+    rms_values = []
+    for i in diff_vol:
+        rms_values.append(np.sqrt(np.mean(i**2)))
+
+    return rms_values
+
+
+def extend_diff_outliers(diff_indices):
+    """ Extend difference-based outlier indices `diff_indices` by pairing
+
+    Parameters
+    ----------
+    diff_indices : array
+        Array of indices of differences that have been detected as outliers.  A
+        difference index of ``i`` refers to the difference between volume ``i``
+        and volume ``i + 1``.
+
+    Returns
+    -------
+    extended_indices : array
+        Array where each index ``j`` in `diff_indices has been replaced by two
+        indices, ``j`` and ``j+1``, unless ``j+1`` is present in
+        ``diff_indices``.  For example, if the input was ``[3, 7, 8, 12, 20]``,
+        ``[3, 4, 7, 8, 9, 12, 13, 20, 21]``.
+    """
+
+    extended_indices = []
+    for item in diff_indices:
+        if item not in extended_indices:
+            extended_indices.append(item)
+        if (item+1) not in extended_indices:
+            extended_indices.append(item+1)
+
+    return extended_indices
+
+
diff --git a/code/utils/tests/test_extend_diff_outliers.py b/code/utils/tests/test_extend_diff_outliers.py
@@ -0,0 +1,30 @@
+""" Tests for extend_diff_outliers function in diagnostics module
+
+Run with:
+
+    nosetests test_extend_diff_outliers
+"""
+
+import numpy as np
+from .. import diagnostics
+from nose.tools import assert_equal
+from numpy.testing import assert_almost_equal, assert_array_equal
+
+
+def test_extend_diff_outliers():
+    # Test function to extend difference outlier indices
+    indices = np.array([3, 7, 12, 20])
+    assert_array_equal(diagnostics.extend_diff_outliers(indices),
+                       [3, 4, 7, 8, 12, 13, 20, 21])
+
+
+def test_sequential_input():
+    indices = np.array([4, 5, 9, 10])
+    assert_array_equal(diagnostics.extend_diff_outliers(indices),
+                       [4, 5, 6, 9, 10, 11])
+    indices = np.array([1, 2, 4, 5, 9, 10])
+    assert_array_equal(diagnostics.extend_diff_outliers(indices),
+                       [1, 2, 3, 4, 5, 6, 9, 10, 11])
+    indices = np.array([3, 7, 8, 12, 20])
+    assert_array_equal(diagnostics.extend_diff_outliers(indices),
+                       [3, 4, 7, 8, 9, 12, 13, 20, 21])
diff --git a/code/utils/tests/test_iqr_outliers.py b/code/utils/tests/test_iqr_outliers.py
@@ -0,0 +1,60 @@
+""" Tests for iqr_outliers function in diagnostics module
+
+Run with:
+
+    nosetests test_iqr_outliers
+"""
+
+import numpy as np
+from .. import diagnostics
+from nose.tools import assert_equal
+from numpy.testing import assert_almost_equal, assert_array_equal
+
+
+def test_iqr_outliers():
+    # Test with simplest possible array
+    arr = np.arange(101)  # percentile same as value
+    # iqr = 50
+    exp_lo = 25 - 75
+    exp_hi = 75 + 75
+    indices, thresholds = diagnostics.iqr_outliers(arr)
+    assert_array_equal(indices, [])
+    assert_equal(thresholds, (exp_lo, exp_hi))
+    # Reverse, same values
+    indices, thresholds = diagnostics.iqr_outliers(arr[::-1])
+    assert_array_equal(indices, [])
+    assert_equal(thresholds, (exp_lo, exp_hi))
+    # Add outliers
+    arr[0] = -51
+    arr[1] = 151
+    arr[100] = 1  # replace lost value to keep centiles same
+    indices, thresholds = diagnostics.iqr_outliers(arr)
+    assert_array_equal(indices, [0, 1])
+    assert_equal(thresholds, (exp_lo, exp_hi))
+    # Reversed, then the indices are reversed
+    indices, thresholds = diagnostics.iqr_outliers(arr[::-1])
+    assert_array_equal(indices, [99, 100])
+    assert_equal(thresholds, (exp_lo, exp_hi))
+
+
+def test_iqr_scaling():
+    # Check that the scaling of IQR works
+    # Test with simplest possible array
+    arr = np.arange(101)  # percentile same as value
+    # iqr = 50
+    exp_lo = 25 - 100
+    exp_hi = 75 + 100
+    indices, thresholds = diagnostics.iqr_outliers(arr, 2)
+    assert_array_equal(indices, [])
+    assert_equal(thresholds, (exp_lo, exp_hi))
+    # Add outliers - but these aren't big enough now
+    arr[0] = -51
+    arr[1] = 151
+    indices, thresholds = diagnostics.iqr_outliers(arr, 2)
+    assert_array_equal(indices, [])
+    # Add outliers - that are big enough
+    arr[0] = -76
+    arr[1] = 176
+    arr[100] = 1  # replace lost value to keep centiles same
+    indices, thresholds = diagnostics.iqr_outliers(arr, 2)
+    assert_array_equal(indices, [0, 1])
diff --git a/code/utils/tests/test_vol_rms_diff.py b/code/utils/tests/test_vol_rms_diff.py
@@ -0,0 +1,24 @@
+""" Tests for vol_rms_diff function in diagnostics module
+
+Run with:
+
+    nosetests test_vol_rms_diff.py
+"""
+
+import numpy as np
+from .. import diagnostics
+from numpy.testing import assert_almost_equal, assert_array_equal
+
+def test_vol_rms_diff():
+    # We make a fake 4D image
+    shape_3d = (2, 3, 4)
+    V = np.prod(shape_3d)
+    T = 10  # The number of 3D volumes
+    # Make a 2D array that we will reshape to 4D
+    arr_2d = np.random.normal(size=(V, T))
+    differences = np.diff(arr_2d, axis=1)
+    exp_rms = np.sqrt(np.mean(differences ** 2, axis=0))
+    # Reshape to 4D and run function
+    arr_4d = np.reshape(arr_2d, shape_3d + (T,))
+    actual_rms = diagnostics.vol_rms_diff(arr_4d)
+    assert_almost_equal(actual_rms, exp_rms)
diff --git a/code/utils/tests/test_vol_std.py b/code/utils/tests/test_vol_std.py
@@ -0,0 +1,23 @@
+""" Tests for vol_std function in diagnostics module
+
+Run with:
+
+    nosetests test_vol_std.py
+"""
+
+import numpy as np
+from .. import diagnostics
+from numpy.testing import assert_almost_equal, assert_array_equal
+
+def test_vol_std():
+    # We make a fake 4D image
+    shape_3d = (2, 3, 4)
+    V = np.prod(shape_3d)
+    T = 10  # The number of 3D volumes
+    # Make a 2D array that we will reshape to 4D
+    arr_2d = np.random.normal(size=(V, T))
+    expected_stds = np.std(arr_2d, axis=0)
+    # Reshape to 4D
+    arr_4d = np.reshape(arr_2d, shape_3d + (T,))
+    actual_stds = diagnostics.vol_std(arr_4d)
+    assert_almost_equal(expected_stds, actual_stds)