diff --git a/Makefile b/Makefile
index 4eb7b0e..d9aa97a 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ clean:
 	find . -name "*.so" -o -name "*.pyc" -o -name "*.pyx.md5" | xargs rm -f
 
 coverage:
-	nosetests code/utils/tests data/tests/ --with-coverage --cover-package=code/utils/functions,data/tests/test_get_check_hashes.py
+	nosetests code/utils/tests data/tests/ --with-coverage --cover-package=code/utils/functions,data/data_hashes.py
 
 test:
 	nosetests code/utils/tests data/tests/
diff --git a/code/utils/diagnostics.py b/code/utils/functions/diagnostics.py
similarity index 69%
rename from code/utils/diagnostics.py
rename to code/utils/functions/diagnostics.py
index 70341b1..2b36e2d 100644
--- a/code/utils/diagnostics.py
+++ b/code/utils/functions/diagnostics.py
@@ -6,7 +6,7 @@
 """
 import numpy as np
-import pdb
+
 
 def vol_std(data):
     """ Return standard deviation across voxels for 4D array `data`
@@ -23,10 +23,12 @@ def vol_std(data):
        One dimensional array where ``std_values[i]`` gives the standard
        deviation of all voxels contained in ``data[..., i]``.
     """
-    std_values = [np.std(np.ravel(data[...,i])) for i in range(0, np.shape(data)[3])]
-    return np.array(std_values)
+    T = data.shape[-1]
+    data_2d = np.reshape(data, (-1, T))
+    return np.std(data_2d, axis=0)
+
 
-def iqr_outliers(arr_1d, iqr_scale=1):
+def iqr_outliers(arr_1d, iqr_scale=1.5):
     """ Return indices of outliers identified by interquartile range
 
     Parameters
@@ -49,16 +51,13 @@
     """
     # Hint : np.lookfor('centile')
     # Hint : np.lookfor('nonzero')
-    sevenfifth = np.percentile(arr_1d,75)
-    twenfifth = np.percentile(arr_1d,25)
-    iqr = sevenfifth-twenfifth
-    low = twenfifth-iqr_scale*iqr
-    hi = iqr_scale*iqr+sevenfifth
-    outliers1 = np.array(np.nonzero(arr_1d > hi))[0]
-    outliers2 = np.array(np.nonzero((arr_1d < low)))[0]
-    outliers = np.hstack((outliers1,outliers2))
-    outliers.sort()
-    return outliers,(low, hi)
+    pct_25, pct_75 = np.percentile(arr_1d, [25, 75])
+    iqr = pct_75 - pct_25
+    lo_thresh = pct_25 - iqr * iqr_scale
+    hi_thresh = pct_75 + iqr * iqr_scale
+    is_outlier = (arr_1d < lo_thresh) | (arr_1d > hi_thresh)
+    return np.nonzero(is_outlier)[0], (lo_thresh, hi_thresh)
+
 
 def vol_rms_diff(arr_4d):
     """ Return root mean square of differences between sequential volumes
@@ -76,12 +75,11 @@
     the mean (across voxels) of the squared difference between volume i
     and volume i + 1.
     """
-
-    rms_values=[]
-    for i in range(arr_4d.shape[-1]-1):
-        diff_vol = arr_4d[...,i+1] - arr_4d[...,i]
-        rms_values.append(np.sqrt(np.mean(diff_vol ** 2)))
-    return np.array(rms_values)
+    T = arr_4d.shape[-1]
+    diff_data = np.diff(arr_4d, axis=-1)
+    diff_data_2d = np.reshape(diff_data, (-1, T-1))
+    return np.sqrt(np.mean(diff_data_2d ** 2, axis=0))
+
 
 def extend_diff_outliers(diff_indices):
     """ Extend difference-based outlier indices `diff_indices` by pairing
@@ -95,22 +93,14 @@
     Returns
     -------
-    extended_indices :
+    extended_indices : array
        Array where each index ``j`` in ``diff_indices`` has been replaced by
        two indices, ``j`` and ``j+1``, unless ``j+1`` is present in
        ``diff_indices``. For example, if the input was ``[3, 7, 8, 12, 20]``,
        the returned array would be ``[3, 4, 7, 8, 9, 12, 13, 20, 21]``.
""" - i=0 - while (i < len(diff_indices)): - if i is (len(diff_indices)-1): - diff_indices = np.insert(diff_indices,i+1,diff_indices[i]+1) - break - if diff_indices[i+1] == (diff_indices[i]+1): - i=i+1 - continue - diff_indices = np.insert(diff_indices,i+1,diff_indices[i]+1) - i=i+2 - return diff_indices - - + # Make two columns with the second being the first plus 1 + ext_outliers = np.column_stack((diff_indices, diff_indices + 1)) + # Use numpy reshape ordering to get the values from the first row, then the + # second row etc... + return np.unique(ext_outliers.ravel()) diff --git a/code/utils/glm.py b/code/utils/functions/glm.py similarity index 100% rename from code/utils/glm.py rename to code/utils/functions/glm.py diff --git a/code/utils/smoothing.py b/code/utils/functions/smoothing.py similarity index 100% rename from code/utils/smoothing.py rename to code/utils/functions/smoothing.py diff --git a/code/utils/tests/test_diagnostics.py b/code/utils/tests/test_diagnostics.py new file mode 100644 index 0000000..8c1e2d4 --- /dev/null +++ b/code/utils/tests/test_diagnostics.py @@ -0,0 +1,108 @@ +"""test_diagnostics.py +Tests for the functions in diagnostics.py module + +Run with: + + nosetests test_diagnostics.py +""" +import os, sys, pdb +import numpy as np +from nose.tools import assert_equal +from numpy.testing import assert_almost_equal, assert_array_equal + +#Append path to functions +sys.path.append(os.path.join(os.path.dirname(__file__), "../functions/")) +from diagnostics import * + +def test_vol_std(): + # We make a fake 4D image + shape_3d = (2, 3, 4) + V = np.prod(shape_3d) + T = 10 # The number of 3D volumes + # Make a 2D array that we will reshape to 4D + arr_2d = np.random.normal(size=(V, T)) + expected_stds = np.std(arr_2d, axis=0) + # Reshape to 4D + arr_4d = np.reshape(arr_2d, shape_3d + (T,)) + actual_stds = vol_std(arr_4d) + assert_almost_equal(expected_stds, actual_stds) + +def test_vol_rms_diff(): + # We make a fake 4D image + shape_3d = (2, 3, 4) + V = np.prod(shape_3d) + T = 10 # The number of 3D volumes + # Make a 2D array that we will reshape to 4D + arr_2d = np.random.normal(size=(V, T)) + differences = np.diff(arr_2d, axis=1) + exp_rms = np.sqrt(np.mean(differences ** 2, axis=0)) + # Reshape to 4D and run function + arr_4d = np.reshape(arr_2d, shape_3d + (T,)) + actual_rms = vol_rms_diff(arr_4d) + assert_almost_equal(actual_rms, exp_rms) + +def test_iqr_outliers(): + # Test with simplest possible array + arr = np.arange(101) # percentile same as value + # iqr = 50 + exp_lo = 25 - 75 + exp_hi = 75 + 75 + indices, thresholds = iqr_outliers(arr) + assert_array_equal(indices, []) + assert_equal(thresholds, (exp_lo, exp_hi)) + # Reverse, same values + indices, thresholds = iqr_outliers(arr[::-1]) + assert_array_equal(indices, []) + assert_equal(thresholds, (exp_lo, exp_hi)) + # Add outliers + arr[0] = -51 + arr[1] = 151 + arr[100] = 1 # replace lost value to keep centiles same + indices, thresholds = iqr_outliers(arr) + assert_array_equal(indices, [0, 1]) + assert_equal(thresholds, (exp_lo, exp_hi)) + # Reversed, then the indices are reversed + indices, thresholds = iqr_outliers(arr[::-1]) + assert_array_equal(indices, [99, 100]) + assert_equal(thresholds, (exp_lo, exp_hi)) + +def test_iqr_scaling(): + # Check that the scaling of IQR works + # Test with simplest possible array + arr = np.arange(101) # percentile same as value + # iqr = 50 + exp_lo = 25 - 100 + exp_hi = 75 + 100 + indices, thresholds = iqr_outliers(arr, 2) + assert_array_equal(indices, []) + 
+    assert_equal(thresholds, (exp_lo, exp_hi))
+    # Add outliers - but these aren't big enough now
+    arr[0] = -51
+    arr[1] = 151
+    indices, thresholds = iqr_outliers(arr, 2)
+    assert_array_equal(indices, [])
+    # Add outliers - that are big enough
+    arr[0] = -76
+    arr[1] = 176
+    arr[100] = 1  # replace lost value to keep centiles same
+    indices, thresholds = iqr_outliers(arr, 2)
+    assert_array_equal(indices, [0, 1])
+
+def test_extend_diff_outliers():
+    # Test function to extend difference outlier indices
+    indices = np.array([3, 7, 12, 20])
+    assert_array_equal(extend_diff_outliers(indices),
+                       [3, 4, 7, 8, 12, 13, 20, 21])
+
+
+def test_sequential_input():
+    indices = np.array([4, 5, 9, 10])
+    assert_array_equal(extend_diff_outliers(indices),
+                       [4, 5, 6, 9, 10, 11])
+    indices = np.array([1, 2, 4, 5, 9, 10])
+    assert_array_equal(extend_diff_outliers(indices),
+                       [1, 2, 3, 4, 5, 6, 9, 10, 11])
+    indices = np.array([3, 7, 8, 12, 20])
+    assert_array_equal(extend_diff_outliers(indices),
+                       [3, 4, 7, 8, 9, 12, 13, 20, 21])
+
diff --git a/code/utils/tests/test_glm.py b/code/utils/tests/test_glm.py
new file mode 100644
index 0000000..86d0355
--- /dev/null
+++ b/code/utils/tests/test_glm.py
@@ -0,0 +1,71 @@
+""" Tests for the glm_beta and glm_mrss functions in the glm module
+
+Run from the project root directory with:
+    nosetests code/utils/tests/test_glm.py
+
+"""
+# Loading modules.
+import numpy as np
+import numpy.linalg as npl
+import nibabel as nib
+import os
+import sys
+from numpy.testing import assert_almost_equal, assert_array_equal
+
+
+# Add path to functions to the system path.
+sys.path.append(os.path.join(os.path.dirname(__file__), "../functions/"))
+
+
+# Load our GLM functions.
+from glm import glm_beta, glm_mrss
+
+def test_glm_beta():
+    # Read in the image data.
+    img = nib.load('data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii')
+    data = img.get_data()
+    # Read in the convolutions.
+    p = 2
+    convolved1 = np.loadtxt('data/ds114/sub009/behav/task002_run001/ds114_sub009_t2r1_conv.txt')
+    # Create design matrix.
+    X_matrix = np.ones((len(convolved1), p))
+    X_matrix[:, 1] = convolved1
+
+    # Calculate betas, copied from the exercise.
+    data_2d = np.reshape(data, (-1, data.shape[-1]))
+    B = npl.pinv(X_matrix).dot(data_2d.T)
+    B_4d = np.reshape(B.T, img.shape[:-1] + (-1,))
+
+    # Run function.
+    test_B_4d = glm_beta(data, X_matrix)
+    assert_almost_equal(B_4d, test_B_4d)
+
+
+def test_glm_mrss():
+    img = nib.load('data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii')
+    data = img.get_data()
+    convolved1 = np.loadtxt('data/ds114/sub009/behav/task002_run001/ds114_sub009_t2r1_conv.txt')
+    X_matrix = np.ones((len(convolved1), 2))
+    X_matrix[:, 1] = convolved1
+    data_2d = np.reshape(data, (-1, data.shape[-1]))
+    B = npl.pinv(X_matrix).dot(data_2d.T)
+    B_4d = np.reshape(B.T, img.shape[:-1] + (-1,))
+    test_B_4d = glm_beta(data, X_matrix)
+
+    # Pick a single voxel to check the mrss function.
+    # Calculate actual fitted values, residuals, and MRSS of voxel.
+    fitted = X_matrix.dot(B_4d[12, 22, 10])
+    residuals = data[12, 22, 10] - fitted
+    MRSS = np.sum(residuals**2)/(X_matrix.shape[0] - npl.matrix_rank(X_matrix))
+
+    # Calculate using the glm_mrss function.
+    test_MRSS, test_fitted, test_residuals = glm_mrss(test_B_4d, X_matrix, data)
+    assert_almost_equal(MRSS, test_MRSS[12, 22, 10])
+    assert_almost_equal(fitted, test_fitted[12, 22, 10])
+    assert_almost_equal(residuals, test_residuals[12, 22, 10])
+
+
+
+
+
diff --git a/code/utils/tests/test_smoothing.py b/code/utils/tests/test_smoothing.py
new file mode 100644
index 0000000..18cef72
--- /dev/null
+++ b/code/utils/tests/test_smoothing.py
@@ -0,0 +1,38 @@
+""" Tests for the smoothing function in the smoothing module
+Run from the project root directory with:
+    nosetests code/utils/tests/test_smoothing.py
+"""
+
+import os
+import sys
+import numpy as np
+import itertools
+import scipy.ndimage
+from scipy.ndimage.filters import gaussian_filter
+import matplotlib.pyplot as plt
+import nibabel as nib
+from numpy.testing import assert_almost_equal
+from nose.tools import assert_not_equals
+
+
+# Add path to functions to the system path.
+sys.path.append(os.path.join(os.path.dirname(__file__), "../functions/"))
+
+# Load smoothing function.
+from smoothing import smoothing
+
+def test_smooth():
+    # Read in the image data.
+    img = nib.load('data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii')
+    data = img.get_data()
+
+    # Run the smoothing function with sigma 0 at time 12
+    non_smoothed_data = smoothing(data, 0, 12)
+
+    # assert that data at time 12 and non_smoothed_data are equal since sigma = 0
+    assert_almost_equal(data[..., 12], non_smoothed_data)
+
+    # Run the smoothing function with sigma 5 at time 100
+    smoothed_data = smoothing(data, 5, 100)
+    # assert that data at time 100 and smoothed_data are not equal
+    assert_not_equals(np.abs(data[..., 100] - smoothed_data).sum(), 0)
diff --git a/data/Makefile b/data/Makefile
index f20f51f..b273d2d 100644
--- a/data/Makefile
+++ b/data/Makefile
@@ -18,9 +18,9 @@ download_test_data:
 	wget -P ../data/ds114/sub009/behav/task002_run001/ http://www.jarrodmillman.com/rcsds/_downloads/ds114_sub009_t2r1_conv.txt
 
 test:
-	nosetests tests -w ..
+	nosetests tests
 
 coverage:
-	nosetests tests -w .. --with-coverage --cover-package=data.py
+	nosetests tests --with-coverage --cover-package=data_hashes.py
diff --git a/data/data_hashes.py b/data/data_hashes.py
index 4fff6ad..7ae1007 100644
--- a/data/data_hashes.py
+++ b/data/data_hashes.py
@@ -11,27 +11,6 @@
 import pdb
 import json
 
-def create_dict(filename):
-    """Return a dictionary of hashes from a .txt file
-
-    Parameters
-    ----------
-    filename: .txt file containing the each file hash in the
-        first column and the file location in the data in the
-        second column.
-    Returns
-    ---------
-    newDict: a dictionary of hashes
-    """
-    newDict={}
-    f=open(filename)
-    num_lines = sum(1 for line in open(filename))
-    for line,i in zip(f,range(0,num_lines)):
-        info = line.split()
-        newDict[info[1]]=info[0]
-    f.close()
-    return newDict
-
 def generate_file_md5(filename, blocksize=2**20):
     """Generate the md5 hash for filename
     Parameters
diff --git a/data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii b/data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii
new file mode 100644
index 0000000..3430ac3
Binary files /dev/null and b/data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii differ
diff --git a/data/ds114/sub009/behav/task002_run001/ds114_sub009_t2r1_conv.txt b/data/ds114/sub009/behav/task002_run001/ds114_sub009_t2r1_conv.txt
new file mode 100644
index 0000000..38ade20
--- /dev/null
+++ b/data/ds114/sub009/behav/task002_run001/ds114_sub009_t2r1_conv.txt
@@ -0,0 +1,173 @@
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+0.000000000000000000e+00
+2.321802309091016703e-01
+8.321802309091016481e-01
+1.141223007348432184e+00
+1.134358594322657066e+00
+1.035057581311021879e+00
+9.611284719183640357e-01
+9.262370763114995409e-01
+9.135602883776547944e-01
+9.097297884100494780e-01
+9.087243611417219480e-01
+9.084884912227670917e-01
+9.084884912227670917e-01
+6.763082603136654214e-01
+7.630826031366537421e-02
+-2.327345161256650918e-01
+-2.258701030998900294e-01
+-1.265690900882549264e-01
+-5.263998069559708975e-02
+-1.774858508873256718e-02
+-5.071797154887739204e-03
+-1.241297187282385493e-03
+-2.358699189549879390e-04
+0.000000000000000000e+00
+0.000000000000000000e+00
+2.321802309091016703e-01
+8.321802309091016481e-01
+1.141223007348432184e+00
+1.134358594322657066e+00
+1.035057581311021879e+00
+9.611284719183640357e-01
+9.262370763114995409e-01
+9.135602883776547944e-01
+9.097297884100494780e-01
+9.087243611417219480e-01
+9.084884912227670917e-01
+9.084884912227670917e-01
+6.763082603136654214e-01
+7.630826031366537421e-02
+-2.327345161256650918e-01
+-2.258701030998900294e-01
+-1.265690900882549264e-01
+-5.263998069559708975e-02
+-1.774858508873256718e-02
+-5.071797154887739204e-03
+-1.241297187282385493e-03
+-2.358699189549879390e-04
+0.000000000000000000e+00
+0.000000000000000000e+00
+2.321802309091016703e-01
+8.321802309091016481e-01
+1.141223007348432184e+00
+1.134358594322657066e+00
+1.035057581311021879e+00
+9.611284719183640357e-01
+9.262370763114995409e-01
+9.135602883776547944e-01
+9.097297884100494780e-01
+9.087243611417219480e-01
+9.084884912227670917e-01
+9.084884912227670917e-01
+6.763082603136654214e-01
+7.630826031366537421e-02
+-2.327345161256650918e-01
+-2.258701030998900294e-01
+-1.265690900882549264e-01
+-5.263998069559708975e-02
+-1.774858508873256718e-02
+-5.071797154887739204e-03
+-1.241297187282385493e-03
+-2.358699189549879390e-04
+0.000000000000000000e+00
+0.000000000000000000e+00
+2.321802309091016703e-01
+8.321802309091016481e-01
+1.141223007348432184e+00
+1.134358594322657066e+00
+1.035057581311021879e+00
+9.611284719183640357e-01
+9.262370763114995409e-01
+9.135602883776547944e-01
+9.097297884100494780e-01
+9.087243611417219480e-01
+9.084884912227670917e-01
+9.084884912227670917e-01
+6.763082603136654214e-01
+7.630826031366537421e-02
+-2.327345161256650918e-01
+-2.258701030998900294e-01
+-1.265690900882549264e-01
+-5.263998069559708975e-02
+-1.774858508873256718e-02
+-5.071797154887739204e-03
+-1.241297187282385493e-03
+-2.358699189549879390e-04
+0.000000000000000000e+00
+0.000000000000000000e+00
+2.321802309091016703e-01
+8.321802309091016481e-01
+1.141223007348432184e+00
+1.134358594322657066e+00
+1.035057581311021879e+00
+9.611284719183640357e-01
+9.262370763114995409e-01
+9.135602883776547944e-01
+9.097297884100494780e-01
+9.087243611417219480e-01
+9.084884912227670917e-01
+9.084884912227670917e-01
+6.763082603136654214e-01
+7.630826031366537421e-02
+-2.327345161256650918e-01
+-2.258701030998900294e-01
+-1.265690900882549264e-01
+-5.263998069559708975e-02
+-1.774858508873256718e-02
+-5.071797154887739204e-03
+-1.241297187282385493e-03
+-2.358699189549879390e-04
+0.000000000000000000e+00
+0.000000000000000000e+00
+2.321802309091016703e-01
+8.321802309091016481e-01
+1.141223007348432184e+00
+1.134358594322657066e+00
+1.035057581311021879e+00
+9.611284719183640357e-01
+9.262370763114995409e-01
+9.135602883776547944e-01
+9.097297884100494780e-01
+9.087243611417219480e-01
+9.084884912227670917e-01
+9.084884912227670917e-01
+6.763082603136654214e-01
+7.630826031366537421e-02
+-2.327345161256650918e-01
+-2.258701030998900294e-01
+-1.265690900882549264e-01
+-5.263998069559708975e-02
+-1.774858508873256718e-02
+-5.071797154887739204e-03
+-1.241297187282385493e-03
+-2.358699189549879390e-04
+0.000000000000000000e+00
+0.000000000000000000e+00
+2.321802309091016703e-01
+8.321802309091016481e-01
+1.141223007348432184e+00
+1.134358594322657066e+00
+1.035057581311021879e+00
+9.611284719183640357e-01
+9.262370763114995409e-01
+9.135602883776547944e-01
+9.097297884100494780e-01
+9.087243611417219480e-01
+9.084884912227670917e-01
+9.084884912227670917e-01
+6.763082603136654214e-01
+7.630826031366537421e-02
+-2.327345161256650918e-01
+-2.258701030998900294e-01
+-1.265690900882549264e-01
+-5.263998069559708975e-02
+-1.774858508873256718e-02
+-5.071797154887739204e-03
+-1.241297187282385493e-03
+-2.358699189549879390e-04
+0.000000000000000000e+00
+0.000000000000000000e+00
diff --git a/data/get_data_hashes.py b/data/get_data_hashes.py
index b42208f..8d70fae 100644
--- a/data/get_data_hashes.py
+++ b/data/get_data_hashes.py
@@ -4,7 +4,7 @@ and corresponding hashes.
 """
 from __future__ import print_function
-from data import *
+from data_hashes import *
 import json
 import pdb
diff --git a/data/get_ds005_hashes_from_txt.py b/data/get_ds005_hashes_from_txt.py
index fc16acf..dc08534 100644
--- a/data/get_ds005_hashes_from_txt.py
+++ b/data/get_ds005_hashes_from_txt.py
@@ -1,15 +1,38 @@
-"""
+"""get_ds005_hashes_from_txt.py
 Creates a .json file with hashes for our ds005 data
 from a text file containing file names and corresponding hashes.
+
+Run with:
+    python get_ds005_hashes_from_txt.py
 """
 from __future__ import print_function
-from data import create_dict
 import json
 import pdb
 
-if __name__=="__main__":
+def create_dict(filename):
+    """Return a dictionary of hashes from a .txt file
+
+    Parameters
+    ----------
+    filename: .txt file containing each file hash in the
+        first column and the file location in the data in the
+        second column.
+    Returns
+    -------
+    newDict: a dictionary of hashes
+    """
+    newDict = {}
+    with open(filename) as f:
+        for line in f:
+            info = line.split()
+            newDict[info[1]] = info[0]
+    return newDict
+
 # For our data ds005
-    newDict = create_dict('ds005_raw_checksums.txt')
-    with open('ds005_hashes.json', 'w') as file_out:
-        json.dump(newDict, file_out)
+newDict = create_dict('ds005_raw_checksums.txt')
+with open('ds005_hashes_checksums.json', 'w') as file_out:
+    json.dump(newDict, file_out)
 # print(check_hashes(newDict))
diff --git a/data/tests/test_get_check_hashes.py b/data/tests/test_get_check_hashes.py
index c185ccf..85f8406 100644
--- a/data/tests/test_get_check_hashes.py
+++ b/data/tests/test_get_check_hashes.py
@@ -2,15 +2,15 @@
 Run with:
     nosetests test_get_check_hashes.py
 """
-from __future__ import print_function
+from __future__ import (division, absolute_import, print_function, unicode_literals)
 import sys, os, pdb
 import tempfile
 import json
-try:
-    from urllib.request import urlopen
-except ImportError:
-    from urllib2 import urlopen
+if sys.version_info >= (3,):
+    import urllib.request as urllib2
+else:
+    import urllib2
 
 # Specify the path for functions
 sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
@@ -30,7 +30,7 @@ def test_check_hashes():
 
 def test_get_hashes():
     # Download json file senators-list.json and store it in the current directory
     url = 'http://jarrodmillman.com/rcsds/data/senators-list.json'
-    in_file = urlopen(url)
+    in_file = urllib2.urlopen(url)
     out_file = open('senators-list.json', 'wb')
     out_file.write(in_file.read())
     out_file.close()
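
Reviewer note (not part of the patch): a minimal usage sketch of the refactored diagnostics pipeline, assuming the code/utils/functions layout introduced above and a run from the project root. The 4D shape, the planted artifact, and the variable names are illustrative only.

    import sys
    sys.path.append("code/utils/functions")

    import numpy as np
    from diagnostics import (vol_std, vol_rms_diff, iqr_outliers,
                             extend_diff_outliers)

    # Fake 4D image: 2 x 3 x 4 voxels over 20 volumes, with one bad volume.
    data = np.random.normal(size=(2, 3, 4, 20))
    data[..., 5] *= 10.0  # volume 5 gets an inflated standard deviation

    # Per-volume standard deviation, flagged against the new 1.5 * IQR default.
    std_outliers, (lo_thresh, hi_thresh) = iqr_outliers(vol_std(data))

    # RMS difference between consecutive volumes; a spike at difference i
    # implicates both volumes i and i + 1, hence the index extension.
    rms_outliers, _ = iqr_outliers(vol_rms_diff(data))
    bad_volumes = extend_diff_outliers(rms_outliers)
    print(std_outliers, bad_volumes)  # expect volume 5 to appear in both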
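
A second sketch, of the reshape trick the GLM tests above rely on: flatten the 4D data to (voxels, time), fit every voxel's least-squares betas with a single pinv call, then fold the betas back over the voxel grid. The synthetic design matrix and shapes here are assumptions for illustration, not the ds114 data the tests load.

    import numpy as np
    import numpy.linalg as npl

    shape_3d, T = (2, 3, 4), 30
    X = np.ones((T, 2))
    X[:, 1] = np.linspace(-1, 1, T)  # stand-in for the convolved regressor

    data = np.random.normal(size=shape_3d + (T,))
    data_2d = np.reshape(data, (-1, T))       # (V, T)

    B = npl.pinv(X).dot(data_2d.T)            # (2, V): betas for all voxels
    B_4d = np.reshape(B.T, shape_3d + (-1,))  # betas back on the voxel grid

    # MRSS per voxel: residual sum of squares over rank-adjusted dof,
    # matching the single-voxel check in test_glm_mrss.
    residuals = data_2d - X.dot(B).T          # (V, T)
    mrss = np.sum(residuals ** 2, axis=1) / (T - npl.matrix_rank(X))
    print(B_4d.shape, mrss.shape)             # (2, 3, 4, 2) and (24,)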