Skip to content

Commit

Permalink
Merge pull request #199 from timothy1191xa/lin_reg
Browse files Browse the repository at this point in the history
Updated linear_regression
  • Loading branch information
timothy1191xa committed Dec 12, 2015
2 parents 09ae1c1 + de285e4 commit 75f0d20
Show file tree
Hide file tree
Showing 14 changed files with 453 additions and 71 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ clean:
find . -name "*.so" -o -name "*.pyc" -o -name "*.pyx.md5" | xargs rm -f

coverage:
nosetests code/utils/tests data/tests/ --with-coverage --cover-package=code/utils/functions,data/tests/test_get_check_hashes.py
nosetests code/utils/tests data/tests/ --with-coverage --cover-package=code/utils/functions,data/data_hashes.py

test:
nosetests code/utils/tests data/tests/
Expand Down
58 changes: 24 additions & 34 deletions code/utils/diagnostics.py → code/utils/functions/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""

import numpy as np
import pdb


def vol_std(data):
""" Return standard deviation across voxels for 4D array `data`
Expand All @@ -23,10 +23,12 @@ def vol_std(data):
One dimensonal array where ``std_values[i]`` gives the standard
deviation of all voxels contained in ``data[..., i]``.
"""
std_values = [np.std(np.ravel(data[...,i])) for i in range(0, np.shape(data)[3])]
return np.array(std_values)
T = data.shape[-1]
data_2d = np.reshape(data, (-1, T))
return np.std(data_2d, axis=0)


def iqr_outliers(arr_1d, iqr_scale=1):
def iqr_outliers(arr_1d, iqr_scale=1.5):
""" Return indices of outliers identified by interquartile range
Parameters
Expand All @@ -49,16 +51,13 @@ def iqr_outliers(arr_1d, iqr_scale=1):
"""
# Hint : np.lookfor('centile')
# Hint : np.lookfor('nonzero')
sevenfifth = np.percentile(arr_1d,75)
twenfifth = np.percentile(arr_1d,25)
iqr = sevenfifth-twenfifth
low = twenfifth-iqr_scale*iqr
hi = iqr_scale*iqr+sevenfifth
outliers1 = np.array(np.nonzero(arr_1d > hi))[0]
outliers2 = np.array(np.nonzero((arr_1d < low)))[0]
outliers = np.hstack((outliers1,outliers2))
outliers.sort()
return outliers,(low, hi)
pct_25, pct_75 = np.percentile(arr_1d, [25, 75])
iqr = pct_75 - pct_25
lo_thresh = pct_25 - iqr * iqr_scale
hi_thresh = pct_75 + iqr * iqr_scale
is_outlier = (arr_1d < lo_thresh) | (arr_1d > hi_thresh)
return np.nonzero(is_outlier)[0], (lo_thresh, hi_thresh)


def vol_rms_diff(arr_4d):
""" Return root mean square of differences between sequential volumes
Expand All @@ -76,12 +75,11 @@ def vol_rms_diff(arr_4d):
the mean (across voxels) of the squared difference between volume i and
volume i + 1.
"""

rms_values=[]
for i in range(arr_4d.shape[-1]-1):
diff_vol = arr_4d[...,i+1] - arr_4d[...,i]
rms_values.append(np.sqrt(np.mean(diff_vol ** 2)))
return np.array(rms_values)
T = arr_4d.shape[-1]
diff_data = np.diff(arr_4d, axis=-1)
diff_data_2d = np.reshape(diff_data, (-1, T-1))
return np.sqrt(np.mean(diff_data_2d ** 2, axis=0))


def extend_diff_outliers(diff_indices):
""" Extend difference-based outlier indices `diff_indices` by pairing
Expand All @@ -95,22 +93,14 @@ def extend_diff_outliers(diff_indices):
Returns
-------
extended_indices :
extended_indices : array
Array where each index ``j`` in `diff_indices has been replaced by two
indices, ``j`` and ``j+1``, unless ``j+1`` is present in
``diff_indices``. For example, if the input was ``[3, 7, 8, 12, 20]``,
``[3, 4, 7, 8, 9, 12, 13, 20, 21]``.
"""
i=0
while (i < len(diff_indices)):
if i is (len(diff_indices)-1):
diff_indices = np.insert(diff_indices,i+1,diff_indices[i]+1)
break
if diff_indices[i+1] == (diff_indices[i]+1):
i=i+1
continue
diff_indices = np.insert(diff_indices,i+1,diff_indices[i]+1)
i=i+2
return diff_indices


# Make two columns with the second being the first plus 1
ext_outliers = np.column_stack((diff_indices, diff_indices + 1))
# Use numpy reshape ordering to get the values from the first row, then the
# second row etc...
return np.unique(ext_outliers.ravel())
File renamed without changes.
File renamed without changes.
108 changes: 108 additions & 0 deletions code/utils/tests/test_diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""test_diagnostics.py
Tests for the functions in diagnostics.py module
Run with:
nosetests test_diagnostics.py
"""
import os, sys, pdb
import numpy as np
from nose.tools import assert_equal
from numpy.testing import assert_almost_equal, assert_array_equal

#Append path to functions
sys.path.append(os.path.join(os.path.dirname(__file__), "../functions/"))
from diagnostics import *

def test_vol_std():
# We make a fake 4D image
shape_3d = (2, 3, 4)
V = np.prod(shape_3d)
T = 10 # The number of 3D volumes
# Make a 2D array that we will reshape to 4D
arr_2d = np.random.normal(size=(V, T))
expected_stds = np.std(arr_2d, axis=0)
# Reshape to 4D
arr_4d = np.reshape(arr_2d, shape_3d + (T,))
actual_stds = vol_std(arr_4d)
assert_almost_equal(expected_stds, actual_stds)

def test_vol_rms_diff():
# We make a fake 4D image
shape_3d = (2, 3, 4)
V = np.prod(shape_3d)
T = 10 # The number of 3D volumes
# Make a 2D array that we will reshape to 4D
arr_2d = np.random.normal(size=(V, T))
differences = np.diff(arr_2d, axis=1)
exp_rms = np.sqrt(np.mean(differences ** 2, axis=0))
# Reshape to 4D and run function
arr_4d = np.reshape(arr_2d, shape_3d + (T,))
actual_rms = vol_rms_diff(arr_4d)
assert_almost_equal(actual_rms, exp_rms)

def test_iqr_outliers():
# Test with simplest possible array
arr = np.arange(101) # percentile same as value
# iqr = 50
exp_lo = 25 - 75
exp_hi = 75 + 75
indices, thresholds = iqr_outliers(arr)
assert_array_equal(indices, [])
assert_equal(thresholds, (exp_lo, exp_hi))
# Reverse, same values
indices, thresholds = iqr_outliers(arr[::-1])
assert_array_equal(indices, [])
assert_equal(thresholds, (exp_lo, exp_hi))
# Add outliers
arr[0] = -51
arr[1] = 151
arr[100] = 1 # replace lost value to keep centiles same
indices, thresholds = iqr_outliers(arr)
assert_array_equal(indices, [0, 1])
assert_equal(thresholds, (exp_lo, exp_hi))
# Reversed, then the indices are reversed
indices, thresholds = iqr_outliers(arr[::-1])
assert_array_equal(indices, [99, 100])
assert_equal(thresholds, (exp_lo, exp_hi))

def test_iqr_scaling():
# Check that the scaling of IQR works
# Test with simplest possible array
arr = np.arange(101) # percentile same as value
# iqr = 50
exp_lo = 25 - 100
exp_hi = 75 + 100
indices, thresholds = iqr_outliers(arr, 2)
assert_array_equal(indices, [])
assert_equal(thresholds, (exp_lo, exp_hi))
# Add outliers - but these aren't big enough now
arr[0] = -51
arr[1] = 151
indices, thresholds = iqr_outliers(arr, 2)
assert_array_equal(indices, [])
# Add outliers - that are big enough
arr[0] = -76
arr[1] = 176
arr[100] = 1 # replace lost value to keep centiles same
indices, thresholds = iqr_outliers(arr, 2)
assert_array_equal(indices, [0, 1])

def test_extend_diff_outliers():
# Test function to extend difference outlier indices
indices = np.array([3, 7, 12, 20])
assert_array_equal(extend_diff_outliers(indices),
[3, 4, 7, 8, 12, 13, 20, 21])


def test_sequential_input():
indices = np.array([4, 5, 9, 10])
assert_array_equal(extend_diff_outliers(indices),
[4, 5, 6, 9, 10, 11])
indices = np.array([1, 2, 4, 5, 9, 10])
assert_array_equal(extend_diff_outliers(indices),
[1, 2, 3, 4, 5, 6, 9, 10, 11])
indices = np.array([3, 7, 8, 12, 20])
assert_array_equal(extend_diff_outliers(indices),
[3, 4, 7, 8, 9, 12, 13, 20, 21])

71 changes: 71 additions & 0 deletions code/utils/tests/test_glm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
""" Tests for glm function in glm module
This checks the glm function.
Run at the tests directory with:
nosetests code/utils/tests/test_glm.py
"""
# Loading modules.
import numpy as np
import numpy.linalg as npl
import nibabel as nib
import os
import sys
from numpy.testing import assert_almost_equal, assert_array_equal


# Add path to functions to the system path.
sys.path.append(os.path.join(os.path.dirname(__file__), "../functions/"))


# Load our GLM functions.
from glm import glm_beta, glm_mrss

def test_glm_beta():
# Read in the image data.
img = nib.load('data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii')
data = img.get_data()
# Read in the convolutions.
p = 2
convolved1 = np.loadtxt('data/ds114/sub009/behav/task002_run001/ds114_sub009_t2r1_conv.txt')
# Create design matrix.
X_matrix = np.ones((len(convolved1), p))
X_matrix[:, 1] = convolved1

# Calculate betas, copied from the exercise.
data_2d = np.reshape(data, (-1, data.shape[-1]))
B = npl.pinv(X_matrix).dot(data_2d.T)
B_4d = np.reshape(B.T, img.shape[:-1] + (-1,))

# Run function.
test_B_4d = glm_beta(data, X_matrix)
assert_almost_equal(B_4d, test_B_4d)


def test_glm_mrss():
img = nib.load('data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii')
data = img.get_data()
convolved1 = np.loadtxt('data/ds114/sub009/behav/task002_run001/ds114_sub009_t2r1_conv.txt')
X_matrix = np.ones((len(convolved1), 2))
X_matrix[:, 1] = convolved1
data_2d = np.reshape(data, (-1, data.shape[-1]))
B = npl.pinv(X_matrix).dot(data_2d.T)
B_4d = np.reshape(B.T, img.shape[:-1] + (-1,))
test_B_4d = glm_beta(data, X_matrix)

# Pick a single voxel to check mrss functiom.
# Calculate actual fitted values, residuals, and MRSS of voxel.
fitted = X_matrix.dot(B_4d[12, 22, 10])
residuals = data[12, 22, 10] - fitted
MRSS = np.sum(residuals**2)/(X_matrix.shape[0] - npl.matrix_rank(X_matrix))

# Calculate using glm_diagnostics function.
test_MRSS, test_fitted, test_residuals = glm_mrss(test_B_4d, X_matrix, data)
assert_almost_equal(MRSS, test_MRSS[12, 22, 10])
assert_almost_equal(fitted, test_fitted[12, 22, 10])
assert_almost_equal(residuals, test_residuals[12, 22, 10])





38 changes: 38 additions & 0 deletions code/utils/tests/test_smoothing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
""" Tests for smoothvoxels in smooth module
Run at the tests directory with:
nosetests code/utils/tests/test_smoothing.py
"""

import os
import sys
import numpy as np
import itertools
import scipy.ndimage
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt
import nibabel as nib
from numpy.testing import assert_almost_equal
from nose.tools import assert_not_equals


# Add path to functions to the system path.
sys.path.append(os.path.join(os.path.dirname(__file__), "../functions/"))

# Load smoothing function.
from smoothing import smoothing

def test_smooth():
# Read in the image data.
img = nib.load('data/ds114/sub009/BOLD/task002_run001/ds114_sub009_t2r1.nii')
data = img.get_data()

# Run the smoothing function with sigma 0 at time 12
non_smoothed_data = smoothing(data, 0, 12)

# assert that data at time 12 and non_smoothed_data are equal since sigma = 0
assert_almost_equal(data[..., 12], non_smoothed_data)

# Run the smoothvoxels function with sigma 5 at time 100
smoothed_data = smoothing(data, 5, 100)
# assert that data at time 16 and smoothed_data are not equal
assert_not_equals(data[..., 100].all(), smoothed_data.all())
4 changes: 2 additions & 2 deletions data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ download_test_data:
wget -P ../data/ds114/sub009/behav/task002_run001/ http://www.jarrodmillman.com/rcsds/_downloads/ds114_sub009_t2r1_conv.txt

test:
nosetests tests -w ..
nosetests tests

coverage:
nosetests tests -w .. --with-coverage --cover-package=data.py
nosetests tests --with-coverage --cover-package=data_hashes.py


21 changes: 0 additions & 21 deletions data/data_hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,6 @@
import pdb
import json

def create_dict(filename):
"""Return a dictionary of hashes from a .txt file
Parameters
----------
filename: .txt file containing the each file hash in the
first column and the file location in the data in the
second column.
Returns
---------
newDict: a dictionary of hashes
"""
newDict={}
f=open(filename)
num_lines = sum(1 for line in open(filename))
for line,i in zip(f,range(0,num_lines)):
info = line.split()
newDict[info[1]]=info[0]
f.close()
return newDict

def generate_file_md5(filename, blocksize=2**20):
"""Generate the md5 hash for filename
Parameters
Expand Down
Binary file not shown.

0 comments on commit 75f0d20

Please sign in to comment.