-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Packaged our code as a python module called stat159lambda
- Loading branch information
Showing
27 changed files
with
1,291 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
test: | ||
nosetests utils | ||
|
||
coverage: | ||
nosetests utils --with-coverage --cover-package=utils |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import os | ||
import sys | ||
|
||
REPO_HOME_PATH = os.path.realpath(__file__).replace('/code/stat159lambda/config.py', '') | ||
|
||
USE_CACHED_DATA = os.environ.get('STAT159_CACHED_DATA', 'True') == 'True' | ||
|
||
NUM_PROCESSES = int(os.environ.get('STAT159_NUM_PROCESSES', 5)) | ||
|
||
NUM_VOXELS = 1108800 | ||
|
||
NUM_TOTAL_VOLUMES = 3543 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from __future__ import print_function | ||
from __future__ import division | ||
import numpy as np | ||
import sys | ||
import matplotlib.pyplot as plt | ||
import gc | ||
from stat159lambda.config import REPO_HOME_PATH | ||
|
||
|
||
def calc_vol_rms_diff(data_file_path): | ||
data = np.load(open(data_file_path)) | ||
diff_data = np.diff(data, axis=1) | ||
del data | ||
gc.collect() | ||
vol_rms_diff = np.sqrt(np.mean(diff_data**2, axis=0)) | ||
return vol_rms_diff[9:] | ||
|
||
|
||
def save_plot(vol_rms_diff, subj_num): | ||
plt.plot(vol_rms_diff) | ||
plt.savefig('{0}/figures/subj{1}_vol_rms_diff.png'.format( | ||
REPO_HOME_PATH, subj_num)) | ||
|
||
|
||
if __name__ == '__main__': | ||
subj_num = sys.argv[1] | ||
data_file_path = '{0}/data/processed/sub{1}_rcds_2d.npy'.format( | ||
REPO_HOME_PATH, subj_num) | ||
vol_rms_diff = calc_vol_rms_diff(data_file_path) | ||
save_plot(vol_rms_diff, subj_num) | ||
del vol_rms_diff |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
from __future__ import print_function | ||
from __future__ import division | ||
import json | ||
import nibabel as nib | ||
import numpy as np | ||
import sys | ||
from glob import glob | ||
from os.path import exists | ||
from multiprocessing import Pool | ||
from stat159lambda.config import REPO_HOME_PATH | ||
|
||
DATA_PATHS = json.load(open('{0}/data/data_path.json'.format(REPO_HOME_PATH))) | ||
|
||
INCORRECT_NUM_ARGS_MESSAGE = 'Invalid number of arguments: specify alignment type' | ||
ILLEGAL_ARG_MESSAGE = 'preprocess.py must be provided with alignment argument' | ||
|
||
|
||
def concatenate_runs(alignment): | ||
for i, subject in enumerate(DATA_PATHS['subjects']): | ||
nii_file_name = '{0}/data/processed/sub{1}_{2}'.format( | ||
REPO_HOME_PATH, i + 1, alignment) | ||
if not exists(nii_file_name + '.nii') or not cf.USE_CACHED_DATA: | ||
run_data = [] | ||
for j, run in enumerate(subject['runs']): | ||
task_path = run[alignment]['path'] | ||
img = nib.load(REPO_HOME_PATH + '/' + task_path) | ||
data = img.get_data() | ||
if j == 0: | ||
run_data.append(data[..., :-4]) | ||
elif j >= 1 and j <= 6: | ||
run_data.append(data[..., 4:-4]) | ||
else: | ||
run_data.append(data[..., 4:]) | ||
concatenated_run_data = np.concatenate(run_data, axis=3) | ||
concatenated_img = nib.Nifti1Image(concatenated_run_data, | ||
np.eye(4)) | ||
nib.save(concatenated_img, nii_file_name) | ||
print('Saved {0}'.format(nii_file_name + '.nii')) | ||
else: | ||
print('Using cached version of {0}'.format(nii_file_name + '.nii')) | ||
|
||
|
||
def reshape_data_to_2d(alignment): | ||
files_to_reshape = np.sort(glob('{0}/data/processed/sub*_{1}.nii'.format( | ||
REPO_HOME_PATH, alignment))) | ||
for f in files_to_reshape: | ||
file_name_2d = f.replace('.nii', '_2d') | ||
if not exists(file_name_2d + '.npy') or not cf.USE_CACHED_DATA: | ||
data = nib.load(f).get_data() | ||
data_chunks = partition_4d_data(data) | ||
p = Pool(cf.NUM_PROCESSES) | ||
reshaped_chunks = p.imap(reshape_4d_data, data_chunks) | ||
data_2d = merge_2d_data(reshaped_chunks) | ||
np.save(file_name_2d, data_2d) | ||
print('Saved {0}'.format(file_name_2d + '.npy')) | ||
else: | ||
print('Using cached version of {0}'.format(file_name_2d + '.npy')) | ||
|
||
|
||
def partition_4d_data(data): | ||
num_partitions = cf.NUM_PROCESSES | ||
num_volunes = data.shape[-1] | ||
partition_indices = [(num_volunes // num_partitions) * i | ||
for i in range(1, num_partitions)] | ||
return np.split(data, partition_indices, axis=3) | ||
|
||
|
||
def reshape_4d_data(data): | ||
return np.reshape(data, (-1, data.shape[-1])) | ||
|
||
|
||
def merge_2d_data(data_slices): | ||
return np.hstack(tuple(data_slices)) | ||
|
||
# def band_pass_filter(): | ||
|
||
if __name__ == '__main__': | ||
if len(sys.argv) != 2: | ||
raise ValueError(INCORRECT_NUM_ARGS_MESSAGE) | ||
alignment = sys.argv[1] | ||
if alignment not in ['linear', 'non_linear', 'rcds']: | ||
raise ValueError(ILLEGAL_ARG_MESSAGE) | ||
concatenate_runs(alignment) | ||
reshape_data_to_2d(alignment) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
from __future__ import print_function | ||
from __future__ import division | ||
import nibabel as nib | ||
import numpy as np | ||
from scipy import stats | ||
from multiprocessing import Process | ||
import itertools | ||
import sys | ||
from os import path | ||
import sharedmem as sm | ||
import gc | ||
from stat159lambda.config import REPO_HOME_PATH | ||
|
||
INCORRECT_NUM_ARGS_MESSAGE = 'invalid number of arguments: specify alignment type, subject1 and subject2' | ||
ILLEGAL_ALIGNMENT_ARG_MESSAGE = 'alignment argument must be either <rcds>, <linear>, <non_linear>' | ||
ILLEGAL_SUBJECTS_ARG_MESSAGE = 'subject arguments must be integers corresponding to subject numbers' | ||
|
||
|
||
def parallelize_correlation(): | ||
chunk_size = len(shared_subject1) // cf.NUM_PROCESSES | ||
output_correlations = sm.empty(len(shared_subject1)) | ||
processes = [ | ||
Process(target=correlation, | ||
args=(shared_subject1, shared_subject2, i * chunk_size, min( | ||
(i + 1) * chunk_size, len(shared_subject1)), | ||
output_correlations)) for i in xrange(cf.NUM_PROCESSES) | ||
] | ||
for p in processes: | ||
p.start() | ||
for p in processes: | ||
p.join() | ||
return output_correlations | ||
|
||
|
||
def correlation(shared_subject1, shared_subject2, start_index, stop_index, | ||
output_correlations): | ||
for i in range(start_index, stop_index): | ||
output_correlations[i] = stats.pearsonr(shared_subject1[i, :], | ||
shared_subject2[ | ||
i, :])[0] | ||
|
||
|
||
def check_command_line_arguments(arguments): | ||
if len(arguments) != 4: | ||
raise ValueError(INCORRECT_NUM_ARGS_MESSAGE) | ||
alignment = arguments[1] | ||
if alignment not in ['linear', 'non_linear', 'rcds']: | ||
raise ValueError(ILLEGAL_ALIGNMENT_ARG_MESSAGE) | ||
try: | ||
subject1 = int(arguments[2]) | ||
subject2 = int(arguments[3]) | ||
except: | ||
raise ValueError(ILLEGAL_SUBJECTS_ARG_MESSAGE) | ||
subject1_file_path = '{0}/data/processed/sub{1}_{2}_2d.npy'.format( | ||
REPO_HOME_PATH, subject1, alignment) | ||
subject2_file_path = '{0}/data/processed/sub{1}_{2}_2d.npy'.format( | ||
REPO_HOME_PATH, subject2, alignment) | ||
if not path.exists(subject1_file_path): | ||
raise ValueError( | ||
'Filing missing: {0}, run preprocess.py to generate necessary file'.format( | ||
subject1_file_path)) | ||
if not path.exists(subject2_file_path): | ||
raise ValueError( | ||
'Filing missing: {0}, run preprocess.py to generate necessary file'.format( | ||
subject2_file_path)) | ||
return (subject1_file_path, subject2_file_path) | ||
|
||
|
||
if __name__ == '__main__': | ||
subject1_file_path, subject2_file_path = check_command_line_arguments( | ||
sys.argv) | ||
correlation_file_name = '{0}/data/processed/r_sub{1}_sub{2}_{3}'.format( | ||
REPO_HOME_PATH, sys.argv[2], sys.argv[3], sys.argv[1]) | ||
if not path.exists(correlation_file_name + | ||
'.npy') or not cf.USE_CACHED_DATA: | ||
shared_subject1 = sm.copy(np.load(subject1_file_path)) | ||
gc.collect() | ||
shared_subject2 = sm.copy(np.load(subject2_file_path)) | ||
gc.collect() | ||
voxel_correlations = parallelize_correlation() | ||
np.save(correlation_file_name, voxel_correlations) | ||
print('Saved {0}'.format(correlation_file_name + '.npy')) | ||
else: | ||
print('Using cached version of {0}'.format(correlation_file_name + | ||
'.npy')) |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
# Python 3 compatibility | ||
from __future__ import division, print_function | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import numpy.linalg as npl | ||
from scipy.stats import t as t_dist | ||
from scipy.ndimage import gaussian_filter | ||
import stat159lambda.utils.scene_slicer as ssm | ||
import nibabel as nib | ||
|
||
|
||
|
||
def get_design_matrix(): | ||
|
||
""" | ||
Returns | ||
------- | ||
Design matrix with 5 columns, including the 3 columns of interest, | ||
the linear drift column, and the column of ones | ||
""" | ||
data = nib.load(data_path) | ||
n_trs = data.shape[-1] | ||
X = np.ones((n_trs, 3)) | ||
ss = ssm.SceneSlicer('test_data.nii','scenes.csv') | ||
day_night, int_ext = ss.get_scene_slices() | ||
X[:, 1] = np.linspace(-1, 1, n_trs) | ||
X[:, 2] = day_night | ||
return X | ||
|
||
#design matrix that is passed into random forrest | ||
def get_rf_design_matrix(voxels,data): | ||
ss = ssm.SceneSlicer('test_data.nii','scenes.csv') | ||
day_night, int_ext = ss.get_scene_slices() | ||
new_X = np.zeros((data.shape[-1], len(voxels))) | ||
for num in range(len(voxels)): | ||
new_X[:, num] = data[voxels[num]] | ||
return new_X, day_night | ||
|
||
|
||
def plot_design_matrix(X): | ||
""" | ||
Returns | ||
------- | ||
None | ||
""" | ||
plt.imshow(X, aspect=0.1, cmap='gray', interpolation='nearest') | ||
plt.xticks([]) | ||
|
||
|
||
def get_betas_Y(X, data): | ||
""" | ||
Returns | ||
------- | ||
B: 2D array, p x B, the number of voxels | ||
""" | ||
Y = np.reshape(data,(-1,data.shape[-1])) | ||
B = npl.pinv(X).dot(Y.T) | ||
return B,Y.T | ||
|
||
|
||
def get_betas_4d(B, data): | ||
""" | ||
Returns | ||
------- | ||
4D array, beta for each voxel; need this format to plot | ||
""" | ||
return np.reshape(B.T, data.shape[:-1] + (-1,)) | ||
|
||
|
||
def plot_betas(b_vols, col): | ||
""" | ||
Parameters | ||
---------- | ||
b_vols: 2D array | ||
col: integer between 0 and p | ||
Returns | ||
------- | ||
None | ||
""" | ||
if col >= b_vols.shape[-1]: | ||
raise RuntimeError("Error: select a column between 0 and p") | ||
c = b_vols.shape[2]//2 | ||
plt.imshow(b_vols[:, :, c, col], cmap='gray', interpolation='nearest') | ||
|
||
|
||
def t_stat(y, X, c): | ||
""" betas, t statistic and significance test given data, | ||
design matix, contrast | ||
This is OLS estimation; we assume the errors to have independent | ||
and identical normal distributions around zero for each $i$ in | ||
$\e_i$ (i.i.d). | ||
""" | ||
# Make sure y, X, c are all arrays | ||
y = np.asarray(y) | ||
X = np.asarray(X) | ||
c = np.atleast_2d(c).T # As column vector | ||
# Calculate the parameters - b hat | ||
beta = npl.pinv(X).dot(y) | ||
# The fitted values - y hat | ||
fitted = X.dot(beta) | ||
# Residual error | ||
errors = y - fitted | ||
# Residual sum of squares | ||
RSS = (errors**2).sum(axis=0) | ||
# Degrees of freedom is the number of observations n minus the number | ||
# of independent regressors we have used. If all the regressor | ||
# columns in X are independent then the (matrix rank of X) == p | ||
# (where p the number of columns in X). If there is one column that | ||
# can be expressed as a linear sum of the other columns then | ||
# (matrix rank of X) will be p - 1 - and so on. | ||
df = X.shape[0] - npl.matrix_rank(X) | ||
# Mean residual sum of squares | ||
MRSS = RSS / df | ||
# calculate bottom half of t statistic | ||
SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c))) | ||
t = c.T.dot(beta) / SE | ||
return abs(t[0]) | ||
|
||
|
||
def get_top_32(t,thresh=100/1108800): | ||
""" | ||
Parameters | ||
---------- | ||
t: 1D array of t-statistics for each voxel | ||
Returns | ||
------- | ||
1D array of position of voxels in top 32 of t-statistics (all are positive | ||
""" | ||
a = np.int32(round(len(t) * thresh)) | ||
return t.argsort()[-a:][::-1] | ||
|
||
|
||
def get_index_4d(top_32_voxels, data): | ||
""" | ||
Parameters | ||
--------- | ||
top_32_voxels: 1D array of indices of top 32 voxels | ||
Returns | ||
------- | ||
Indices in terms of 4D array of each voxel in top 20% of t-statistics | ||
""" | ||
shape = data[...,-1].shape | ||
axes = np.unravel_index(top_32_voxels,shape) | ||
return zip(*axes) | ||
|
||
def plot_single_voxel(data, top_100_voxels): | ||
""" | ||
Returns | ||
------- | ||
None | ||
""" | ||
plt.plot(data[get_index_4d(data, top_100_voxels)[0]]) |
Oops, something went wrong.