Merge pull request #34 from lisaannyu/master
Design matrix and array of voxels in top 20% of t-statistics
AlonDaks committed Dec 6, 2015
2 parents b8e8907 + b0bf7d2 commit c886f50
Showing 14 changed files with 186 additions and 100 deletions.
75 changes: 19 additions & 56 deletions code/utils/linear_modeling.py
@@ -53,15 +53,9 @@ def get_betas_Y(X, data):
-------
B: 2D array, p x B, the number of voxels
"""
print(data.shape[-1])
print(type(data))
# Y = np.reshape(data, (data.shape[-1], -1))
Y = np.reshape(data, (-1, data.shape[-1]))
print(Y.shape)
# B = npl.pinv(X).dot(Y)
Y = np.reshape(data,(-1,data.shape[-1]))
B = npl.pinv(X).dot(Y.T)
print(B.shape)
return B, Y.T
return B,Y.T


def get_betas_4d(B, data):
@@ -70,7 +64,6 @@ def get_betas_4d(B, data):
-------
4D array, beta for each voxel; need this format to plot
"""
print(B.shape)
return np.reshape(B.T, data.shape[:-1] + (-1,))


@@ -121,64 +114,36 @@ def t_stat(y, X, c):
# calculate bottom half of t statistic
SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))
t = c.T.dot(beta) / SE
return abs(t)


#def get_top_100(t,thresh=100/1108800):
# """
# Parameters
# ----------
# t: 1D array of t-statistics for each voxel
#
# Returns
# -------
# 1D array of position of voxels in top 100 of t-statistics (all are
# positive)
# """
# a = np.int32(round(len(t) * thresh))
# # top_100_voxels = np.argpartition(t, -a)[-a:]
# # problem: nans, try
# top_100_voxels = np.argpartition(~np.isnan(t),-1)[-a:]

# return top_100_voxels


#def get_index_4d(top_100_voxels, data):
# """
# Returns
# -------
# Indices in terms of 4D array of each voxel in top 20% of t-statistics
# """
# shape = data[...,-1].shape
# axes = np.unravel_index(top_100_voxels, shape)
# return zip(axes[0], axes[1], axes[2]) # sequence too large, try n = 32

# Solve the problem by using 32 for the next two functions instead
def get_top_32(t, thresh=100/1108800):
"""
Parameters
----------
return abs(t[0])


def get_top_32(t,thresh=100/1108800):
"""
Parameters
----------
t: 1D array of t-statistics for each voxel
Returns
-------
1D array of position of voxels in top 32 of t-statistics (all are positive)
"""
a = np.int32(round(len(t) * thresh))
top_32_voxels = np.argpartition(~np.isnan(t), -1)[-a:]
return top_32_voxels

return t.argsort()[-a:][::-1]


def get_index_4d(top_32_voxels, data):
"""
Parameters
---------
top_32_voxels: 1D array of indices of top 32 voxels
Returns
-------
Indices in terms of 4D array of each voxel in top 20% of t-statistics
"""
shape = data[..., -1].shape
axes = np.unravel_index(top_32_voxels, shape)
return zip(axes[0], axes[1], axes[2])

"""
shape = data[...,-1].shape
axes = np.unravel_index(top_32_voxels,shape)
return zip(*axes)

def plot_single_voxel(data, top_100_voxels):
"""
@@ -187,5 +152,3 @@ def plot_single_voxel(data, top_100_voxels):
None
"""
plt.plot(data[get_index_4d(top_100_voxels, data)[0]])
# fix so only get top voxel
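
For context (not part of the commit), here is a minimal sketch of how the functions in this module chain together; the design-matrix columns, contrast vector, and threshold are assumptions based on the docstrings above and on script.py below.

import nibabel as nib
import numpy as np
import linear_modeling

img = nib.load('bold_dico_dico_rcds_nl.nii')        # file name taken from script.py in this commit
data = img.get_data()                               # 4D array: (x, y, z, time)

n_vols = data.shape[-1]
X = np.ones((n_vols, 3))                            # assumed design: intercept, day/night, drift
# X[:, 1] would hold the day/night indicator derived from the scene labels
X[:, 2] = np.linspace(-1, 1, n_vols)                # linear drift

B, Y = linear_modeling.get_betas_Y(X, data)         # B: regressors x voxels, Y: time x voxels
t = linear_modeling.t_stat(Y, X, [0, 1, 0])         # t-statistics for the day/night column
top = linear_modeling.get_top_32(t, 32.0 / t.size)  # indices of the 32 largest t-statistics
coords = linear_modeling.get_index_4d(top, data)    # (i, j, k) coordinates of those voxels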

Empty file modified code/utils/rf.py
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion code/utils/script.py
@@ -13,7 +13,7 @@
import rf

# Load data
img = nib.load('test_data.nii')
img = nib.load('bold_dico_dico_rcds_nl.nii')
data = img.get_data()

# Design Matrix
65 changes: 25 additions & 40 deletions code/utils/tests/test_linear_modeling.py
@@ -11,22 +11,14 @@
import matplotlib.pyplot as plt
import numpy.linalg as npl
from scipy.stats import t as t_dist
from .. import scene_slicer, plot

from .. import linear_modeling
from numpy.testing import assert_almost_equal
from numpy.testing import assert_equal, assert_almost_equal

# Make an X, y
X = np.ones((4, 2))
X[:, 0] = np.random.randint(10, size=4)
y = np.zeros((4, 6))
y[:, 0] = X[:, 0] * 2 + 3
y[:, 1] = X[:, 0] * -1.5 + 2
y[:, 2] = X[:, 0]
y[:, 3] = X[:, 0] - 2
y[:, 4] = X[:, 0] * 2
y[:, 5] = X[:, 0] * -1 + 1
data = np.reshape(y, (1, 2, 3, 4))
X[:, 0] = np.array([4, 8, 2, 9])
data = np.array([[[1, 0, 7, 0], [1, 0, 4, 4], [3, 7, 9, 7]]])

# def test_get_design_matrix():
# not done writing this function, need to wait
@@ -35,20 +27,21 @@
# not sure how to test plots


def test_get_betas():
actual = linear_modeling.get_betas(X, y[:, 0])
expected = np.array([[2., 3]]).T
def test_get_betas_Y():
actual = linear_modeling.get_betas_Y(X, data)[0]
expected = np.array([[-0.85496183, -0.11450382, -0.01526718],
[6.91603053, 2.90839695, 6.58778626]])
assert_almost_equal(expected, actual)
actual = linear_modeling.get_betas_Y(X, data)[1]
expected = np.array([[1, 1, 3], [0, 0, 7], [7, 4, 9], [0, 4, 7]])
assert_almost_equal(expected, actual)


def test_get_betas_4d():
actual = linear_modeling.get_betas_4d(
linear_modeling.get_betas(X, data), data)
expected = np.array([[[
[2.00000000e+00, -1.50000000e+00
], [1.00000000e+00, 1.00000000e+00], [2.00000000e+00, -1.00000000e+00]
], [[3.00000000e+00, 2.00000000e+00], [-3.55271368e-15, -2.00000000e+00],
[-7.10542736e-15, 1.00000000e+00]]]])
linear_modeling.get_betas_Y(X, data)[0], data)
expected = np.array([[[-0.85496183, 6.91603053], [-0.11450382, 2.90839695],
[-0.01526718, 6.58778626]]])
assert_almost_equal(expected, actual)

# def test_plot_betas():
Expand All @@ -57,33 +50,25 @@ def test_get_betas_4d():


def test_t_stat():
actual = linear_modeling.t_stat(y, X, [0, 1])
expected = np.array([[4.90591615e+14, 3.77185234e+15, -5.32661312e-01, -
2.17767996e+15, -5.32661312e-01, 6.46867339e+14]])
actual = linear_modeling.t_stat(
linear_modeling.get_betas_Y(X, data)[1], X, [0, 1])
expected = np.array([2.7475368, 1.04410995, 1.90484058])
assert_almost_equal(expected, actual)


def test_get_ts():
actual = linear_modeling.get_ts(y, X, [0, 1], data)
expected = np.array([9.84892820e+14, 6.46867339e+14, -4.81391897e-01, -
8.59684933e+14, -4.81391897e-01, 7.99631096e+14])
assert_almost_equal(actual, expected)


def test_get_top_100():
a = np.arange(21)
assert_equal(linear_modeling.get_top_100(a,.2), [17, 18, 19, 20])
a = np.arange(21)[::-1]
assert_equal(linear_modeling.get_top_100(a,.2), [3, 2, 1, 0])
actual = linear_modeling.get_top_100(linear_modeling.get_ts(y, X, [0, 1],
data),.2)
expected = np.array([0])
def test_get_top_32():
a = np.array([6, 4, 1, 2, 8, 8, 1, 9, 5, 2, 1, 9, 5, 4, 3, 6, 5, 3, 5, 8])
assert_equal(linear_modeling.get_top_32(a, .2), [11, 7, 19, 4])
actual = linear_modeling.get_top_32(
linear_modeling.t_stat(
linear_modeling.get_betas_Y(X, data)[1], X, [0, 1]), .5)
expected = np.array([0, 2])
assert_almost_equal(actual, expected)


def test_get_index_4d():
actual = linear_modeling.get_index_4d([3, 8, 23, 1], (2, 3, 4))
assert_equal(actual, [(0, 0, 3), (0, 2, 0), (1, 2, 3), (0, 0, 1)])
actual = linear_modeling.get_index_4d([0, 2], data)
assert_equal(actual, [(0, 0), (0, 2)])

# def test_plot_single_voxel():
# not sure how to test plots
51 changes: 51 additions & 0 deletions code/utils/tests/test_parse_demographics.py
@@ -0,0 +1,51 @@
from __future__ import absolute_import
from .. import parse_demographics

import os
import csv


def prepare_for_tests():
with open('demographics.csv', 'w') as csvfile:
file_writer = csv.writer(csvfile, delimiter=',', quotechar='"')
file_writer.writerow(['id', 'gender', 'age', 'forrest_seen_count'])
file_writer.writerow(['1', 'm', '30-35', '5'])
file_writer.writerow(['2', 'm', '30-35', '1'])
test_object = parse_demographics.parse_csv('demographics.csv')
return test_object


def test_seen_most_times():
test_subjects = prepare_for_tests()
seen_count = parse_demographics.seen_most_times(test_subjects)
assert seen_count[0] == 5
assert seen_count[1] == 1
delete_file()


def test_seen_least_times():
test_subjects = prepare_for_tests()
seen_count = parse_demographics.seen_least_times(test_subjects)
assert seen_count[0] == 1
assert seen_count[1] == 2
delete_file()


def test_find_id_by_gender():
test_subjects = prepare_for_tests()
id_list = parse_demographics.find_id_by_gender(test_subjects, 'm')
assert len(id_list) == 2
assert id_list[0] == 'm'
assert id_list[1] == 'm'
delete_file()


def test_find_count_by_id():
test_subjects = prepare_for_tests()
count = parse_demographics.find_count_by_id(test_subjects, 1)
assert count == 5
delete_file()


def delete_file():
os.remove('demographics.csv')
10 changes: 7 additions & 3 deletions slides/Makefile
@@ -1,9 +1,13 @@
.PHONY: all clean
.PHONY: all clean progress.pdf final.pdf

all: clean progress.pdf
all: clean progress.pdf final.pdf

clean:
rm -f progress.pdf
rm -f progress.pdf final.pdf

progress.pdf: progress.md
pandoc -t beamer -s progress.md -o progress.pdf


final.pdf: final.md
pandoc -t beamer -s final.md -o final.pdf
Binary file added slides/day_night_3.png
Binary file added slides/day_night_on_off.png
Binary file added slides/design_matrix_new_3.png
83 changes: 83 additions & 0 deletions slides/final.md
@@ -0,0 +1,83 @@
% Project Lambda
% Jordeen Chang, Alon Daks, Ying Luo, Lisa Ann Yu
% December 3, 2015

## The Paper
- A high-resolution 7-Tesla fMRI dataset from complex natural stimulation with an audio movie

## Abstract

- Brief Overview of Original Study:
- 20 participants recorded at a high field strength of 7 Tesla while listening to *Forrest Gump*
- Collected fMRI data for the entire movie in .nii.gz format, with additional information about the movie scenes
- Used an MRI scanner and pulse oximetry to conduct blood oxygen level dependent (BOLD) imaging, structural MRI imaging, and a physiological assay
- Its goal is to provide data for others to explore auditory cognition, language and music perception, social perception, etc.
- Our Goal:
- Reproduce a subset of the analysis conducted by Hanke et al.
- Apply machine learning to see if we can predict whether a subject was listening to a day or a night movie scene based on brain state

# Data Extraction & Exploration

## Extraction
- First had to overcome the large amount of data to sift through
- Testing only on Subject 1 for the sake of time and efficiency: limited the project scope from 320 GB to 5 GB
- Of the raw, linearly aligned, and nonlinearly aligned data, chose to use only the nonlinear alignment
- Smoothed the data by applying a Gaussian filter (see the sketch after this list)
- Designed a clean schema by introducing a data_path.json file to reference where each raw data file is located
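
As a rough illustration of that smoothing step, here is a minimal sketch; the file path and filter width are placeholders, not the project's actual settings.

```python
# Sketch only: spatially smooth one subject's 4D BOLD data with a Gaussian filter.
import nibabel as nib
from scipy.ndimage import gaussian_filter

img = nib.load('sub001/bold_dico_dico_rcds_nl.nii')   # hypothetical path
data = img.get_data()                                  # shape (x, y, z, time)

# sigma = 0 on the time axis keeps volumes independent; 2 voxels is illustrative.
smoothed = gaussian_filter(data, sigma=(2, 2, 2, 0))
```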

##
![This is a plot of the SDs across volumes in the 4-D array for subject 1, run 1, task 1. Though the SDs all lie within the range (171, 173.5), there is still quite a bit of variability within that range.](sd.jpg?raw=true)

# Methods & Results

## Process Overview: Reproducing
- The paper mentions two methods for correlation; we chose to reproduce the first: taking the BOLD time series and calculating the voxel-wise Pearson correlation on the raw data that Matthew provided.
- Using multiple EC2 instances to run the analysis, so that we can load both two-hour time courses in a pair and parallelize the process, since the correlation takes 10 minutes on our laptops.
- Defined a UNIX environment variable STAT159_CACHED_DATA that lets users choose whether the data should be recomputed (see the sketch after this list).
- Results are still being processed.
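
A minimal sketch of how such a cache switch might work; only the variable name comes from the slide, and the file name and compute step are placeholders.

```python
# Sketch: honor the STAT159_CACHED_DATA environment variable before recomputing.
import os
import numpy as np

def compute_correlations():
    # Placeholder for the expensive voxel-wise correlation described above.
    return np.zeros(1)

def load_or_compute_correlations(cache_path='correlations.npy'):
    # Reuse the cached result only when the user has asked for cached data.
    if os.environ.get('STAT159_CACHED_DATA') == 'True' and os.path.exists(cache_path):
        return np.load(cache_path)
    result = compute_correlations()
    np.save(cache_path, result)
    return result
```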

## Analysis Overview: Predicting
- Parsed “scenes.csv” and “demographics.csv” for information about the movie scenes and the subjects
- Conducted a t-test for each voxel to determine whether the brain signal differs significantly between day and night scenes
- Retrieved the 32 voxels with the largest change between the groups.

##
Step 1: Determine which time slices are on (day) and which are off (night)

![Day-night time course for Task 1, Run 1. Very few scenes take place at night.](day_night_on_off.png)

##
Step 2: Create a Design Matrix

![Intercept, day/night, linear drift](design_matrix_new_3.png?raw=true)
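
A minimal sketch of how such a design matrix could be assembled; the number of volumes and the `day_night` indicator below are placeholders for the project's real values.

```python
# Sketch: design matrix with an intercept, a day/night indicator, and a linear drift term.
import numpy as np

n_vols = 451                                   # hypothetical number of volumes in the run
day_night = np.random.randint(0, 2, n_vols)    # stand-in for the parsed scene labels

X = np.ones((n_vols, 3))
X[:, 1] = day_night                            # 1 = day scene, 0 = night scene
X[:, 2] = np.linspace(-1, 1, n_vols)           # linear drift regressor
```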

##
Step 3: Calculate the betas for each voxel

![Day_Night](day_night_3.png?raw=true)

##
![Linear Drift](linear_drift_3.png?raw=true)

##
![Intercept (baseline)](intercept_3.png?raw=true)

##
Step 4: Test the hypothesis that beta = 0 to generate t-statistics
![Largest 32 t-statistics. Note: We took the absolute value of the t-statistics to find the farthest distances from 0. Additionally, since we are always comparing the estimate of beta to 0, the t-statistic should be positive.](top_32_bar.png?raw=true)
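
For reference, the statistic computed per voxel in `linear_modeling.t_stat` (shown earlier in this diff) is

$$ t = \frac{c^\top \hat{\beta}}{\sqrt{\mathrm{MRSS}\; c^\top (X^\top X)^{+} c}}, $$

where $\hat{\beta}$ is the least-squares estimate, MRSS is the mean residual sum of squares, and $c$ is the contrast vector selecting the day/night column; the code reports $|t|$.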

##
![Time course for the voxel with the highest t-statistic.](highest_t_day_night.png?raw=true)

## Analysis Overview: Predicting (cont.)
- Using a random forest with 1000 trees and the voxels as the features (see the sketch after this list)
- 80% training, 20% testing
- 83% accuracy, but 86% of the data is day scenes, so the model barely beats the baseline
- Next steps include cross-validation on the feature set and running prediction on other labels in the data, such as sentiment and interior/exterior
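
A minimal sketch of this prediction setup; the feature matrix and labels below are random stand-ins for the project's extracted voxel time courses and scene labels, and the scikit-learn parameters are illustrative.

```python
# Sketch: day/night prediction from the top voxels with a random forest.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

features = np.random.rand(451, 32)             # stand-in: time points x top voxels
labels = np.random.randint(0, 2, 451)          # stand-in: day (1) / night (0) labels

X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=0)   # 80% train / 20% test

clf = RandomForestClassifier(n_estimators=1000, random_state=0)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))               # compare against the 86%-day baseline
```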

## Future Work
- Considered exploring possible physiological responses to certain movie scenes by gender and age, but did not have enough computing resources or data to do so
- Predict familiarity with *Forrest Gump*
- Difference in voxel activation between participants with high and low amounts of prior exposure
- Machine learning
- However, fMRI is expensive, so the number of participants is relatively small and the predictive power of such a study would be rather limited
Binary file added slides/highest_t_day_night.png
Binary file added slides/intercept_3.png
Binary file added slides/linear_drift_3.png
Binary file added slides/top_32_bar.png
