Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finished plots and figures for reproduction #78

Merged
merged 17 commits into from
Dec 14, 2015
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 8 additions & 20 deletions code/stat159lambda/classification/design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,13 @@


class DesignMatrix:
def __init__(self, data_file, volume_indices, voxels_indices):
self.data_file = data_file
self.volume_indices = volume_indices
self.voxels_indices = voxels_indices
self.X = self.generate_design_matrix_()
gc.collect()
self.y = self.generate_labels_(ssm.SceneSlicer())
def __init__(self, data_file):
self.X = np.load(data_file).T[NUM_OFFSET_VOLUMES:, :]
ss = ssm.SceneSlicer()
self.y = np.array(ss.get_scene_slices()[0][NUM_OFFSET_VOLUMES:])

def get_design_matrix(self):
return self.X
def get_design_matrix(self, volume_indices, voxels_indices):
return self.X[volume_indices, :][:, voxels_indices]

def get_labels(self):
return self.y

def generate_design_matrix_(self):
data = np.load(self.data_file)
return data.T[NUM_OFFSET_VOLUMES:, self.voxels_indices][
self.volume_indices, :]

def generate_labels_(self, ss):
return np.array(ss.get_scene_slices()[0][NUM_OFFSET_VOLUMES:][
self.volume_indices])
def get_labels(self, volume_indices):
return self.y[volume_indices]
5 changes: 4 additions & 1 deletion code/stat159lambda/classification/partition_volumes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import numpy as np
import os
from stat159lambda.config import REPO_HOME_PATH, NUM_OFFSET_VOLUMES, NUM_VOLUMES
from stat159lambda.utils import scene_slicer as ss

def partition_volumes():
volume_indices = range(NUM_VOLUMES - NUM_OFFSET_VOLUMES)
volume_indices = np.array(range(NUM_VOLUMES - NUM_OFFSET_VOLUMES))
clean_slice_mask = ss.SceneSlicer().get_clean_slice_mask()
volume_indices = volume_indices[clean_slice_mask]
np.random.shuffle(volume_indices)
num_train = int(.8*len(volume_indices))
train_indices = volume_indices[:num_train]
Expand Down
2 changes: 1 addition & 1 deletion code/stat159lambda/classification/random_forest/rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self,
y,
max_features='auto',
depth=None,
n_estimators=400):
n_estimators=300):
self.model = RandomForestClassifier(max_features=max_features,
n_estimators=n_estimators,
max_depth=None,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,38 @@
from __future__ import print_function, division
import numpy as np
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score
from stat159lambda.classification import design_matrix as dm
from stat159lambda.classification.random_forest import rf
from stat159lambda.classification import partition_volumes as pv
from stat159lambda.config import REPO_HOME_PATH
from stat159lambda.config import REPO_HOME_PATH, NUM_VOXELS
from stat159lambda.linear_modeling import linear_modeling as lm
from stat159lambda.utils import data_path as dp

design_matrix = dm.DesignMatrix(
'{0}/data/processed/sub1_rcds_2d.npy'.format(REPO_HOME_PATH),
pv.get_train_indices(), range(200))

X_train = design_matrix.get_design_matrix()
y_train = np.array(design_matrix.get_labels())
voxels_sorted_by_t_statistic = lm.VoxelExtractor(1, 'int-ext').t_stat()
num_features_values = [200, 400, 600, 1000, 1500, 2000, 3000, 5000, 10000]
design_matrix = dm.DesignMatrix(dp.get_smoothed_2d_path(1, 4))
train_volume_indices = pv.get_train_indices()
cv_values = []
for num_features in num_features_values:
voxel_feature_indices = voxels_sorted_by_t_statistic[:num_features]

cv_accuracies = []
for train, test in KFold(len(X_train), 5):
X_cv_train = X_train[train, :]
y_cv_train = y_train[train]
X_cv_test = X_train[test, :]
y_cv_test = y_train[test]
model = rf.Classifier(X_cv_train, y_cv_train)
model.train()
y_predicted = model.predict(X_cv_test)
cv_accuracies.append(accuracy_score(y_predicted, y_cv_test))
X_train = design_matrix.get_design_matrix(train_volume_indices, voxel_feature_indices)
y_train = np.array(design_matrix.get_labels(train_volume_indices))

print np.mean(cv_accuracies)
cv_accuracies = []
for train, test in KFold(len(X_train), 5):
X_cv_train = X_train[train, :]
y_cv_train = y_train[train]
X_cv_test = X_train[test, :]
y_cv_test = y_train[test]
model = rf.Classifier(X_cv_train, y_cv_train)
model.train()
y_predicted = model.predict(X_cv_test)
cv_accuracies.append(accuracy_score(y_predicted, y_cv_test))

cv_values.append(np.mean(cv_accuracies))

np.save('cv_values', cv_values)

40 changes: 20 additions & 20 deletions code/stat159lambda/classification/svm/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class Classifier:
-------
None
"""
def __init__(self, X, y, C=1.0, kernel='rbf', degree=2):
def __init__(self, X, y, C=.001, kernel='linear', degree=2):
if kernel == 'linear':
self.model = LinearSVC(C=C)
else:
Expand All @@ -35,30 +35,30 @@ def __init__(self, X, y, C=1.0, kernel='rbf', degree=2):


def train(self):
"""
Classifier method that fits the SVM model according to the given training
data X
"""
Classifier method that fits the SVM model according to the given training
data X

Parameters
----------
None
Parameters
----------
None

Returns
-------
self : object
"""
Returns
-------
self : object
"""
self.model.fit(self.X, self.y)

def predict(self, new_data):
"""
Performs classification on samples in new_data
"""
Performs classification on samples in new_data

Parameters
----------
new_data : array
Parameters
----------
new_data : array

Returns
-------
pred: array that contains class labels for samples in new_data
"""
Returns
-------
pred: array that contains class labels for samples in new_data
"""
return self.model.predict(new_data)
35 changes: 35 additions & 0 deletions code/stat159lambda/classification/svm/svm_cross_validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from __future__ import print_function, division
import numpy as np
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score
from stat159lambda.classification import design_matrix as dm
from stat159lambda.classification.svm import svm
from stat159lambda.classification import partition_volumes as pv
from stat159lambda.config import REPO_HOME_PATH, NUM_VOXELS
from stat159lambda.linear_modeling import linear_modeling as lm
from stat159lambda.utils import data_path as dp


voxels_sorted_by_t_statistic = lm.VoxelExtractor(1, 'int-ext').t_stat()
# num_features_values = range(100, NUM_VOXELS/100, 100)
# for num_features in num_features_values:
voxel_feature_indices = voxels_sorted_by_t_statistic[:1000]
design_matrix = dm.DesignMatrix(dp.get_smoothed_2d_path(1, 4), pv.get_train_indices(), voxel_feature_indices)

print('classifying')
X_train = design_matrix.get_design_matrix()
y_train = np.array(design_matrix.get_labels())

cv_accuracies = []
for train, test in KFold(len(X_train), 5):
X_cv_train = X_train[train, :]
y_cv_train = y_train[train]
X_cv_test = X_train[test, :]
y_cv_test = y_train[test]
model = svm.Classifier(X_cv_train, y_cv_train)
model.train()
y_predicted = model.predict(X_cv_test)
cv_accuracies.append(accuracy_score(y_predicted, y_cv_test))

print(np.mean(cv_accuracies))

Binary file modified code/stat159lambda/classification/test_indices.npy
Binary file not shown.
Binary file modified code/stat159lambda/classification/train_indices.npy
Binary file not shown.
4 changes: 2 additions & 2 deletions code/stat159lambda/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@

USE_CACHED_DATA = os.environ.get('STAT159_CACHED_DATA', 'True') == 'True'

NUM_PROCESSES = int(os.environ.get('STAT159_NUM_PROCESSES', 5))

# fMRI data specific constants

NUM_OFFSET_VOLUMES = 9

NUM_VOLUMES = 3543

NUM_VOXELS = 1108800
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you wanted, you could calculate this constant using VOXEL_DIMENSIONS. But it's fine as it is too


VOXEL_DIMENSIONS = (132, 175, 48)

NUM_RUNS = 8

SUBJECTS = [1, 2, 3, 5, 6]
Expand Down
57 changes: 32 additions & 25 deletions code/stat159lambda/linear_modeling/linear_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@


class VoxelExtractor:

def __init__(self, subject, interest_col_str, data=None):
"""
VoxelExtractor generates the t-statistic for each voxel based on a
Expand All @@ -28,13 +27,13 @@ def __init__(self, subject, interest_col_str, data=None):
self.subject = subject
self.interest_col_str = interest_col_str
if not data:
data_path = dp.get_smoothed_path_2d(self.subject, 4)
data = nib.load(data_path).get_data()
data = data[:, :, :, NUM_OFFSET_VOLUMES:]
# Data is shaped as number of voxels by time
self.data = np.reshape(data, (-1, data.shape[-1]))
data_path = dp.get_smoothed_2d_path(self.subject, 4)
data = np.load(data_path)
data = data[:, NUM_OFFSET_VOLUMES:]
self.data = data
self.design = None
self.B = None
self.t_values = None

def get_design_matrix(self):
"""
Expand All @@ -51,7 +50,8 @@ def get_design_matrix(self):
elif self.interest_col_str == "day-night":
interest_col_ind = 0
else:
print("Incorrect interest column name: please use either 'int-ext' or 'day-night'")
print(
"Incorrect interest column name: please use either 'int-ext' or 'day-night'")
interest_col = ss.get_scene_slices()[interest_col_ind]
n_trs = self.data.shape[-1]
design = np.ones((n_trs, 3))
Expand All @@ -70,9 +70,13 @@ def plot_design_matrix(self):
if self.design is None:
self.get_design_matrix()
design_fig = plt.gcf()
plt.imshow(self.design, aspect=0.1, cmap='gray', interpolation='nearest')
plt.imshow(self.design,
aspect=0.1,
cmap='gray',
interpolation='nearest')
plt.xticks([])
design_fig_path = '{0}/figures/design_fig_{1}.png'.format(REPO_HOME_PATH, self.interest_col_str)
design_fig_path = '{0}/figures/design_fig_{1}.png'.format(
REPO_HOME_PATH, self.interest_col_str)
design_fig.savefig(design_fig_path, dpi=100)
plt.clf()

Expand All @@ -98,21 +102,23 @@ def t_stat(self):
"""
if self.design is None:
self.get_design_matrix()
y = np.asarray(self.data.T)
X = np.asarray(self.design)
c = [0, 0, 1]
c = np.atleast_2d(c).T
beta = npl.pinv(X).dot(y)
fitted = X.dot(beta)
errors = y - fitted
RSS = (errors**2).sum(axis=0)
df = X.shape[0] - npl.matrix_rank(X)
MRSS = RSS / df
SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))
SE[SE == 0] = np.amin(SE[SE != 0])
t = c.T.dot(beta) / SE
self.t_values = abs(t[0])
self.t_indices = np.array(self.t_values).argsort()[::-1][:self.t_values.size]
if self.t_values is None:
y = self.data.T
X = self.design
c = [0, 0, 1]
c = np.atleast_2d(c).T
beta = npl.pinv(X).dot(y)
fitted = X.dot(beta)
errors = y - fitted
RSS = (errors**2).sum(axis=0)
df = X.shape[0] - npl.matrix_rank(X)
MRSS = RSS / df
SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))
SE[SE == 0] = np.amin(SE[SE != 0])
t = c.T.dot(beta) / SE
self.t_values = abs(t[0])
self.t_indices = np.array(self.t_values).argsort(
)[::-1][:self.t_values.size]
return self.t_indices

def plot_single_voxel(self, voxel_index):
Expand All @@ -125,6 +131,7 @@ def plot_single_voxel(self, voxel_index):
"""
voxel_img = plt.gcf()
plt.plot(self.data[voxel_index, :])
voxel_img_path = '{0}/figures/voxel_{1}.png'.format(REPO_HOME_PATH, voxel_index)
voxel_img_path = '{0}/figures/voxel_{1}.png'.format(REPO_HOME_PATH,
voxel_index)
voxel_img.savefig(voxel_img_path, dpi=100)
plt.clf()
Loading