Skip to content

Commit

Permalink
Merge pull request #78 from AlonDaks/classification
Browse files Browse the repository at this point in the history
Finished plots and figures for reproduction
  • Loading branch information
jodreen committed Dec 14, 2015
2 parents 3bd3483 + 7d44c5e commit c0fd181
Show file tree
Hide file tree
Showing 20 changed files with 302 additions and 367 deletions.
28 changes: 8 additions & 20 deletions code/stat159lambda/classification/design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,13 @@


class DesignMatrix:
def __init__(self, data_file, volume_indices, voxels_indices):
self.data_file = data_file
self.volume_indices = volume_indices
self.voxels_indices = voxels_indices
self.X = self.generate_design_matrix_()
gc.collect()
self.y = self.generate_labels_(ssm.SceneSlicer())
def __init__(self, data_file):
self.X = np.load(data_file).T[NUM_OFFSET_VOLUMES:, :]
ss = ssm.SceneSlicer()
self.y = np.array(ss.get_scene_slices()[0][NUM_OFFSET_VOLUMES:])

def get_design_matrix(self):
return self.X
def get_design_matrix(self, volume_indices, voxels_indices):
return self.X[volume_indices, :][:, voxels_indices]

def get_labels(self):
return self.y

def generate_design_matrix_(self):
data = np.load(self.data_file)
return data.T[NUM_OFFSET_VOLUMES:, self.voxels_indices][
self.volume_indices, :]

def generate_labels_(self, ss):
return np.array(ss.get_scene_slices()[0][NUM_OFFSET_VOLUMES:][
self.volume_indices])
def get_labels(self, volume_indices):
return self.y[volume_indices]
5 changes: 4 additions & 1 deletion code/stat159lambda/classification/partition_volumes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import numpy as np
import os
from stat159lambda.config import REPO_HOME_PATH, NUM_OFFSET_VOLUMES, NUM_VOLUMES
from stat159lambda.utils import scene_slicer as ss

def partition_volumes():
volume_indices = range(NUM_VOLUMES - NUM_OFFSET_VOLUMES)
volume_indices = np.array(range(NUM_VOLUMES - NUM_OFFSET_VOLUMES))
clean_slice_mask = ss.SceneSlicer().get_clean_slice_mask()
volume_indices = volume_indices[clean_slice_mask]
np.random.shuffle(volume_indices)
num_train = int(.8*len(volume_indices))
train_indices = volume_indices[:num_train]
Expand Down
2 changes: 1 addition & 1 deletion code/stat159lambda/classification/random_forest/rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self,
y,
max_features='auto',
depth=None,
n_estimators=400):
n_estimators=300):
self.model = RandomForestClassifier(max_features=max_features,
n_estimators=n_estimators,
max_depth=None,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,38 @@
from __future__ import print_function, division
import numpy as np
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score
from stat159lambda.classification import design_matrix as dm
from stat159lambda.classification.random_forest import rf
from stat159lambda.classification import partition_volumes as pv
from stat159lambda.config import REPO_HOME_PATH
from stat159lambda.config import REPO_HOME_PATH, NUM_VOXELS
from stat159lambda.linear_modeling import linear_modeling as lm
from stat159lambda.utils import data_path as dp

design_matrix = dm.DesignMatrix(
'{0}/data/processed/sub1_rcds_2d.npy'.format(REPO_HOME_PATH),
pv.get_train_indices(), range(200))

X_train = design_matrix.get_design_matrix()
y_train = np.array(design_matrix.get_labels())
voxels_sorted_by_t_statistic = lm.VoxelExtractor(1, 'int-ext').t_stat()
num_features_values = [200, 400, 600, 1000, 1500, 2000, 3000, 5000, 10000]
design_matrix = dm.DesignMatrix(dp.get_smoothed_2d_path(1, 4))
train_volume_indices = pv.get_train_indices()
cv_values = []
for num_features in num_features_values:
voxel_feature_indices = voxels_sorted_by_t_statistic[:num_features]

cv_accuracies = []
for train, test in KFold(len(X_train), 5):
X_cv_train = X_train[train, :]
y_cv_train = y_train[train]
X_cv_test = X_train[test, :]
y_cv_test = y_train[test]
model = rf.Classifier(X_cv_train, y_cv_train)
model.train()
y_predicted = model.predict(X_cv_test)
cv_accuracies.append(accuracy_score(y_predicted, y_cv_test))
X_train = design_matrix.get_design_matrix(train_volume_indices, voxel_feature_indices)
y_train = np.array(design_matrix.get_labels(train_volume_indices))

print np.mean(cv_accuracies)
cv_accuracies = []
for train, test in KFold(len(X_train), 5):
X_cv_train = X_train[train, :]
y_cv_train = y_train[train]
X_cv_test = X_train[test, :]
y_cv_test = y_train[test]
model = rf.Classifier(X_cv_train, y_cv_train)
model.train()
y_predicted = model.predict(X_cv_test)
cv_accuracies.append(accuracy_score(y_predicted, y_cv_test))

cv_values.append(np.mean(cv_accuracies))

np.save('cv_values', cv_values)

40 changes: 20 additions & 20 deletions code/stat159lambda/classification/svm/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class Classifier:
-------
None
"""
def __init__(self, X, y, C=1.0, kernel='rbf', degree=2):
def __init__(self, X, y, C=.001, kernel='linear', degree=2):
if kernel == 'linear':
self.model = LinearSVC(C=C)
else:
Expand All @@ -35,30 +35,30 @@ def __init__(self, X, y, C=1.0, kernel='rbf', degree=2):


def train(self):
"""
Classifier method that fits the SVM model according to the given training
data X
"""
Classifier method that fits the SVM model according to the given training
data X
Parameters
----------
None
Parameters
----------
None
Returns
-------
self : object
"""
Returns
-------
self : object
"""
self.model.fit(self.X, self.y)

def predict(self, new_data):
"""
Performs classification on samples in new_data
"""
Performs classification on samples in new_data
Parameters
----------
new_data : array
Parameters
----------
new_data : array
Returns
-------
pred: array that contains class labels for samples in new_data
"""
Returns
-------
pred: array that contains class labels for samples in new_data
"""
return self.model.predict(new_data)
35 changes: 35 additions & 0 deletions code/stat159lambda/classification/svm/svm_cross_validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from __future__ import print_function, division
import numpy as np
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score
from stat159lambda.classification import design_matrix as dm
from stat159lambda.classification.svm import svm
from stat159lambda.classification import partition_volumes as pv
from stat159lambda.config import REPO_HOME_PATH, NUM_VOXELS
from stat159lambda.linear_modeling import linear_modeling as lm
from stat159lambda.utils import data_path as dp


voxels_sorted_by_t_statistic = lm.VoxelExtractor(1, 'int-ext').t_stat()
# num_features_values = range(100, NUM_VOXELS/100, 100)
# for num_features in num_features_values:
voxel_feature_indices = voxels_sorted_by_t_statistic[:1000]
design_matrix = dm.DesignMatrix(dp.get_smoothed_2d_path(1, 4), pv.get_train_indices(), voxel_feature_indices)

print('classifying')
X_train = design_matrix.get_design_matrix()
y_train = np.array(design_matrix.get_labels())

cv_accuracies = []
for train, test in KFold(len(X_train), 5):
X_cv_train = X_train[train, :]
y_cv_train = y_train[train]
X_cv_test = X_train[test, :]
y_cv_test = y_train[test]
model = svm.Classifier(X_cv_train, y_cv_train)
model.train()
y_predicted = model.predict(X_cv_test)
cv_accuracies.append(accuracy_score(y_predicted, y_cv_test))

print(np.mean(cv_accuracies))

Binary file modified code/stat159lambda/classification/test_indices.npy
Binary file not shown.
Binary file modified code/stat159lambda/classification/train_indices.npy
Binary file not shown.
4 changes: 2 additions & 2 deletions code/stat159lambda/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@

USE_CACHED_DATA = os.environ.get('STAT159_CACHED_DATA', 'True') == 'True'

NUM_PROCESSES = int(os.environ.get('STAT159_NUM_PROCESSES', 5))

# fMRI data specific constants

NUM_OFFSET_VOLUMES = 9

NUM_VOLUMES = 3543

NUM_VOXELS = 1108800

VOXEL_DIMENSIONS = (132, 175, 48)

NUM_RUNS = 8

SUBJECTS = [1, 2, 3, 5, 6]
Expand Down
57 changes: 32 additions & 25 deletions code/stat159lambda/linear_modeling/linear_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@


class VoxelExtractor:

def __init__(self, subject, interest_col_str, data=None):
"""
VoxelExtractor generates the t-statistic for each voxel based on a
Expand All @@ -28,13 +27,13 @@ def __init__(self, subject, interest_col_str, data=None):
self.subject = subject
self.interest_col_str = interest_col_str
if not data:
data_path = dp.get_smoothed_path_2d(self.subject, 4)
data = nib.load(data_path).get_data()
data = data[:, :, :, NUM_OFFSET_VOLUMES:]
# Data is shaped as number of voxels by time
self.data = np.reshape(data, (-1, data.shape[-1]))
data_path = dp.get_smoothed_2d_path(self.subject, 4)
data = np.load(data_path)
data = data[:, NUM_OFFSET_VOLUMES:]
self.data = data
self.design = None
self.B = None
self.t_values = None

def get_design_matrix(self):
"""
Expand All @@ -51,7 +50,8 @@ def get_design_matrix(self):
elif self.interest_col_str == "day-night":
interest_col_ind = 0
else:
print("Incorrect interest column name: please use either 'int-ext' or 'day-night'")
print(
"Incorrect interest column name: please use either 'int-ext' or 'day-night'")
interest_col = ss.get_scene_slices()[interest_col_ind]
n_trs = self.data.shape[-1]
design = np.ones((n_trs, 3))
Expand All @@ -70,9 +70,13 @@ def plot_design_matrix(self):
if self.design is None:
self.get_design_matrix()
design_fig = plt.gcf()
plt.imshow(self.design, aspect=0.1, cmap='gray', interpolation='nearest')
plt.imshow(self.design,
aspect=0.1,
cmap='gray',
interpolation='nearest')
plt.xticks([])
design_fig_path = '{0}/figures/design_fig_{1}.png'.format(REPO_HOME_PATH, self.interest_col_str)
design_fig_path = '{0}/figures/design_fig_{1}.png'.format(
REPO_HOME_PATH, self.interest_col_str)
design_fig.savefig(design_fig_path, dpi=100)
plt.clf()

Expand All @@ -98,21 +102,23 @@ def t_stat(self):
"""
if self.design is None:
self.get_design_matrix()
y = np.asarray(self.data.T)
X = np.asarray(self.design)
c = [0, 0, 1]
c = np.atleast_2d(c).T
beta = npl.pinv(X).dot(y)
fitted = X.dot(beta)
errors = y - fitted
RSS = (errors**2).sum(axis=0)
df = X.shape[0] - npl.matrix_rank(X)
MRSS = RSS / df
SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))
SE[SE == 0] = np.amin(SE[SE != 0])
t = c.T.dot(beta) / SE
self.t_values = abs(t[0])
self.t_indices = np.array(self.t_values).argsort()[::-1][:self.t_values.size]
if self.t_values is None:
y = self.data.T
X = self.design
c = [0, 0, 1]
c = np.atleast_2d(c).T
beta = npl.pinv(X).dot(y)
fitted = X.dot(beta)
errors = y - fitted
RSS = (errors**2).sum(axis=0)
df = X.shape[0] - npl.matrix_rank(X)
MRSS = RSS / df
SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))
SE[SE == 0] = np.amin(SE[SE != 0])
t = c.T.dot(beta) / SE
self.t_values = abs(t[0])
self.t_indices = np.array(self.t_values).argsort(
)[::-1][:self.t_values.size]
return self.t_indices

def plot_single_voxel(self, voxel_index):
Expand All @@ -125,6 +131,7 @@ def plot_single_voxel(self, voxel_index):
"""
voxel_img = plt.gcf()
plt.plot(self.data[voxel_index, :])
voxel_img_path = '{0}/figures/voxel_{1}.png'.format(REPO_HOME_PATH, voxel_index)
voxel_img_path = '{0}/figures/voxel_{1}.png'.format(REPO_HOME_PATH,
voxel_index)
voxel_img.savefig(voxel_img_path, dpi=100)
plt.clf()
Loading

0 comments on commit c0fd181

Please sign in to comment.