
Commit

Merge aa6229f into 8227487
Ying Luo committed Dec 13, 2015
2 parents 8227487 + aa6229f commit 1f49431
Showing 5 changed files with 252 additions and 3 deletions.
45 changes: 45 additions & 0 deletions code/stat159lambda/classification/svm/svm.py
@@ -4,6 +4,27 @@


class Classifier:
"""
An instance of the Classifier class has the following attributes:
(1) model : Support Vector Machine for Regression or Scalable Linear
Support Vector Machine, depending on kernel specification
(2) X : array of training data
(3) y : vector of target values
Parameters
----------
X : array
y : array
C : float that is the penalty parameter of the error term
kernel : string that specifies the kernel type to be used in the
classification algorithm
degree : int that specifies the degree of the polynomial kernel
function
Returns
-------
None
"""
def __init__(self, X, y, C=1.0, kernel='rbf', degree=2):
if kernel == 'linear':
self.model = LinearSVC(C=C)
@@ -12,8 +33,32 @@ def __init__(self, X, y, C=1.0, kernel='rbf', degree=2):
self.X = X
self.y = y


def train(self):
"""
Classifier method that fits the SVM model to the training data X and
target values y stored on the instance
Parameters
----------
None
Returns
-------
None
"""
self.model.fit(self.X, self.y)

def predict(self, new_data):
"""
Performs classification on samples in new_data
Parameters
----------
new_data : array
Returns
-------
pred: array that contains class labels for samples in new_data
"""
return self.model.predict(new_data)
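A minimal usage sketch for the Classifier class added above, assuming scikit-learn is installed and the code/ directory is on the Python path; the array values and the import path are illustrative, not part of the diff:

import numpy as np
from stat159lambda.classification.svm.svm import Classifier

# Tiny synthetic training set: 6 samples with 2 features each
X_train = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.], [2., 2.], [2., 3.]])
y_train = np.array([0, 0, 0, 1, 1, 1])

# kernel='linear' selects LinearSVC in __init__ (per the visible diff)
clf = Classifier(X_train, y_train, C=1.0, kernel='linear')
clf.train()

# Predict class labels for two new samples
print(clf.predict(np.array([[0.5, 0.5], [2., 2.5]])))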
52 changes: 49 additions & 3 deletions code/stat159lambda/reproduction/analyze_similarity.py
@@ -13,24 +13,58 @@


def get_pairwise_correlations():
"""
Loads and returns the precomputed correlations for all possible pairs of
subjects, using the paths given by dp.get_correlation_path
Parameters
----------
None
Returns
-------
correlations : list of arrays, one per subject pair
"""
subject_pairs = itertools.combinations(SUBJECTS, 2)
return [np.load(dp.get_correlation_path(subj_a, subj_b))
for subj_a, subj_b in subject_pairs]


def get_correlations(aggregation='pooled'):
"""
Aggregates the pairwise correlations, either by averaging each voxel across
subject pairs ('mean') or by pooling all values together ('pooled')
Parameters
----------
aggregation : string (optional)
Returns
-------
correlations : array
"""
correlations = np.concatenate(tuple(get_pairwise_correlations()))
if aggregation == 'mean':
correlations = get_pairwise_correlations()
correlations = np.mean(np.matrix(correlations).T, axis=1)
correlations = correlations[~np.isnan(correlations)]
return np.squeeze(np.asarray(correlations))
if aggregation == 'pooled':
correlations = np.concatenate(tuple(get_pairwise_correlations()))
return correlations[~np.isnan(correlations)]
return correlations[~np.isnan(correlations)]


def save_correlation_histogram(aggregation):
"""
Plots a histogram of all correlations obtained with the specified
aggregation and saves it into the figures folder
Parameters
----------
aggregation : string
Returns
-------
None
"""
plt.hist(get_correlations(aggregation), bins=40)
output_file_name = '{0}/figures/{1}_correlation_histogram.png'.format(
REPO_HOME_PATH, aggregation)
@@ -40,6 +74,18 @@ def save_correlation_histogram(aggregation):


def save_correlation_percentiles(aggregation):
"""
Calculates the correlation percentiles for the specified aggregation and
saves them into the figures folder
Parameters
----------
aggregation : string
Returns
-------
None
"""
correlations = get_correlations(aggregation)
results = [[p, np.percentile(correlations, p)] for p in PERCENTILES]
output_file_name = '{0}/figures/{1}_correlation_percentiles.txt'.format(
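An illustrative, self-contained sketch of the 'pooled' versus 'mean' aggregation performed by get_correlations above; small synthetic arrays stand in for the saved per-pair correlation files:

import numpy as np

# Each array stands in for one subject pair's saved per-voxel correlations
pairwise = [np.array([0.1, 0.4, np.nan]),
            np.array([0.2, 0.5, 0.3]),
            np.array([0.0, 0.6, np.nan])]

# 'pooled': concatenate every value across pairs, then drop NaNs
pooled = np.concatenate(pairwise)
pooled = pooled[~np.isnan(pooled)]

# 'mean': average each voxel across pairs, then drop voxels whose mean is NaN
stacked = np.vstack(pairwise)           # shape (num_pairs, num_voxels)
voxel_means = np.mean(stacked, axis=0)
mean_agg = voxel_means[~np.isnan(voxel_means)]

print(pooled)    # 7 pooled values
print(mean_agg)  # per-voxel means for the voxels without any NaN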
26 changes: 26 additions & 0 deletions code/stat159lambda/reproduction/inter_run_diagnostics.py
@@ -8,6 +8,19 @@


def calc_vol_rms_diff(data_file_path):
"""
Takes the difference between consecutive volumes, data[n+1] - data[n], and
computes the root mean square of each difference. Excludes the data points
from the first 17 seconds of tuning.
Parameters
----------
data_file_path : string
Returns
-------
vol_rms_diff : array
"""
data = np.load(open(data_file_path))
diff_data = np.diff(data, axis=1)
del data
@@ -17,6 +30,19 @@ def calc_vol_rms_diff(data_file_path):


def save_plot(vol_rms_diff, subj_num):
"""
Plots the root mean square differences for a particular subject and saves
that plot into the figures folder
Parameters
----------
vol_rms_diff : array
subj_num : int
Returns
-------
None
"""
plt.plot(vol_rms_diff)
plt.savefig('{0}/figures/subj{1}_vol_rms_diff.png'.format(
REPO_HOME_PATH, subj_num))
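A small synthetic illustration of the volume-to-volume root mean square difference described in calc_vol_rms_diff above; the array shape is made up, and the real function also drops the initial tuning time points and loads its data from data_file_path:

import numpy as np

# Synthetic "data": 4 voxels observed over 10 time points
data = np.random.rand(4, 10)

# Difference between consecutive volumes along the time axis
diff_data = np.diff(data, axis=1)

# Root mean square of each difference across voxels
vol_rms_diff = np.sqrt(np.mean(diff_data ** 2, axis=0))
print(vol_rms_diff.shape)  # (9,): one value per pair of consecutive volumes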
42 changes: 42 additions & 0 deletions code/stat159lambda/reproduction/similarity.py
@@ -13,6 +13,20 @@


def pearson_r(X, Y):
"""
Calculates the Pearson correlation between corresponding rows of two
matrices. Assumes the two matrices given have the same shape.
Parameters
----------
X : 2-D array with n rows
Y : 2-D array with n rows, same shape as X
Returns
-------
r : vector of length n, where element i is the correlation of rows X_i
and Y_i
"""
X_centered = X - np.mean(X, axis=1)[:, np.newaxis]
Y_centered = Y - np.mean(Y, axis=1)[:, np.newaxis]
return inner1d(X_centered, Y_centered) / (np.linalg.norm(X_centered,
@@ -22,6 +36,19 @@ def pearson_r(X, Y):


def correlation(subj_a_data, subj_b_data):
"""
Calculates the per-voxel correlation between two subjects, computed run by
run and averaged across runs.
Parameters
----------
subj_a_data : array
subj_b_data : array
Returns
-------
correlations : array of per-voxel correlations
"""
run_split_a_data = np.split(subj_a_data, RUN_DIVISIONS[:-1], axis=1)
run_split_b_data = np.split(subj_b_data, RUN_DIVISIONS[:-1], axis=1)
correlations = np.zeros(NUM_VOXELS)
@@ -32,6 +59,21 @@ def correlation(subj_a_data, subj_b_data):


def calculate_and_save_correlation(subj_1_num, subj_2_num):
"""
Calculates the correlation between two subjects using 2-D data smoothed
with an 8 mm full width half maximum (FWHM) kernel, and saves the values to
the designated correlation path. If a file with the calculated correlations
already exists and cached data is enabled, that version is used instead.
Parameters
----------
subj_1_num : int
subj_2_num : int
Returns
-------
None
"""
correlation_path = dp.get_correlation_path(subj_1_num, subj_2_num)
if not exists(correlation_path) or not USE_CACHED_DATA:
subj_1_data = np.load(dp.get_smoothed_2d_path(subj_1_num, 8))
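A self-contained check of the row-wise Pearson correlation that pearson_r above computes. This sketch uses np.einsum in place of inner1d (inner1d comes from numpy.core.umath_tests, which may be unavailable in newer NumPy releases); the random data is purely illustrative:

import numpy as np

X = np.random.rand(3, 50)   # 3 "voxels" x 50 time points
Y = np.random.rand(3, 50)

X_centered = X - np.mean(X, axis=1)[:, np.newaxis]
Y_centered = Y - np.mean(Y, axis=1)[:, np.newaxis]

# Row-wise dot products of the centered rows, divided by the product of norms
r = np.einsum('ij,ij->i', X_centered, Y_centered) / (
    np.linalg.norm(X_centered, axis=1) * np.linalg.norm(Y_centered, axis=1))

# Each entry should match the ordinary Pearson correlation of the same rows
for i in range(3):
    assert np.isclose(r[i], np.corrcoef(X[i], Y[i])[0, 1])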
90 changes: 90 additions & 0 deletions code/stat159lambda/utils/data_path.py
@@ -5,32 +5,122 @@


def get_raw_path(subj_num, run_num):
"""
Derives the absolute path to the raw data for a particular subject and run
Parameters
----------
subj_num : int
run_num : int
Returns
-------
path : string
"""
if subj_num < 10:
subj_num = '0' + str(subj_num)
return '{0}/data/raw/sub0{1}/task001_run00{2}/bold_dico_dico_rcds_nl.nii'.format(
REPO_HOME_PATH, subj_num, run_num)


def get_concatenated_path(subj_num):
"""
Derives the absolute path to the concatenated data for a particular subject
Parameters
----------
subj_num : int
Returns
-------
path : string
"""
return '{0}/data/processed/sub{1}_rcds.npy'.format(REPO_HOME_PATH,
subj_num)


def get_smoothed_path(subj_num, fwhm_mm):
"""
Derives the absolute path to the smoothed data for a particular subject and
a particular full width half maximum (FWHM) smoothing level
Parameters
----------
subj_num : int
fwhm_mm : int
Returns
-------
path: string
"""
return '{0}/data/processed/sub{1}_rcds_smoothed_{2}_mm.npy'.format(
REPO_HOME_PATH, subj_num, fwhm_mm)


def get_smoothed_2d_path(subj_num, fwhm_mm):
"""
Derives the absolute path to the smoothed 2-D data for a particular subject
and a particular full width half maximum (FWHM) smoothing level
Parameters
----------
subj_num : int
fwhm_mm : int
Returns
-------
path: string
"""
return '{0}/data/processed/sub{1}_rcds_smoothed_{2}_mm_2d.npy'.format(
REPO_HOME_PATH, subj_num, fwhm_mm)


def get_correlation_path(subj_1_num, subj_2_num):
"""
Derives the absolute path to the calculated correlations between two
subjects
Parameters
----------
subj_1_num : int
subj_2_num : int
Returns
-------
path: string
"""
return '{0}/data/processed/sub{1}_sub{2}_correlation.npy'.format(
REPO_HOME_PATH, subj_1_num, subj_2_num)


def get_2d_path(subj_num):
"""
Derives the absolute path to the 2-D data for a particular subject,
reshaped from the original 4-D array
Parameters
----------
subj_num : int
Returns
-------
path : string
"""
return '{0}/data/processed/sub{1}_rcds_2d.npy'.format(REPO_HOME_PATH,
subj_num)

def get_correlation_hist_path(aggregation):
"""
Derives the absolute path to the correlation histogram figure for the
specified aggregation ('mean' or 'pooled')
Parameters
----------
aggregation : string
Returns
-------
path : string
"""
return '{0}/figures/{1}_correlation_histogram.png'.format(REPO_HOME_PATH,
aggregation)
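An illustrative sketch of the subject-number padding and string formatting used by get_raw_path above, with a placeholder repository prefix (REPO_HOME_PATH is defined elsewhere in the project; the value below is made up):

REPO_HOME_PATH = '/path/to/project-lambda'  # placeholder value

def get_raw_path(subj_num, run_num):
    # Subjects below 10 are zero-padded so that 'sub0{}' yields names like sub001
    if subj_num < 10:
        subj_num = '0' + str(subj_num)
    return '{0}/data/raw/sub0{1}/task001_run00{2}/bold_dico_dico_rcds_nl.nii'.format(
        REPO_HOME_PATH, subj_num, run_num)

print(get_raw_path(1, 3))
# /path/to/project-lambda/data/raw/sub001/task001_run003/bold_dico_dico_rcds_nl.nii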
