
Commit

Merge aa6229f into 8227487
Ying Luo committed Dec 13, 2015
2 parents 8227487 + aa6229f commit 1f49431
Showing 5 changed files with 252 additions and 3 deletions.
45 changes: 45 additions & 0 deletions code/stat159lambda/classification/svm/svm.py
@@ -4,6 +4,27 @@


class Classifier:
"""
An instance of the Classifier class has the following attributes:
(1) model : Support Vector Machine for Regression or Scalable Linear
Support Vector Machine, depending on kernel specification
(2) X : array of training data
(3) y : vector of target values
Parameters
----------
X : array
y : array
C : float that is the penalty parameter of the error term
kernel : string that specifies the kernel type to be used in the
classification algorithm
degree : int that specifies the degree of the polynomial kernel
function
Returns
-------
None
"""
def __init__(self, X, y, C=1.0, kernel='rbf', degree=2):
if kernel == 'linear':
self.model = LinearSVC(C=C)
@@ -12,8 +33,32 @@ def __init__(self, X, y, C=1.0, kernel='rbf', degree=2):
self.X = X
self.y = y


def train(self):
"""
Classifier method that fits the SVM model to the training data X and
target values y stored on the instance
Parameters
----------
None
Returns
-------
None
"""
self.model.fit(self.X, self.y)

def predict(self, new_data):
"""
Performs classification on samples in new_data
Parameters
----------
new_data : array
Returns
-------
pred: array that contains class labels for samples in new_data
"""
return self.model.predict(new_data)
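A minimal usage sketch for the Classifier class added above, assuming scikit-learn is installed and the code/ directory is on the Python path; the array values and the import path are illustrative, not part of the diff:

import numpy as np
from stat159lambda.classification.svm.svm import Classifier

# Tiny synthetic training set: 6 samples with 2 features each
X_train = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.], [2., 2.], [2., 3.]])
y_train = np.array([0, 0, 0, 1, 1, 1])

# kernel='linear' selects LinearSVC in __init__ (per the visible diff)
clf = Classifier(X_train, y_train, C=1.0, kernel='linear')
clf.train()

# Predict class labels for two new samples
print(clf.predict(np.array([[0.5, 0.5], [2., 2.5]])))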
52 changes: 49 additions & 3 deletions code/stat159lambda/reproduction/analyze_similarity.py
@@ -13,24 +13,58 @@


def get_pairwise_correlations():
"""
Loads and returns the precomputed correlations for all possible pairs of
subjects, using the paths given by dp.get_correlation_path
Parameters
----------
None
Returns
-------
correlations : list of arrays, one per subject pair
"""
subject_pairs = itertools.combinations(SUBJECTS, 2)
return [np.load(dp.get_correlation_path(subj_a, subj_b))
for subj_a, subj_b in subject_pairs]


def get_correlations(aggregation='pooled'):
"""
Aggregates the pairwise correlations, either by averaging each voxel across
subject pairs ('mean') or by pooling all values together ('pooled')
Parameters
----------
aggregation : string (optional)
Returns
-------
correlations : array
"""
correlations = np.concatenate(tuple(get_pairwise_correlations()))
if aggregation == 'mean':
correlations = get_pairwise_correlations()
correlations = np.mean(np.matrix(correlations).T, axis=1)
correlations = correlations[~np.isnan(correlations)]
return np.squeeze(np.asarray(correlations))
if aggregation == 'pooled':
correlations = np.concatenate(tuple(get_pairwise_correlations()))
return correlations[~np.isnan(correlations)]
return correlations[~np.isnan(correlations)]


def save_correlation_histogram(aggregation):
"""
Plots a histogram of all correlations obtained with the specified
aggregation and saves it into the figures folder
Parameters
----------
aggregation : string
Returns
-------
None
"""
plt.hist(get_correlations(aggregation), bins=40)
output_file_name = '{0}/figures/{1}_correlation_histogram.png'.format(
REPO_HOME_PATH, aggregation)
@@ -40,6 +74,18 @@ def save_correlation_histogram(aggregation):


def save_correlation_percentiles(aggregation):
"""
Calculates the correlation percentiles for the specified aggregation and
saves them into the figures folder
Parameters
----------
aggregation : string
Returns
-------
None
"""
correlations = get_correlations(aggregation)
results = [[p, np.percentile(correlations, p)] for p in PERCENTILES]
output_file_name = '{0}/figures/{1}_correlation_percentiles.txt'.format(
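An illustrative, self-contained sketch of the 'pooled' versus 'mean' aggregation performed by get_correlations above; small synthetic arrays stand in for the saved per-pair correlation files:

import numpy as np

# Each array stands in for one subject pair's saved per-voxel correlations
pairwise = [np.array([0.1, 0.4, np.nan]),
            np.array([0.2, 0.5, 0.3]),
            np.array([0.0, 0.6, np.nan])]

# 'pooled': concatenate every value across pairs, then drop NaNs
pooled = np.concatenate(pairwise)
pooled = pooled[~np.isnan(pooled)]

# 'mean': average each voxel across pairs, then drop voxels whose mean is NaN
stacked = np.vstack(pairwise)           # shape (num_pairs, num_voxels)
voxel_means = np.mean(stacked, axis=0)
mean_agg = voxel_means[~np.isnan(voxel_means)]

print(pooled)    # 7 pooled values
print(mean_agg)  # per-voxel means for the voxels without any NaN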
26 changes: 26 additions & 0 deletions code/stat159lambda/reproduction/inter_run_diagnostics.py
@@ -8,6 +8,19 @@


def calc_vol_rms_diff(data_file_path):
"""
Takes the difference between consecutive volumes, data[n+1] - data[n], and
computes the root mean square of each difference. Excludes the data points
from the first 17 seconds of tuning.
Parameters
----------
data_file_path : string
Returns
-------
vol_rms_diff : array
"""
data = np.load(open(data_file_path))
diff_data = np.diff(data, axis=1)
del data
@@ -17,6 +30,19 @@ def calc_vol_rms_diff(data_file_path):


def save_plot(vol_rms_diff, subj_num):
"""
Plots the root mean square differences for a particular subject and saves
that plot into the figures folder
Parameters
----------
vol_rms_diff : array
subj_num : int
Returns
-------
None
"""
plt.plot(vol_rms_diff)
plt.savefig('{0}/figures/subj{1}_vol_rms_diff.png'.format(
REPO_HOME_PATH, subj_num))
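A small synthetic illustration of the volume-to-volume root mean square difference described in calc_vol_rms_diff above; the array shape is made up, and the real function also drops the initial tuning time points and loads its data from data_file_path:

import numpy as np

# Synthetic "data": 4 voxels observed over 10 time points
data = np.random.rand(4, 10)

# Difference between consecutive volumes along the time axis
diff_data = np.diff(data, axis=1)

# Root mean square of each difference across voxels
vol_rms_diff = np.sqrt(np.mean(diff_data ** 2, axis=0))
print(vol_rms_diff.shape)  # (9,): one value per pair of consecutive volumes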
42 changes: 42 additions & 0 deletions code/stat159lambda/reproduction/similarity.py
@@ -13,6 +13,20 @@


def pearson_r(X, Y):
"""
Calculates the Pearson correlation between corresponding rows of two
matrices. Assumes the two matrices given have the same shape.
Parameters
----------
X : 2-D array with n rows
Y : 2-D array with n rows, same shape as X
Returns
-------
r : vector of length n, where element i is the correlation of rows X_i
and Y_i
"""
X_centered = X - np.mean(X, axis=1)[:, np.newaxis]
Y_centered = Y - np.mean(Y, axis=1)[:, np.newaxis]
return inner1d(X_centered, Y_centered) / (np.linalg.norm(X_centered,
@@ -22,6 +36,19 @@ def pearson_r(X, Y):


def correlation(subj_a_data, subj_b_data):
"""
Calculates the per-voxel correlation between two subjects, computed run by
run and averaged across runs.
Parameters
----------
subj_a_data : array
subj_b_data : array
Returns
-------
correlations : array of per-voxel correlations
"""
run_split_a_data = np.split(subj_a_data, RUN_DIVISIONS[:-1], axis=1)
run_split_b_data = np.split(subj_b_data, RUN_DIVISIONS[:-1], axis=1)
correlations = np.zeros(NUM_VOXELS)
@@ -32,6 +59,21 @@ def correlation(subj_a_data, subj_b_data):


def calculate_and_save_correlation(subj_1_num, subj_2_num):
"""
Calculates the correlation between two subjects using 2-D data smoothed
with an 8 mm full width half maximum (FWHM) kernel, and saves the values to
the designated correlation path. If a file with the calculated correlations
already exists and cached data is enabled, that version is used instead.
Parameters
----------
subj_1_num : int
subj_2_num : int
Returns
-------
None
"""
correlation_path = dp.get_correlation_path(subj_1_num, subj_2_num)
if not exists(correlation_path) or not USE_CACHED_DATA:
subj_1_data = np.load(dp.get_smoothed_2d_path(subj_1_num, 8))
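A self-contained check of the row-wise Pearson correlation that pearson_r above computes. This sketch uses np.einsum in place of inner1d (inner1d comes from numpy.core.umath_tests, which may be unavailable in newer NumPy releases); the random data is purely illustrative:

import numpy as np

X = np.random.rand(3, 50)   # 3 "voxels" x 50 time points
Y = np.random.rand(3, 50)

X_centered = X - np.mean(X, axis=1)[:, np.newaxis]
Y_centered = Y - np.mean(Y, axis=1)[:, np.newaxis]

# Row-wise dot products of the centered rows, divided by the product of norms
r = np.einsum('ij,ij->i', X_centered, Y_centered) / (
    np.linalg.norm(X_centered, axis=1) * np.linalg.norm(Y_centered, axis=1))

# Each entry should match the ordinary Pearson correlation of the same rows
for i in range(3):
    assert np.isclose(r[i], np.corrcoef(X[i], Y[i])[0, 1])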
90 changes: 90 additions & 0 deletions code/stat159lambda/utils/data_path.py
@@ -5,32 +5,122 @@


def get_raw_path(subj_num, run_num):
"""
Derives the absolute path to the raw data for a particular subject and run
Parameters
----------
subj_num : int
run_num : int
Returns
-------
path : string
"""
if subj_num < 10:
subj_num = '0' + str(subj_num)
return '{0}/data/raw/sub0{1}/task001_run00{2}/bold_dico_dico_rcds_nl.nii'.format(
REPO_HOME_PATH, subj_num, run_num)


def get_concatenated_path(subj_num):
"""
Derives the absolute path to the concatenated data for a particular subject
Parameters
----------
subj_num : int
Returns
-------
path : string
"""
return '{0}/data/processed/sub{1}_rcds.npy'.format(REPO_HOME_PATH,
subj_num)


def get_smoothed_path(subj_num, fwhm_mm):
"""
Derives the absolute path to the smoothed data for a particular subject and
a particular full width half maximum (FWHM) smoothing level
Parameters
----------
subj_num : int
fwhm_mm : int
Returns
-------
path: string
"""
return '{0}/data/processed/sub{1}_rcds_smoothed_{2}_mm.npy'.format(
REPO_HOME_PATH, subj_num, fwhm_mm)


def get_smoothed_2d_path(subj_num, fwhm_mm):
"""
Derives the absolute path to the smoothed 2-D data for a particular subject
and a particular full width half maximum (FWHM) smoothing level
Parameters
----------
subj_num : int
fwhm_mm : int
Returns
-------
path: string
"""
return '{0}/data/processed/sub{1}_rcds_smoothed_{2}_mm_2d.npy'.format(
REPO_HOME_PATH, subj_num, fwhm_mm)


def get_correlation_path(subj_1_num, subj_2_num):
"""
Derives the absolute path to the calculated correlations between two
subjects
Parameters
----------
subj_1_num : int
subj_2_num : int
Returns
-------
path: string
"""
return '{0}/data/processed/sub{1}_sub{2}_correlation.npy'.format(
REPO_HOME_PATH, subj_1_num, subj_2_num)


def get_2d_path(subj_num):
"""
Derives the absolute path to the 2-D data for a particular subject,
reshaped from the original 4-D array
Parameters
----------
subj_num : int
Returns
-------
path : string
"""
return '{0}/data/processed/sub{1}_rcds_2d.npy'.format(REPO_HOME_PATH,
subj_num)

def get_correlation_hist_path(aggregation):
"""
Derives the absolute path to the correlation histogram figure for the
specified aggregation ('mean' or 'pooled')
Parameters
----------
aggregation : string
Returns
-------
path : string
"""
return '{0}/figures/{1}_correlation_histogram.png'.format(REPO_HOME_PATH,
aggregation)
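An illustrative sketch of the subject-number padding and string formatting used by get_raw_path above, with a placeholder repository prefix (REPO_HOME_PATH is defined elsewhere in the project; the value below is made up):

REPO_HOME_PATH = '/path/to/project-lambda'  # placeholder value

def get_raw_path(subj_num, run_num):
    # Subjects below 10 are zero-padded so that 'sub0{}' yields names like sub001
    if subj_num < 10:
        subj_num = '0' + str(subj_num)
    return '{0}/data/raw/sub0{1}/task001_run00{2}/bold_dico_dico_rcds_nl.nii'.format(
        REPO_HOME_PATH, subj_num, run_num)

print(get_raw_path(1, 3))
# /path/to/project-lambda/data/raw/sub001/task001_run003/bold_dico_dico_rcds_nl.nii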
