From aa6229f2f446b93882293c82cd2ee1b3e6c20db6 Mon Sep 17 00:00:00 2001
From: Ying <ying.t.luo@gmail.com>
Date: Sat, 12 Dec 2015 16:07:37 -0800
Subject: [PATCH] Updated data_path.py and added documentation to
 analyze_similarity.py

---
 .../reproduction/analyze_similarity.py        | 52 +++++++++++++++++--
 code/stat159lambda/utils/data_path.py         | 16 ++++++
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/code/stat159lambda/reproduction/analyze_similarity.py b/code/stat159lambda/reproduction/analyze_similarity.py
index c9c1abb..9e51015 100644
--- a/code/stat159lambda/reproduction/analyze_similarity.py
+++ b/code/stat159lambda/reproduction/analyze_similarity.py
@@ -13,24 +13,58 @@
 
 
 def get_pairwise_correlations():
+    """
+    Loads and returns the saved correlation arrays for all possible pairs of
+    subjects (one np.load call per pair's correlation file)
+
+    Parameters
+    ----------
+    None
+
+    Returns
+    -------
+    correlations : list of arrays, one per subject pair
+    """
     subject_pairs = itertools.combinations(SUBJECTS, 2)
     return [np.load(dp.get_correlation_path(subj_a, subj_b))
             for subj_a, subj_b in subject_pairs]
 
 
 def get_correlations(aggregation='pooled'):
+    """
+    Aggregates the pairwise correlations by averaging across subject pairs
+    ('mean') or by pooling all of them (otherwise); NaN values are dropped
+
+    Parameters
+    ----------
+    aggregation : string (optional)
+
+    Returns
+    -------
+    correlations : array
+    """
     correlations = np.concatenate(tuple(get_pairwise_correlations()))
     if aggregation == 'mean':
         correlations = get_pairwise_correlations()
         correlations = np.mean(np.matrix(correlations).T, axis=1)
         correlations = correlations[~np.isnan(correlations)]
         return np.squeeze(np.asarray(correlations))
-    if aggregation == 'pooled':
-        correlations = np.concatenate(tuple(get_pairwise_correlations()))
-        return correlations[~np.isnan(correlations)]
+    return correlations[~np.isnan(correlations)]
 
 
 def save_correlation_histogram(aggregation):
+    """
+    Plots and saves the histogram of all correlations calculated by the
+    specified aggregation into figures folder
+
+    Parameters
+    ----------
+    aggregation : string
+
+    Returns
+    -------
+    None
+    """
     plt.hist(get_correlations(aggregation), bins=40)
     output_file_name = '{0}/figures/{1}_correlation_histogram.png'.format(
         REPO_HOME_PATH, aggregation)
@@ -40,6 +74,18 @@ def save_correlation_histogram(aggregation):
 
 
 def save_correlation_percentiles(aggregation):
+    """
+    Calculates and saves the correlation percentiles calculated by the
+    specified aggregation into figures folder
+
+    Parameters
+    ----------
+    aggregation : string
+
+    Returns
+    -------
+    None
+    """
     correlations = get_correlations(aggregation)
     results = [[p, np.percentile(correlations, p)] for p in PERCENTILES]
     output_file_name = '{0}/figures/{1}_correlation_percentiles.txt'.format(
diff --git a/code/stat159lambda/utils/data_path.py b/code/stat159lambda/utils/data_path.py
index 5579c6b..d5511cb 100644
--- a/code/stat159lambda/utils/data_path.py
+++ b/code/stat159lambda/utils/data_path.py
@@ -108,3 +108,19 @@ def get_2d_path(subj_num):
     """
     return '{0}/data/processed/sub{1}_rcds_2d.npy'.format(REPO_HOME_PATH,
                                                            subj_num)
+
+def get_correlation_hist_path(aggregation):
+    """
+    Derives the path to the correlation histogram figure for the specified
+    aggregation (means or pooled data), rooted at REPO_HOME_PATH
+
+    Parameters
+    ----------
+    aggregation : string
+
+    Returns
+    -------
+    path : string
+    """
+    return '{0}/figures/{1}_correlation_histogram.png'.format(REPO_HOME_PATH,
+                                                                aggregation)
\ No newline at end of file