From 683cad26e296226fbc131c557700d2c35f2faa77 Mon Sep 17 00:00:00 2001
From: Ying <ying.t.luo@gmail.com>
Date: Sat, 12 Dec 2015 15:42:11 -0800
Subject: [PATCH] Added documentation to similarity.py

---
 code/stat159lambda/reproduction/similarity.py | 42 +++++++++++++++++++
 code/stat159lambda/utils/data_path.py         |  2 +-
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/code/stat159lambda/reproduction/similarity.py b/code/stat159lambda/reproduction/similarity.py
index 66d6418..0dabd07 100644
--- a/code/stat159lambda/reproduction/similarity.py
+++ b/code/stat159lambda/reproduction/similarity.py
@@ -13,6 +13,20 @@
 
 
 def pearson_r(X, Y):
+    """
+    Calculates the Pearson correlation between corresponding rows of two
+    matrices. Assumes the two matrices given are the same shape.
+
+    Parameters
+    ----------
+    X : array representation of an (n x m) matrix
+    Y : array representation of an (n x m) matrix
+
+    Returns
+    -------
+    r : vector of length n, where element i is the correlation of row i of X
+        and row i of Y
+    """
     X_centered = X - np.mean(X, axis=1)[:, np.newaxis]
     Y_centered = Y - np.mean(Y, axis=1)[:, np.newaxis]
     return inner1d(X_centered, Y_centered) / (np.linalg.norm(X_centered,
@@ -22,6 +36,19 @@ def pearson_r(X, Y):
 
 
 def correlation(subj_a_data, subj_b_data):
+    """
+    Calculates the averaged correlation using every pair of data points
+    between two subjects.
+
+    Parameters
+    ----------
+    subj_a_data : array
+    subj_b_data : array
+
+    Returns
+    -------
+    correlations : array of length NUM_VOXELS
+    """
     run_split_a_data = np.split(subj_a_data, RUN_DIVISIONS[:-1], axis=1)
     run_split_b_data = np.split(subj_b_data, RUN_DIVISIONS[:-1], axis=1)
     correlations = np.zeros(NUM_VOXELS)
@@ -32,6 +59,21 @@ def correlation(subj_a_data, subj_b_data):
 
 
 def calculate_and_save_correlation(subj_1_num, subj_2_num):
+    """
+    Calculates correlation using 2-D data smoothed at 8 mm full width at half
+    maximum (FWHM), and saves the values to the designated correlation_path.
+    If a file with calculated correlations already exists and USE_CACHED_DATA
+    is truthy, uses that cached version instead.
+
+    Parameters
+    ----------
+    subj_1_num : int
+    subj_2_num : int
+
+    Returns
+    -------
+    None
+    """
     correlation_path = dp.get_correlation_path(subj_1_num, subj_2_num)
     if not exists(correlation_path) or not USE_CACHED_DATA:
         subj_1_data = np.load(dp.get_smoothed_2d_path(subj_1_num, 8))
diff --git a/code/stat159lambda/utils/data_path.py b/code/stat159lambda/utils/data_path.py
index 3dcb85f..1a24235 100644
--- a/code/stat159lambda/utils/data_path.py
+++ b/code/stat159lambda/utils/data_path.py
@@ -42,7 +42,7 @@ def get_concatenated_path(subj_num):
 def get_smoothed_path(subj_num, fwhm_mm):
     """
     Derives the absolute path to the smoothed data for a particular subject and
-    particular smoothing and particular full width half maximum smoothed version
+    particular full width half maximum smoothed version
 
     Parameters
     ----------