From aa6229f2f446b93882293c82cd2ee1b3e6c20db6 Mon Sep 17 00:00:00 2001
From: Ying
Date: Sat, 12 Dec 2015 16:07:37 -0800
Subject: [PATCH] Updated data_path.py and added documentation to analyze_similarity.py

---
 .../reproduction/analyze_similarity.py | 52 +++++++++++++++++--
 code/stat159lambda/utils/data_path.py  | 16 ++++++
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/code/stat159lambda/reproduction/analyze_similarity.py b/code/stat159lambda/reproduction/analyze_similarity.py
index c9c1abb..9e51015 100644
--- a/code/stat159lambda/reproduction/analyze_similarity.py
+++ b/code/stat159lambda/reproduction/analyze_similarity.py
@@ -13,24 +13,58 @@
 
 
 def get_pairwise_correlations():
+    """
+    Finds and returns the paths to the correlations of all possible pairs of
+    subjects (if the paths exist)
+
+    Parameters
+    ----------
+    None
+
+    Returns
+    -------
+    paths : string array
+    """
     subject_pairs = itertools.combinations(SUBJECTS, 2)
     return [np.load(dp.get_correlation_path(subj_a, subj_b))
             for subj_a, subj_b in subject_pairs]
 
 
 def get_correlations(aggregation='pooled'):
+    """
+    Calculates correlations either using means or the pooled data, depending
+    on specification
+
+    Parameters
+    ----------
+    aggregation : string (optional)
+
+    Returns
+    -------
+    correlations : array
+    """
     correlations = np.concatenate(tuple(get_pairwise_correlations()))
     if aggregation == 'mean':
         correlations = get_pairwise_correlations()
         correlations = np.mean(np.matrix(correlations).T, axis=1)
         correlations = correlations[~np.isnan(correlations)]
         return np.squeeze(np.asarray(correlations))
-    if aggregation == 'pooled':
-        correlations = np.concatenate(tuple(get_pairwise_correlations()))
-        return correlations[~np.isnan(correlations)]
+    return correlations[~np.isnan(correlations)]
 
 
 def save_correlation_histogram(aggregation):
+    """
+    Plots and saves the histogram of all correlations calculated by the
+    specified aggregation into figures folder
+
+    Parameters
+    ----------
+    aggregation : string
+
+    Returns
+    -------
+    None
+    """
     plt.hist(get_correlations(aggregation), bins=40)
     output_file_name = '{0}/figures/{1}_correlation_histogram.png'.format(
         REPO_HOME_PATH, aggregation)
@@ -40,6 +74,18 @@ def save_correlation_histogram(aggregation):
 
 
 def save_correlation_percentiles(aggregation):
+    """
+    Calculates and saves the correlation percentiles calculated by the
+    specified aggregation into figures folder
+
+    Parameters
+    ----------
+    aggregation : string
+
+    Returns
+    -------
+    None
+    """
     correlations = get_correlations(aggregation)
     results = [[p, np.percentile(correlations, p)] for p in PERCENTILES]
     output_file_name = '{0}/figures/{1}_correlation_percentiles.txt'.format(
diff --git a/code/stat159lambda/utils/data_path.py b/code/stat159lambda/utils/data_path.py
index 5579c6b..d5511cb 100644
--- a/code/stat159lambda/utils/data_path.py
+++ b/code/stat159lambda/utils/data_path.py
@@ -108,3 +108,19 @@ def get_2d_path(subj_num):
     """
     return '{0}/data/processed/sub{1}_rcds_2d.npy'.format(REPO_HOME_PATH,
                                                           subj_num)
+
+def get_correlation_hist_path(aggregation):
+    """
+    Derives the absolute path to the correlations calculated by using either
+    the means or pooled data
+
+    Parameters
+    ----------
+    aggregation : string
+
+    Returns
+    -------
+    path : string
+    """
+    return '{0}/figures/{1}_correlation_histogram.png'.format(REPO_HOME_PATH,
+                                                              aggregation)
\ No newline at end of file