From 1dabcdf2022a58ebe753992bb750618cae5f33f4 Mon Sep 17 00:00:00 2001
From: chiffa <andrei.chiffa136@gmail.com>
Date: Mon, 18 Jan 2016 17:24:06 -0500
Subject: [PATCH] Fold clustering display into the outputs plotting

---
 .../algorithms_bank/conduction_routines.py    |  4 +-
 bioflow/analysis_pipeline_example.py          | 57 ++++++++++---------
 .../knowledge_access_analysis.py              | 15 +++--
 bioflow/main_configs.py                       | 11 ++--
 .../molecular_network/interactome_analysis.py |  7 +--
 bioflow/utils/dataviz.py                      |  6 +-
 6 files changed, 53 insertions(+), 47 deletions(-)

diff --git a/bioflow/algorithms_bank/conduction_routines.py b/bioflow/algorithms_bank/conduction_routines.py
index a7bd79b6..7265b568 100644
--- a/bioflow/algorithms_bank/conduction_routines.py
+++ b/bioflow/algorithms_bank/conduction_routines.py
@@ -325,7 +325,7 @@ def group_edge_current_with_limitations(inflated_laplacian, idx_pair, reach_limi
     return inverter[1]/inverter[0], inverter[0]
 
 
-def perform_clustering(inter_node_tension, cluster_number, show=True):
+def perform_clustering(inter_node_tension, cluster_number, show='undefined clustering'):
     """
     Performs a clustering on the voltages of the nodes,
 
@@ -377,7 +377,7 @@ def perform_clustering(inter_node_tension, cluster_number, show=True):
                                 for _, items, mean_corr in group_2_mean_off_diag])
 
     if show:
-        render_2d_matrix(relations_matrix.toarray(), 'Relationships matrix')
+        render_2d_matrix(relations_matrix.toarray(), show)
 
     return np.array(group_2_mean_off_diag), \
         remainder, \
diff --git a/bioflow/analysis_pipeline_example.py b/bioflow/analysis_pipeline_example.py
index e2c1f188..b3fe8901 100644
--- a/bioflow/analysis_pipeline_example.py
+++ b/bioflow/analysis_pipeline_example.py
@@ -15,46 +15,51 @@
 from bioflow.neo4j_db.db_io_routines import look_up_annotation_set, \
     cast_analysis_set_to_bulbs_ids, cast_background_set_to_bulbs_id
 from bioflow.utils.io_routines import get_source_bulbs_ids, get_background_bulbs_ids
+from bioflow.utils.log_behavior import clear_logs
 
 
-# setting static folders and urls for the databases
-set_folders('/home/ank/data_repository', 'http://localhost:7474', 'mongodb://localhost:27017/')
-# pulling the online databases
-StructureGenerator.pull_online_dbs()
-# setting the organism to yeast
-StructureGenerator.build_source_config('yeast')
+# first, let's clear logs:
+# clear_logs()
 
-# # clearing the database, if required
-# destroy_db()
+# # setting static folders and urls for the databases
+# set_folders('/home/ank/support', 'http://localhost:7474', 'mongodb://localhost:27017/')
+# # pulling the online databases
+# StructureGenerator.pull_online_dbs()
+# # setting the organism to yeast
+# StructureGenerator.build_source_config('yeast')
+#
+# # # clearing the database, if required
+# # destroy_db()
+#
+# # building the neo4j database
+# build_db()
 
-# building the neo4j database
-build_db()
-
-# set the source file of the ids of perturbed proteins and background set:
-cast_analysis_set_to_bulbs_ids("/home/andrei/support/tmp/Chr_10.txt")
-cast_background_set_to_bulbs_id(background_set_csv_location=None,
-                                analysis_set_csv_location="/home/andrei/support/tmp/Chr_10.txt")
+# # set the source file of the ids of perturbed proteins and background set:
+# cast_analysis_set_to_bulbs_ids("/home/andrei/H202_70_Linhao_outliers.txt")
+# cast_background_set_to_bulbs_id(
+#     background_set_csv_location=None,
+#     analysis_set_csv_location="/home/andrei/H202_70_Linhao_outliers.txt")
 
 # get the bulbs ids oif the nodes we would like to analyze
 source_bulbs_ids = get_source_bulbs_ids()
 background_bulbs_ids = get_background_bulbs_ids()
 
-# building the interactome interface object
-local_matrix = InteractomeInterface(main_connex_only=True, full_impact=False)
-local_matrix.full_rebuild()
+# # building the interactome interface object
+# local_matrix = InteractomeInterface(main_connex_only=True, full_impact=False)
+# local_matrix.full_rebuild()
 
-# perform the interactome analysis
-interactome_analysis([source_bulbs_ids], desired_depth=24, processors=6,
-                     background_list=background_bulbs_ids)
-# TODO: make sure interactome analysis works as expected with background list
+# # perform the interactome analysis
+# interactome_analysis([source_bulbs_ids], desired_depth=24, processors=3,
+#                      background_list=background_bulbs_ids, skip_sampling=True)
 
-# building the annotome interface object for GO "biological process" type terms
+# building the reference parameters set
 _filter = ['biological_process']
 ref_param_set = [_filter, background_bulbs_ids, (1, 1), True, 3]
 
 # build the annotome interface
-annot_matrix = AnnotomeInterface(*ref_param_set)
-annot_matrix.full_rebuild()
+# annot_matrix = AnnotomeInterface(*ref_param_set)
+# annot_matrix.full_rebuild()
 
 # perform the knowledge analysis
-knowledge_analysis([source_bulbs_ids], desired_depth=24, processors=6, param_set=ref_param_set)
+knowledge_analysis([source_bulbs_ids], desired_depth=24, processors=3,
+                   param_set=ref_param_set, skip_sampling=True)
diff --git a/bioflow/annotation_network/knowledge_access_analysis.py b/bioflow/annotation_network/knowledge_access_analysis.py
index a7616080..0dcd3b49 100644
--- a/bioflow/annotation_network/knowledge_access_analysis.py
+++ b/bioflow/annotation_network/knowledge_access_analysis.py
@@ -19,12 +19,11 @@
 
 log = get_logger(__name__)
 
-plt.gcf().set_size_inches(20, 15)
-
 _filter = ['biological_process']
 _correlation_factors = (1, 1)
 ref_param_set = [_filter, [], (1, 1), True, 3]
 
+
 def get_go_interface_instance(param_set=ref_param_set):
     """
     Generates a Matrix_Knowledge_DB interface for the use in the spawner. If
@@ -162,7 +161,8 @@ def show_correlations(
     inf_sel = (go_interface_instance.calculate_informativity(selector[0]),
                go_interface_instance.calculate_informativity(selector[1]))
 
-    plt.figure()
+    fig = plt.figure()
+    fig.set_size_inches(30, 20)
 
     plt.subplot(331)
     plt.title('current through nodes')
@@ -380,7 +380,7 @@ def compare_to_blank(
     log.info('blank comparison: %s', curr_inf_conf.shape)
     if not sparse_rounds:
         group2avg_off_diag, _, mean_correlations, eigenvalue = perform_clustering(
-            go_interface_instance.UP2UP_voltages, cluster_no)
+            go_interface_instance.UP2UP_voltages, cluster_no, 'GO terms clustering')
     else:
         group2avg_off_diag = np.array([[(0, ), 0, 0]]*cluster_no)
         mean_correlations = np.array([[0, 0]]*cluster_no)
@@ -417,11 +417,14 @@ def compare_to_blank(
                   go_interface_instance.GO2UP_Reachable_nodes.items()[:10])
         # basically the second element below are the nodes that contribute to the information
         #  flow through the node that is considered as non-random
+
         node_char_list = [
             [int(GO_id), go_interface_instance.GO_Names[GO_id]] +
             dict_system[GO_id] + r_nodes[go_node_ids == float(GO_id)].tolist() +
-            [list(set(go_interface_instance.GO2UP_Reachable_nodes[GO_id]).
-                  intersection(set(go_interface_instance.analytic_uniprots)))]
+            [go_interface_instance.interactome_interface_instance.
+                bulbs_id_2_display_name[up_bulbs_id]
+             for up_bulbs_id in list(set(go_interface_instance.GO2UP_Reachable_nodes[GO_id]).
+                intersection(set(go_interface_instance.analytic_uniprots)))]
             for GO_id in not_random_nodes]
 
         return sorted(node_char_list, key=lambda x: x[5]), not_random_groups
diff --git a/bioflow/main_configs.py b/bioflow/main_configs.py
index b0bd4d34..4a8b2355 100644
--- a/bioflow/main_configs.py
+++ b/bioflow/main_configs.py
@@ -10,12 +10,14 @@
 from bioflow.utils.general_utils import high_level_os_io as hl_os_io
 
 
-dump_location = path.join(path.abspath(path.dirname(__file__)), 'dumps')
 
+dump_location = path.join(path.abspath(path.dirname(__file__)), 'dumps')
 output_location = path.join(path.abspath(os.path.expanduser('~')), 'outputs')
-
 log_location = path.join(path.abspath(path.dirname(__file__)), 'logs')
 
+hl_os_io.mkdir_recursive(dump_location)
+hl_os_io.mkdir_recursive(output_location)
+hl_os_io.mkdir_recursive(log_location)
 
 Servers, Options, Sources, Predictions = parse_configs()
 
@@ -60,7 +62,6 @@ class Dumps(object):
     between the graph DB and the mapping matrix holders
     """
     prefix = dump_location
-    hl_os_io.mkdir_recursive(prefix)
 
     prefix_2 = Sources['INTERNAL']['dumpprefix']
     postfix = '.dump'
@@ -107,9 +108,7 @@ class Outputs(object):
     """
     Defines the locations to output actual results
     """
-    prefix = path.join(path.abspath(
-        path.join(path.dirname(__file__), os.pardir)), 'outputs')
-    hl_os_io.mkdir_recursive(prefix)
+    prefix = output_location
 
     GO_GDF_output = prefix + '/GO_Analysis_output.gdf'
     Interactome_GDF_output = prefix + '/Interactome_Analysis_output.gdf'
diff --git a/bioflow/molecular_network/interactome_analysis.py b/bioflow/molecular_network/interactome_analysis.py
index fdb8c73d..6bdedcf6 100644
--- a/bioflow/molecular_network/interactome_analysis.py
+++ b/bioflow/molecular_network/interactome_analysis.py
@@ -18,8 +18,6 @@
 
 log = get_logger(__name__)
 
-plt.gcf().set_size_inches(20, 15)
-
 
 # TODO: factor that into the "retrieve" routine of the laplacian wrapper
 def get_interactome_interface():
@@ -145,7 +143,8 @@ def show_test_statistics(
     :param sparse: True if we are showing test statistics of a sparse kernel run
     :return:
     """
-    plt.figure()
+    fig = plt.figure()
+    fig.set_size_inches(30, 20)
 
     plt.subplot(331)
     plt.title('current through nodes')
@@ -321,7 +320,7 @@ def compare_to_blank(
 
     if not sparse_rounds:
         group2avg_offdiag, _, mean_correlations, eigenvalue = perform_clustering(
-            interactome_interface_instance.UP2UP_voltages, cluster_no)
+            interactome_interface_instance.UP2UP_voltages, cluster_no, 'Interactome clustering')
 
     else:
         group2avg_offdiag = np.array([[(0, ), 0, 0]]*cluster_no)
diff --git a/bioflow/utils/dataviz.py b/bioflow/utils/dataviz.py
index 382809ae..731af489 100644
--- a/bioflow/utils/dataviz.py
+++ b/bioflow/utils/dataviz.py
@@ -11,7 +11,7 @@
 import numpy as np
 from scipy import histogram2d
 from scipy.stats import gaussian_kde
-from sys import path
+import os
 from bioflow.main_configs import output_location
 
 
@@ -138,11 +138,11 @@ def render_2d_matrix(matrix, name):
     :param name:
     :return:
     """
+    print name
     plt.title(name)
     plt.imshow(matrix, interpolation='nearest')
     plt.colorbar()
-    plt.savefig(path.join(output_location, name+'.png'))
-    plt.show()
+    plt.savefig(os.path.join(output_location, name+'.png'))
 
 
 if __name__ == "__main__":