From 1dabcdf2022a58ebe753992bb750618cae5f33f4 Mon Sep 17 00:00:00 2001 From: chiffa Date: Mon, 18 Jan 2016 17:24:06 -0500 Subject: [PATCH] folded in clustering showing to outputs plotting --- .../algorithms_bank/conduction_routines.py | 4 +- bioflow/analysis_pipeline_example.py | 57 ++++++++++--------- .../knowledge_access_analysis.py | 15 +++-- bioflow/main_configs.py | 11 ++-- .../molecular_network/interactome_analysis.py | 7 +-- bioflow/utils/dataviz.py | 6 +- 6 files changed, 53 insertions(+), 47 deletions(-) diff --git a/bioflow/algorithms_bank/conduction_routines.py b/bioflow/algorithms_bank/conduction_routines.py index a7bd79b6..7265b568 100644 --- a/bioflow/algorithms_bank/conduction_routines.py +++ b/bioflow/algorithms_bank/conduction_routines.py @@ -325,7 +325,7 @@ def group_edge_current_with_limitations(inflated_laplacian, idx_pair, reach_limi return inverter[1]/inverter[0], inverter[0] -def perform_clustering(inter_node_tension, cluster_number, show=True): +def perform_clustering(inter_node_tension, cluster_number, show='undefined clustering'): """ Performs a clustering on the voltages of the nodes, @@ -377,7 +377,7 @@ def perform_clustering(inter_node_tension, cluster_number, show=True): for _, items, mean_corr in group_2_mean_off_diag]) if show: - render_2d_matrix(relations_matrix.toarray(), 'Relationships matrix') + render_2d_matrix(relations_matrix.toarray(), show) return np.array(group_2_mean_off_diag), \ remainder, \ diff --git a/bioflow/analysis_pipeline_example.py b/bioflow/analysis_pipeline_example.py index e2c1f188..b3fe8901 100644 --- a/bioflow/analysis_pipeline_example.py +++ b/bioflow/analysis_pipeline_example.py @@ -15,46 +15,51 @@ from bioflow.neo4j_db.db_io_routines import look_up_annotation_set, \ cast_analysis_set_to_bulbs_ids, cast_background_set_to_bulbs_id from bioflow.utils.io_routines import get_source_bulbs_ids, get_background_bulbs_ids +from bioflow.utils.log_behavior import clear_logs -# setting static folders and urls for the databases -set_folders('/home/ank/data_repository', 'http://localhost:7474', 'mongodb://localhost:27017/') -# pulling the online databases -StructureGenerator.pull_online_dbs() -# setting the organism to yeast -StructureGenerator.build_source_config('yeast') +# first, let's clear logs: +# clear_logs() -# # clearing the database, if required -# destroy_db() +# # setting static folders and urls for the databases +# set_folders('/home/ank/support', 'http://localhost:7474', 'mongodb://localhost:27017/') +# # pulling the online databases +# StructureGenerator.pull_online_dbs() +# # setting the organism to yeast +# StructureGenerator.build_source_config('yeast') +# +# # # clearing the database, if required +# # destroy_db() +# +# # building the neo4j database +# build_db() -# building the neo4j database -build_db() - -# set the source file of the ids of perturbed proteins and background set: -cast_analysis_set_to_bulbs_ids("/home/andrei/support/tmp/Chr_10.txt") -cast_background_set_to_bulbs_id(background_set_csv_location=None, - analysis_set_csv_location="/home/andrei/support/tmp/Chr_10.txt") +# # set the source file of the ids of perturbed proteins and background set: +# cast_analysis_set_to_bulbs_ids("/home/andrei/H202_70_Linhao_outliers.txt") +# cast_background_set_to_bulbs_id( +# background_set_csv_location=None, +# analysis_set_csv_location="/home/andrei/H202_70_Linhao_outliers.txt") # get the bulbs ids oif the nodes we would like to analyze source_bulbs_ids = get_source_bulbs_ids() background_bulbs_ids = get_background_bulbs_ids() -# building the interactome interface object -local_matrix = InteractomeInterface(main_connex_only=True, full_impact=False) -local_matrix.full_rebuild() +# # building the interactome interface object +# local_matrix = InteractomeInterface(main_connex_only=True, full_impact=False) +# local_matrix.full_rebuild() -# perform the interactome analysis -interactome_analysis([source_bulbs_ids], desired_depth=24, processors=6, - background_list=background_bulbs_ids) -# TODO: make sure interactome analysis works as expected with background list +# # perform the interactome analysis +# interactome_analysis([source_bulbs_ids], desired_depth=24, processors=3, +# background_list=background_bulbs_ids, skip_sampling=True) -# building the annotome interface object for GO "biological process" type terms +# building the reference parameters set _filter = ['biological_process'] ref_param_set = [_filter, background_bulbs_ids, (1, 1), True, 3] # build the annotome interface -annot_matrix = AnnotomeInterface(*ref_param_set) -annot_matrix.full_rebuild() +# annot_matrix = AnnotomeInterface(*ref_param_set) +# annot_matrix.full_rebuild() # perform the knowledge analysis -knowledge_analysis([source_bulbs_ids], desired_depth=24, processors=6, param_set=ref_param_set) +knowledge_analysis([source_bulbs_ids], desired_depth=24, processors=3, + param_set=ref_param_set, skip_sampling=True) diff --git a/bioflow/annotation_network/knowledge_access_analysis.py b/bioflow/annotation_network/knowledge_access_analysis.py index a7616080..0dcd3b49 100644 --- a/bioflow/annotation_network/knowledge_access_analysis.py +++ b/bioflow/annotation_network/knowledge_access_analysis.py @@ -19,12 +19,11 @@ log = get_logger(__name__) -plt.gcf().set_size_inches(20, 15) - _filter = ['biological_process'] _correlation_factors = (1, 1) ref_param_set = [_filter, [], (1, 1), True, 3] + def get_go_interface_instance(param_set=ref_param_set): """ Generates a Matrix_Knowledge_DB interface for the use in the spawner. If @@ -162,7 +161,8 @@ def show_correlations( inf_sel = (go_interface_instance.calculate_informativity(selector[0]), go_interface_instance.calculate_informativity(selector[1])) - plt.figure() + fig = plt.figure() + fig.set_size_inches(30, 20) plt.subplot(331) plt.title('current through nodes') @@ -380,7 +380,7 @@ def compare_to_blank( log.info('blank comparison: %s', curr_inf_conf.shape) if not sparse_rounds: group2avg_off_diag, _, mean_correlations, eigenvalue = perform_clustering( - go_interface_instance.UP2UP_voltages, cluster_no) + go_interface_instance.UP2UP_voltages, cluster_no, 'GO terms clustering') else: group2avg_off_diag = np.array([[(0, ), 0, 0]]*cluster_no) mean_correlations = np.array([[0, 0]]*cluster_no) @@ -417,11 +417,14 @@ def compare_to_blank( go_interface_instance.GO2UP_Reachable_nodes.items()[:10]) # basically the second element below are the nodes that contribute to the information # flow through the node that is considered as non-random + node_char_list = [ [int(GO_id), go_interface_instance.GO_Names[GO_id]] + dict_system[GO_id] + r_nodes[go_node_ids == float(GO_id)].tolist() + - [list(set(go_interface_instance.GO2UP_Reachable_nodes[GO_id]). - intersection(set(go_interface_instance.analytic_uniprots)))] + [go_interface_instance.interactome_interface_instance. + bulbs_id_2_display_name[up_bulbs_id] + for up_bulbs_id in list(set(go_interface_instance.GO2UP_Reachable_nodes[GO_id]). + intersection(set(go_interface_instance.analytic_uniprots)))] for GO_id in not_random_nodes] return sorted(node_char_list, key=lambda x: x[5]), not_random_groups diff --git a/bioflow/main_configs.py b/bioflow/main_configs.py index b0bd4d34..4a8b2355 100644 --- a/bioflow/main_configs.py +++ b/bioflow/main_configs.py @@ -10,12 +10,14 @@ from bioflow.utils.general_utils import high_level_os_io as hl_os_io -dump_location = path.join(path.abspath(path.dirname(__file__)), 'dumps') +dump_location = path.join(path.abspath(path.dirname(__file__)), 'dumps') output_location = path.join(path.abspath(os.path.expanduser('~')), 'outputs') - log_location = path.join(path.abspath(path.dirname(__file__)), 'logs') +hl_os_io.mkdir_recursive(dump_location) +hl_os_io.mkdir_recursive(output_location) +hl_os_io.mkdir_recursive(log_location) Servers, Options, Sources, Predictions = parse_configs() @@ -60,7 +62,6 @@ class Dumps(object): between the graph DB and the mapping matrix holders """ prefix = dump_location - hl_os_io.mkdir_recursive(prefix) prefix_2 = Sources['INTERNAL']['dumpprefix'] postfix = '.dump' @@ -107,9 +108,7 @@ class Outputs(object): """ Defines the locations to output actual results """ - prefix = path.join(path.abspath( - path.join(path.dirname(__file__), os.pardir)), 'outputs') - hl_os_io.mkdir_recursive(prefix) + prefix = output_location GO_GDF_output = prefix + '/GO_Analysis_output.gdf' Interactome_GDF_output = prefix + '/Interactome_Analysis_output.gdf' diff --git a/bioflow/molecular_network/interactome_analysis.py b/bioflow/molecular_network/interactome_analysis.py index fdb8c73d..6bdedcf6 100644 --- a/bioflow/molecular_network/interactome_analysis.py +++ b/bioflow/molecular_network/interactome_analysis.py @@ -18,8 +18,6 @@ log = get_logger(__name__) -plt.gcf().set_size_inches(20, 15) - # TODO: factor that into the "retrieve" routine of the laplacian wrapper def get_interactome_interface(): @@ -145,7 +143,8 @@ def show_test_statistics( :param sparse: True if we are showing test statistics of a sparse kernel run :return: """ - plt.figure() + fig = plt.figure() + fig.set_size_inches(30, 20) plt.subplot(331) plt.title('current through nodes') @@ -321,7 +320,7 @@ def compare_to_blank( if not sparse_rounds: group2avg_offdiag, _, mean_correlations, eigenvalue = perform_clustering( - interactome_interface_instance.UP2UP_voltages, cluster_no) + interactome_interface_instance.UP2UP_voltages, cluster_no, 'Interactome clustering') else: group2avg_offdiag = np.array([[(0, ), 0, 0]]*cluster_no) diff --git a/bioflow/utils/dataviz.py b/bioflow/utils/dataviz.py index 382809ae..731af489 100644 --- a/bioflow/utils/dataviz.py +++ b/bioflow/utils/dataviz.py @@ -11,7 +11,7 @@ import numpy as np from scipy import histogram2d from scipy.stats import gaussian_kde -from sys import path +import os from bioflow.main_configs import output_location @@ -138,11 +138,11 @@ def render_2d_matrix(matrix, name): :param name: :return: """ + print name plt.title(name) plt.imshow(matrix, interpolation='nearest') plt.colorbar() - plt.savefig(path.join(output_location, name+'.png')) - plt.show() + plt.savefig(os.path.join(output_location, name+'.png')) if __name__ == "__main__":