Skip to content

Commit

Permalink
folded in clustering showing to outputs plotting
Browse files Browse the repository at this point in the history
  • Loading branch information
chiffa committed Jan 18, 2016
1 parent 02e8395 commit 1dabcdf
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 47 deletions.
4 changes: 2 additions & 2 deletions bioflow/algorithms_bank/conduction_routines.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def group_edge_current_with_limitations(inflated_laplacian, idx_pair, reach_limi
return inverter[1]/inverter[0], inverter[0]


def perform_clustering(inter_node_tension, cluster_number, show=True):
def perform_clustering(inter_node_tension, cluster_number, show='undefined clustering'):
"""
Performs a clustering on the voltages of the nodes,
Expand Down Expand Up @@ -377,7 +377,7 @@ def perform_clustering(inter_node_tension, cluster_number, show=True):
for _, items, mean_corr in group_2_mean_off_diag])

if show:
render_2d_matrix(relations_matrix.toarray(), 'Relationships matrix')
render_2d_matrix(relations_matrix.toarray(), show)

return np.array(group_2_mean_off_diag), \
remainder, \
Expand Down
57 changes: 31 additions & 26 deletions bioflow/analysis_pipeline_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,46 +15,51 @@
from bioflow.neo4j_db.db_io_routines import look_up_annotation_set, \
cast_analysis_set_to_bulbs_ids, cast_background_set_to_bulbs_id
from bioflow.utils.io_routines import get_source_bulbs_ids, get_background_bulbs_ids
from bioflow.utils.log_behavior import clear_logs


# setting static folders and urls for the databases
set_folders('/home/ank/data_repository', 'http://localhost:7474', 'mongodb://localhost:27017/')
# pulling the online databases
StructureGenerator.pull_online_dbs()
# setting the organism to yeast
StructureGenerator.build_source_config('yeast')
# first, let's clear logs:
# clear_logs()

# # clearing the database, if required
# destroy_db()
# # setting static folders and urls for the databases
# set_folders('/home/ank/support', 'http://localhost:7474', 'mongodb://localhost:27017/')
# # pulling the online databases
# StructureGenerator.pull_online_dbs()
# # setting the organism to yeast
# StructureGenerator.build_source_config('yeast')
#
# # # clearing the database, if required
# # destroy_db()
#
# # building the neo4j database
# build_db()

# building the neo4j database
build_db()

# set the source file of the ids of perturbed proteins and background set:
cast_analysis_set_to_bulbs_ids("/home/andrei/support/tmp/Chr_10.txt")
cast_background_set_to_bulbs_id(background_set_csv_location=None,
analysis_set_csv_location="/home/andrei/support/tmp/Chr_10.txt")
# # set the source file of the ids of perturbed proteins and background set:
# cast_analysis_set_to_bulbs_ids("/home/andrei/H202_70_Linhao_outliers.txt")
# cast_background_set_to_bulbs_id(
# background_set_csv_location=None,
# analysis_set_csv_location="/home/andrei/H202_70_Linhao_outliers.txt")

# get the bulbs ids of the nodes we would like to analyze
source_bulbs_ids = get_source_bulbs_ids()
background_bulbs_ids = get_background_bulbs_ids()

# building the interactome interface object
local_matrix = InteractomeInterface(main_connex_only=True, full_impact=False)
local_matrix.full_rebuild()
# # building the interactome interface object
# local_matrix = InteractomeInterface(main_connex_only=True, full_impact=False)
# local_matrix.full_rebuild()

# perform the interactome analysis
interactome_analysis([source_bulbs_ids], desired_depth=24, processors=6,
background_list=background_bulbs_ids)
# TODO: make sure interactome analysis works as expected with background list
# # perform the interactome analysis
# interactome_analysis([source_bulbs_ids], desired_depth=24, processors=3,
# background_list=background_bulbs_ids, skip_sampling=True)

# building the annotome interface object for GO "biological process" type terms
# building the reference parameters set
_filter = ['biological_process']
ref_param_set = [_filter, background_bulbs_ids, (1, 1), True, 3]

# build the annotome interface
annot_matrix = AnnotomeInterface(*ref_param_set)
annot_matrix.full_rebuild()
# annot_matrix = AnnotomeInterface(*ref_param_set)
# annot_matrix.full_rebuild()

# perform the knowledge analysis
knowledge_analysis([source_bulbs_ids], desired_depth=24, processors=6, param_set=ref_param_set)
knowledge_analysis([source_bulbs_ids], desired_depth=24, processors=3,
param_set=ref_param_set, skip_sampling=True)
15 changes: 9 additions & 6 deletions bioflow/annotation_network/knowledge_access_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@

log = get_logger(__name__)

plt.gcf().set_size_inches(20, 15)

_filter = ['biological_process']
_correlation_factors = (1, 1)
ref_param_set = [_filter, [], (1, 1), True, 3]


def get_go_interface_instance(param_set=ref_param_set):
"""
Generates a Matrix_Knowledge_DB interface for the use in the spawner. If
Expand Down Expand Up @@ -162,7 +161,8 @@ def show_correlations(
inf_sel = (go_interface_instance.calculate_informativity(selector[0]),
go_interface_instance.calculate_informativity(selector[1]))

plt.figure()
fig = plt.figure()
fig.set_size_inches(30, 20)

plt.subplot(331)
plt.title('current through nodes')
Expand Down Expand Up @@ -380,7 +380,7 @@ def compare_to_blank(
log.info('blank comparison: %s', curr_inf_conf.shape)
if not sparse_rounds:
group2avg_off_diag, _, mean_correlations, eigenvalue = perform_clustering(
go_interface_instance.UP2UP_voltages, cluster_no)
go_interface_instance.UP2UP_voltages, cluster_no, 'GO terms clustering')
else:
group2avg_off_diag = np.array([[(0, ), 0, 0]]*cluster_no)
mean_correlations = np.array([[0, 0]]*cluster_no)
Expand Down Expand Up @@ -417,11 +417,14 @@ def compare_to_blank(
go_interface_instance.GO2UP_Reachable_nodes.items()[:10])
# basically the second element below are the nodes that contribute to the information
# flow through the node that is considered as non-random

node_char_list = [
[int(GO_id), go_interface_instance.GO_Names[GO_id]] +
dict_system[GO_id] + r_nodes[go_node_ids == float(GO_id)].tolist() +
[list(set(go_interface_instance.GO2UP_Reachable_nodes[GO_id]).
intersection(set(go_interface_instance.analytic_uniprots)))]
[go_interface_instance.interactome_interface_instance.
bulbs_id_2_display_name[up_bulbs_id]
for up_bulbs_id in list(set(go_interface_instance.GO2UP_Reachable_nodes[GO_id]).
intersection(set(go_interface_instance.analytic_uniprots)))]
for GO_id in not_random_nodes]

return sorted(node_char_list, key=lambda x: x[5]), not_random_groups
Expand Down
11 changes: 5 additions & 6 deletions bioflow/main_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
from bioflow.utils.general_utils import high_level_os_io as hl_os_io


dump_location = path.join(path.abspath(path.dirname(__file__)), 'dumps')

dump_location = path.join(path.abspath(path.dirname(__file__)), 'dumps')
output_location = path.join(path.abspath(os.path.expanduser('~')), 'outputs')

log_location = path.join(path.abspath(path.dirname(__file__)), 'logs')

hl_os_io.mkdir_recursive(dump_location)
hl_os_io.mkdir_recursive(output_location)
hl_os_io.mkdir_recursive(log_location)

Servers, Options, Sources, Predictions = parse_configs()

Expand Down Expand Up @@ -60,7 +62,6 @@ class Dumps(object):
between the graph DB and the mapping matrix holders
"""
prefix = dump_location
hl_os_io.mkdir_recursive(prefix)

prefix_2 = Sources['INTERNAL']['dumpprefix']
postfix = '.dump'
Expand Down Expand Up @@ -107,9 +108,7 @@ class Outputs(object):
"""
Defines the locations to output actual results
"""
prefix = path.join(path.abspath(
path.join(path.dirname(__file__), os.pardir)), 'outputs')
hl_os_io.mkdir_recursive(prefix)
prefix = output_location

GO_GDF_output = prefix + '/GO_Analysis_output.gdf'
Interactome_GDF_output = prefix + '/Interactome_Analysis_output.gdf'
Expand Down
7 changes: 3 additions & 4 deletions bioflow/molecular_network/interactome_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@

log = get_logger(__name__)

plt.gcf().set_size_inches(20, 15)


# TODO: factor that into the "retrieve" routine of the laplacian wrapper
def get_interactome_interface():
Expand Down Expand Up @@ -145,7 +143,8 @@ def show_test_statistics(
:param sparse: True if we are showing test statistics of a sparse kernel run
:return:
"""
plt.figure()
fig = plt.figure()
fig.set_size_inches(30, 20)

plt.subplot(331)
plt.title('current through nodes')
Expand Down Expand Up @@ -321,7 +320,7 @@ def compare_to_blank(

if not sparse_rounds:
group2avg_offdiag, _, mean_correlations, eigenvalue = perform_clustering(
interactome_interface_instance.UP2UP_voltages, cluster_no)
interactome_interface_instance.UP2UP_voltages, cluster_no, 'Interactome clustering')

else:
group2avg_offdiag = np.array([[(0, ), 0, 0]]*cluster_no)
Expand Down
6 changes: 3 additions & 3 deletions bioflow/utils/dataviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np
from scipy import histogram2d
from scipy.stats import gaussian_kde
from sys import path
import os
from bioflow.main_configs import output_location


Expand Down Expand Up @@ -138,11 +138,11 @@ def render_2d_matrix(matrix, name):
:param name:
:return:
"""
print name
plt.title(name)
plt.imshow(matrix, interpolation='nearest')
plt.colorbar()
plt.savefig(path.join(output_location, name+'.png'))
plt.show()
plt.savefig(os.path.join(output_location, name+'.png'))


if __name__ == "__main__":
Expand Down

0 comments on commit 1dabcdf

Please sign in to comment.