Skip to content

Commit

Permalink
Converted the output of the non-random nodes to .tsv compatible form
Browse files Browse the repository at this point in the history
  • Loading branch information
chiffa committed Jan 18, 2016
1 parent 0fb5cec commit 02e8395
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 93 deletions.
2 changes: 0 additions & 2 deletions bioflow/algorithms_bank/conduction_routines.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,6 @@ def perform_clustering(inter_node_tension, cluster_number, show=True):
relations_matrix[local_index[UP1], local_index[UP1]] += 1.0 / tension

# underlying method is spectral clustering: do we really lie in a good zone for that?
print relations_matrix.shape
print cluster_number
groups = cluster_nodes(relations_matrix, cluster_number)


Expand Down
1 change: 0 additions & 1 deletion bioflow/annotation_network/BioKnowledgeInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ def full_rebuild(self):

log.info('Finished rebuilding the GO Interface object %s', self.pretty_time())


def load(self):
"""
loads itself from the saved dumps, in case the Filtering system is the same
Expand Down
79 changes: 33 additions & 46 deletions bioflow/annotation_network/knowledge_access_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

log = get_logger(__name__)

plt.gcf().set_size_inches(20, 15)

_filter = ['biological_process']
_correlation_factors = (1, 1)
Expand Down Expand Up @@ -297,7 +298,6 @@ def show_correlations(
def compare_to_blank(
blank_model_size,
zoom_range_selector,
real_knowledge_interface=None,
p_val=0.05,
sparse_rounds=False,
cluster_no=3,
Expand All @@ -309,7 +309,6 @@ def compare_to_blank(
:param blank_model_size: the number of uniprots in the blank model
:param zoom_range_selector: tuple representing the coverage range for which we would want
to see the histogram of current distributions
:param real_knowledge_interface: The GO_Interface that has run the current computation
:param p_val: desired p_value for the returned terms
:param sparse_rounds: if set to a number, sparse computation technique would be used with
the number of rounds equal to the number
Expand Down Expand Up @@ -369,29 +368,23 @@ def compare_to_blank(
final = np.concatenate(tuple(curr_inf_conf_general), axis=1)
final_mean_correlations = np.concatenate(tuple(mean_correlation_accumulator), axis=0).T
final_eigenvalues = np.concatenate(tuple(eigenvalues_accumulator), axis=0).T
curr_inf_conf = None
mean_correlations = None
eigenvalue = None
group2avg_off_diag = None
go_node_ids = None
dict_system = None
if real_knowledge_interface:
node_currents = real_knowledge_interface.node_current
dict_system = go_interface_instance.format_node_props(node_currents)
curr_inf_conf_tot = np.array(
[[int(key)] + list(val) for key, val in dict_system.iteritems()]).T
go_node_ids, curr_inf_conf = (
curr_inf_conf_tot[
0, :], curr_inf_conf_tot[
(1, 2, 3), :])
log.info('blank comparison: %s', curr_inf_conf.shape)
if not sparse_rounds:
group2avg_off_diag, _, mean_correlations, eigenvalue = perform_clustering(
real_knowledge_interface.UP2UP_voltages, cluster_no)
else:
group2avg_off_diag = np.array([[(0, ), 0, 0]]*cluster_no)
mean_correlations = np.array([[0, 0]]*cluster_no)
eigenvalue = np.array([-1]*cluster_no)

node_currents = go_interface_instance.node_current
dict_system = go_interface_instance.format_node_props(node_currents)
curr_inf_conf_tot = np.array(
[[int(key)] + list(val) for key, val in dict_system.iteritems()]).T
go_node_ids, curr_inf_conf = (
curr_inf_conf_tot[
0, :], curr_inf_conf_tot[
(1, 2, 3), :])
log.info('blank comparison: %s', curr_inf_conf.shape)
if not sparse_rounds:
group2avg_off_diag, _, mean_correlations, eigenvalue = perform_clustering(
go_interface_instance.UP2UP_voltages, cluster_no)
else:
group2avg_off_diag = np.array([[(0, ), 0, 0]]*cluster_no)
mean_correlations = np.array([[0, 0]]*cluster_no)
eigenvalue = np.array([-1]*cluster_no)

log.info('stats on %s samples', count)

Expand All @@ -402,16 +395,12 @@ def compare_to_blank(
zoom_range_selector, curr_inf_conf, mean_correlations.T, eigenvalue.T, count,
sparse=sparse_rounds)

go_node_char = namedtuple(
'Node_Char', [
'current', 'informativity', 'confusion_potential', 'p_value'])
group_char = namedtuple(
'Group_Char', [
'UPs', 'num_UPs', 'average_connection', 'p_value'])

if r_nodes is not None:
not_random_nodes = [str(int(GO_id))
for GO_id in go_node_ids[r_nodes < p_val].tolist()]
not_random_nodes = [GO_id for GO_id in go_node_ids[r_nodes < p_val].tolist()]

if not sparse_rounds:
not_random_groups = np.concatenate(
Expand All @@ -423,21 +412,19 @@ def compare_to_blank(
else:
not_random_groups = []

print 'debug, not random nodes', not_random_nodes
print 'debug bulbs_id_disp_name', \
go_interface_instance.GO2UP_Reachable_nodes.items()[:10]

log.debug('not random nodes: %s', not_random_nodes)
log.debug('bulbs_id_disp_name: %s',
go_interface_instance.GO2UP_Reachable_nodes.items()[:10])
# the last element of each row built below lists the nodes that contribute to the
# information flow through the node that is considered as non-random
dct = dict((GO_id,
tuple([go_node_char(*(dict_system[GO_id] +
r_nodes[go_node_ids == float(GO_id)].tolist())),
list(set(go_interface_instance.GO2UP_Reachable_nodes[GO_id]
).intersection(set(real_knowledge_interface.analytic_Uniprots
)))]))
for GO_id in not_random_nodes)
node_char_list = [
[int(GO_id), go_interface_instance.GO_Names[GO_id]] +
dict_system[GO_id] + r_nodes[go_node_ids == float(GO_id)].tolist() +
[list(set(go_interface_instance.GO2UP_Reachable_nodes[GO_id]).
intersection(set(go_interface_instance.analytic_uniprots)))]
for GO_id in not_random_nodes]

return sorted(dct.iteritems(), key=lambda x: x[1][0][3]), not_random_groups
return sorted(node_char_list, key=lambda x: x[5]), not_random_groups

return None, None

Expand Down Expand Up @@ -518,7 +505,6 @@ def auto_analyze(source=None, go_interface_instance=None, processors=3, desired_
nr_nodes, nr_groups = compare_to_blank(
len(go_interface_instance.analytic_uniprots),
[1100, 1300],
go_interface_instance,
p_val=0.9,
go_interface_instance=go_interface_instance,
param_set=param_set)
Expand All @@ -544,17 +530,18 @@ def auto_analyze(source=None, go_interface_instance=None, processors=3, desired_
nr_nodes, nr_groups = compare_to_blank(
len(go_interface_instance.analytic_uniprots),
[1100, 1300],
go_interface_instance,
p_val=0.9, sparse_rounds=sampling_depth,
go_interface_instance=go_interface_instance,
param_set=param_set)

go_interface_instance.export_conduction_system()

for group in nr_groups:
print group
log.info(group)
log.info('\t NodeID \t Name \t current \t informativity \t confusion_potential \t p_val \t '
'UP_list')
for node in nr_nodes:
print node
log.info('\t %s \t %s \t %s \t %s \t %s \t %s \t %s', *node)


if __name__ == "__main__":
Expand Down
74 changes: 30 additions & 44 deletions bioflow/molecular_network/interactome_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

log = get_logger(__name__)

plt.gcf().set_size_inches(20, 15)


# TODO: factor that into the "retrieve" routine of the laplacian wrapper
def get_interactome_interface():
Expand Down Expand Up @@ -88,7 +90,7 @@ def spawn_sampler_pool(
sparse_rounds,
chromosome_specific,
interactome_interface_instance)]
print payload
log.debug('spawning the sampler with payload %s', payload)
process_pool.map(spawn_sampler, payload * pool_size)


Expand Down Expand Up @@ -249,7 +251,6 @@ def show_test_statistics(
def compare_to_blank(
blank_model_size,
zoom_range_selector,
real_interactome_interface=None,
p_val=0.05,
sparse_rounds=False,
cluster_no=3,
Expand All @@ -260,8 +261,6 @@ def compare_to_blank(
:param blank_model_size: the number of uniprots in the blank model
:param zoom_range_selector: tuple representing the coverage range for which we would
want to see the histogram of current distributions
:param real_interactome_interface: The interactome_Interface that has run the current
computation
:param p_val: desired p_value for the returned terms
:param sparse_rounds: if set to a number, sparse computation technique would be used
with the number of rounds equal to the integer value of that argument
Expand Down Expand Up @@ -312,28 +311,22 @@ def compare_to_blank(
final = np.concatenate(tuple(curr_inf_conf_general), axis=1)
final_mean_correlations = np.concatenate(tuple(mean_correlation_accumulator), axis=0).T
final_eigenvalues = np.concatenate(tuple(eigenvalues_accumulator), axis=0).T
curr_inf_conf = None
mean_correlations = None
eigenvalue = None
group2avg_offdiag = None
node_ids = None
dictionary_system = None
if real_interactome_interface:
node_currents = real_interactome_interface.node_current
dictionary_system = interactome_interface_instance.format_node_props(node_currents)
curr_inf_conf_tot = np.array(
[[int(key)] + list(val) for key, val in dictionary_system.iteritems()]).T
node_ids, curr_inf_conf = (curr_inf_conf_tot[0, :],
curr_inf_conf_tot[(1, 2), :])

if not sparse_rounds:
group2avg_offdiag, _, mean_correlations, eigenvalue = perform_clustering(
real_interactome_interface.UP2UP_voltages, cluster_no)
node_currents = interactome_interface_instance.node_current
dictionary_system = interactome_interface_instance.format_node_props(node_currents)
curr_inf_conf_tot = np.array(
[[int(key)] + list(val) for key, val in dictionary_system.iteritems()]).T
node_ids, curr_inf_conf = (curr_inf_conf_tot[0, :],
curr_inf_conf_tot[(1, 2), :])

else:
group2avg_offdiag = np.array([[(0, ), 0, 0]]*cluster_no)
mean_correlations = np.array([[0, 0]]*cluster_no)
eigenvalue = np.array([-1]*cluster_no)
if not sparse_rounds:
group2avg_offdiag, _, mean_correlations, eigenvalue = perform_clustering(
interactome_interface_instance.UP2UP_voltages, cluster_no)

else:
group2avg_offdiag = np.array([[(0, ), 0, 0]]*cluster_no)
mean_correlations = np.array([[0, 0]]*cluster_no)
eigenvalue = np.array([-1]*cluster_no)

log.info("stats on %s samples" % count)

Expand All @@ -344,8 +337,6 @@ def compare_to_blank(
zoom_range_selector, curr_inf_conf, mean_correlations.T,
eigenvalue.T, count, sparse_rounds)

interactome_node_char = namedtuple(
'Node_Char', ['name', 'current', 'degree', 'p_value'])
group_char = namedtuple(
'Group_Char', [
'UPs', 'num_UPs', 'average_connection', 'p_value'])
Expand All @@ -366,21 +357,17 @@ def compare_to_blank(
# basically the second element below are the nodes that contribute to the
# information flow through the node that is considered as non-random

print 'debug, not random nodes', not_random_nodes
print 'debug bulbs_id_disp_name', \
interactome_interface_instance.bulbs_id_2_display_name.items()[:10]
log.debug('debug, not random nodes: %s', not_random_nodes)
log.debug('debug bulbs_id_disp_name: %s',
interactome_interface_instance.bulbs_id_2_display_name.items()[:10])

dct = dict(
(nr_node_id,
interactome_node_char(
interactome_interface_instance.bulbs_id_2_display_name[nr_node_id],
*
(dictionary_system[nr_node_id] +
r_nodes[node_ids == float(nr_node_id)].tolist())))
for nr_node_id in not_random_nodes)
node_char_list = [
[int(nr_node_id),
interactome_interface_instance.bulbs_id_2_display_name[nr_node_id]] +
dictionary_system[nr_node_id] + r_nodes[node_ids == float(nr_node_id)].tolist()
for nr_node_id in not_random_nodes]

return sorted(dct.iteritems(), key=lambda x: x[1][3]),\
not_random_groups
return sorted(node_char_list, key=lambda x: x[4]), not_random_groups

return None, None

Expand All @@ -406,7 +393,7 @@ def auto_analyze(source_list, desired_depth=24, processors=4, background_list=No
desired_depth = desired_depth / processors

for _list in source_list:
log.info('Auto analyzing list of interest: %s %s', len(_list), _list)
log.info('Auto analyzing list of interest: %s', len(_list))
interactome_interface = get_interactome_interface()
log.debug("retrieved interactome_interface instance e_p_u_b_i length: %s",
len(interactome_interface.entry_point_uniprots_bulbs_ids))
Expand Down Expand Up @@ -437,7 +424,6 @@ def auto_analyze(source_list, desired_depth=24, processors=4, background_list=No
nr_nodes, nr_groups = compare_to_blank(
len(interactome_interface.entry_point_uniprots_bulbs_ids),
[0.5, 0.6],
interactome_interface,
p_val=0.9, interactome_interface_instance=interactome_interface)
else:

Expand All @@ -463,15 +449,15 @@ def auto_analyze(source_list, desired_depth=24, processors=4, background_list=No
nr_nodes, nr_groups = compare_to_blank(
len(interactome_interface.entry_point_uniprots_bulbs_ids),
[0.5, 0.6],
interactome_interface,
p_val=0.9, sparse_rounds=sampling_depth,
interactome_interface_instance=interactome_interface)

interactome_interface.export_conduction_system()
for group in nr_groups:
print group
log.info(group)
log.info('\t Node_ID \t Name \t current \t connectedness \t p_value')
for node in nr_nodes:
print node
log.info('\t %s \t %s \t %s \t %s \t %s', *node)


if __name__ == "__main__":
Expand Down
4 changes: 4 additions & 0 deletions bioflow/utils/dataviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
import numpy as np
from scipy import histogram2d
from scipy.stats import gaussian_kde
# `path` has to be os.path: render_2d_matrix calls path.join(...) to build the
# output file name, and sys.path is a plain list that has no join() method.
from os import path
from bioflow.main_configs import output_location



def better_2d_density_plot(x_data, y_data, threshold=3, bins=(100, 100)):
Expand Down Expand Up @@ -138,6 +141,7 @@ def render_2d_matrix(matrix, name):
plt.title(name)
plt.imshow(matrix, interpolation='nearest')
plt.colorbar()
plt.savefig(path.join(output_location, name+'.png'))
plt.show()


Expand Down
11 changes: 11 additions & 0 deletions bioflow/utils/log_behavior.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ def add_handler(_logger, level, file_name):


def get_logger(logger_name):
"""
Returns a properly configured logger object
:param logger_name: name of the logger object
"""
_logger = logging.getLogger(logger_name)
_logger.setLevel(logging.DEBUG)

Expand All @@ -114,3 +118,10 @@ def get_logger(logger_name):
return _logger

logger = get_logger('this_logger_needs_to_be_renamed')


def clear_logs():
    """
    Clears out the previously written logs by calling wipe_dir on log_location
    """
    wipe_dir(log_location)

0 comments on commit 02e8395

Please sign in to comment.