In [1]:
import sys
from optparse import OptionParser
import pickle as pkl

#from ilp import find_shortest_hyperpath_parallel 
#from pathheuristic import get_doubly_reachable_graph, tail_path_heuristic
#from cutfinder import convert_vertex_cuts_to_edge_cuts

from halp.directed_hypergraph import DirectedHypergraph

In [2]:
def _split_members(field, delim):
    """Return the ``delim``-separated members of one column, or ``[]`` for 'None'/''."""
    if field == 'None' or field == '':
        return []
    return field.split(delim)


def _read_hypernodes(path, delim, sep):
    """Parse a hypernodes file into ``{hypernode name: [member, ...]}``.

    Lines starting with ``#`` and blank lines are ignored.  A row that has a
    name but no second column gets the placeholder member list
    ``['OtherComplexes-FIX']``.
    """
    hypernodes = {}
    with open(path) as fin:
        for line in fin:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                # A blank line would otherwise register a bogus hypernode named ''.
                continue
            row = stripped.split(sep)
            if len(row) == 1:
                hypernodes[row[0]] = ['OtherComplexes-FIX']
            else:
                hypernodes[row[0]] = row[1].split(delim)
    return hypernodes


def make_hypergraph(file_prefix,delim=';',sep='\t',keep_singleton_nodes=False,target=None,select_edges=[]):
    """Build a ``DirectedHypergraph`` from a pair of text files.

    Reads ``<file_prefix>-hypernodes.txt`` and ``<file_prefix>-hyperedges.txt``.

    Hyperedge rows are split on arbitrary whitespace (``sep`` is only applied
    to the hypernodes file) and are read as:

    * column 0 -- tail members
    * column 1 -- head members
    * column 2 -- regulators, merged into the tail
    * column 3 -- negative regulators (ignored)
    * column 4 -- hyperedge identifier

    Member lists are separated by ``delim``; the literal ``'None'`` marks an
    empty column.  A row is skipped (and counted in the printed summary) when
    any tail/head member contains ``'HSA'`` or when its tail or head is empty.
    Comment lines (``#``) and blank lines are ignored.

    Args:
        file_prefix: path prefix shared by the two input files.
        delim: separator between members inside one column.
        sep: column separator of the hypernodes file.
        keep_singleton_nodes: if true, every hypernode name from the
            hypernodes file is added as a node even if no hyperedge uses it.
        target: unused; kept so existing callers keep working.
        select_edges: container of hyperedge identifiers to keep.  An empty
            list (the default) or ``None`` keeps every hyperedge.  The default
            list is never mutated.  Pass a ``set`` for fast membership tests.

    Returns:
        ``(H, identifier2id, id2identifier)`` where ``H`` is the hypergraph,
        ``identifier2id`` maps a file identifier to the id returned by
        ``H.add_hyperedge`` and ``id2identifier`` is the reverse mapping.
        Every node of ``H`` carries the attributes ``is_hypernode`` and
        ``hypernode_members``.

    Raises:
        IndexError: a non-blank, non-comment hyperedge row has fewer than
            five whitespace-separated columns.
    """
    hypernodes = _read_hypernodes(file_prefix+'-hypernodes.txt', delim, sep)
    print('%d hypernodes from hypernodes file' % len(hypernodes))

    identifier2id = {}
    id2identifier = {}
    H = DirectedHypergraph()
    if keep_singleton_nodes:
        for n in hypernodes:
            H.add_node(n)

    # Only the exact empty list (or None) disables filtering, as before; any
    # other container -- even an empty one -- is used for membership tests.
    keep_all_edges = select_edges is None or select_edges == []

    skipped1 = 0  # rows dropped because a member carries a Reactome ('HSA') identifier
    skipped2 = 0  # rows dropped because the tail or the head is empty

    with open(file_prefix+'-hyperedges.txt') as fin:
        for line in fin:
            if line.startswith('#'):
                continue
            row = line.split()
            if not row:
                # Blank line: nothing to parse (previously an IndexError).
                continue

            ## Tail includes tail and regulators.
            ## Head includes head.
            tail = set(_split_members(row[0], delim))
            head = set(_split_members(row[1], delim))
            tail.update(_split_members(row[2], delim))
            # row[3] holds the negative regulators, which are deliberately left out.
            hedge_id = row[4]

            ## THIS IS A HACK FOR NOW (should be incorporated in the make-hypergraph.py code)
            ## Ignore any reactions that have a Reactome identifier (e.g. has "HSA") instead of
            ## a Pathway Commons identifier.
            if any('HSA' in s for s in tail | head):
                skipped1 += 1
            elif len(tail) == 0 or len(head) == 0:
                skipped2 += 1
            elif keep_all_edges or hedge_id in select_edges:
                hid = H.add_hyperedge(tail,head,identifier=hedge_id)
                identifier2id[hedge_id] = hid
                id2identifier[hid] = hedge_id

    print('%d reactions skipped because of Reactome identifier' % skipped1)
    print('%d reactions skipped because of an empty tail or head' % skipped2)

    ## annotate nodes
    for node in H.get_node_set():
        if node in hypernodes and hypernodes[node] != [node]:
            H.add_node(node,hypernode_members=hypernodes[node],is_hypernode=True)
        else:
            H.add_node(node,is_hypernode=False,hypernode_members=[])

    return H, identifier2id, id2identifier

In [3]:
# Only the hypergraph is needed here.  Index the result instead of unpacking
# into `_` / `__`, which IPython uses for its output history.
H = make_hypergraph('hypergraphs/allpid')[0]

11451 hypernodes from hypernodes file
0 reactions skipped because of Reactome identifier
313 reactions skipped because of an empty tail or head


In [4]:
# Serialize the hypergraph currently bound to H into the working directory.
with open("allpid-halpHypergraph.pkl", 'wb') as pickle_out:
    pkl.dump(H, pickle_out)

In [5]:
# Rebinds H to the Reactome hypergraph.  Index the result instead of unpacking
# into `_` / `__`, which IPython uses for its output history.
H = make_hypergraph('hypergraphs/allreactome')[0]

48425 hypernodes from hypernodes file
60 reactions skipped because of Reactome identifier
0 reactions skipped because of an empty tail or head


In [6]:
# Serialize the hypergraph currently bound to H into the working directory.
with open("allreactome-halpHypergraph.pkl", 'wb') as pickle_out:
    pkl.dump(H, pickle_out)