In [153]:
import pybiopax
from pybiopax.biopax import BioPaxObject
from pybiopax.paths import find_objects
from typing import List, Tuple
import networkx as nx
import matplotlib.pyplot as plt
from pyvis import network as net
from IPython.display import HTML
from tabulate import tabulate
from networkx.drawing.nx_pydot import graphviz_layout
from collections import Counter
from ontobio.rdfgen.gocamgen.subgraphs import AnnotationSubgraph
import pprint
from base import relations, find_relation_meta


In [13]:
biopax_file = './resources/test_biopax/R-HSA-204174_level3.owl'
biopax_file = './resources/test_biopax/R-HSA-70171_level3.owl'
biopax_file = './resources/test_biopax/LIPASYN-PWY-1.owl'
model = pybiopax.model_from_owl_file(biopax_file, encoding="utf8")

Processing OWL elements:   0%|          | 0.00/15.0 [00:00<?, ?it/s]

### Check the most common objects and take note on Pathway

In [54]:
type_counter = Counter(
    obj.__class__.__name__
    for obj in model.objects.values()
)

print(f"Model has {sum(type_counter.values()):,} objects")

HTML(tabulate(type_counter.most_common(), tablefmt="html", headers=["Type", "Count"]))

Model has 159 objects


Type,Count
UnificationXref,90
RelationshipXref,12
SmallMoleculeReference,9
ChemicalStructure,9
Stoichiometry,9
SmallMolecule,9
BiochemicalReaction,3
BiochemicalPathwayStep,3
Catalysis,3
Protein,3


In [15]:
class GoCAM (AnnotationSubgraph):
    def __init__(self):
        self.bp_node = None
        self.mf_map = {}
        super().__init__()

    def process_pathway(self, pathway):        
            
        if pathway.xref:
            self.process_bp(pathway.xref)

        if pathway.pathway_order:
            self.process_step_processes(pathway.pathway_order, pathway)
        
        if pathway.pathway_component:
            self.process_components(pathway.pathway_component, pathway)

    
    def process_step_processes(self, step_processes, pathway):
        for obj in step_processes:
            pc = model.objects[obj.uid]
            for sp in pc.step_process:
                if isinstance(sp, pybiopax.biopax.Catalysis) and sp.control_type == 'ACTIVATION':
                    self.process_mf(sp.xref, model.objects[sp.uid])
   
    
    def process_components(self, components, pathway):
        for obj in components:
            mf = self.mf_map.get(obj.uid, None)

            if mf is None:
                continue

            mf = self.add_instance_of_class(mf, is_anchor=True)

            if self.bp_node is not None:
                self.add_edge(mf, relations['part_of'], self.bp_node)

            gp = self.add_instance_of_class(obj.display_name)
            self.add_edge(mf, relations['enabled_by'], gp)

            pc = model.objects[obj.uid]

            if pc.left:
                self.process_mols(pc.left, relations['has_input'], mf)

            if pc.right:
                self.process_mols(pc.right, relations['has_output'], mf)
                
                                        
    def process_mf(self, xrefs, catalysis:pybiopax.biopax.Catalysis)    : 
        for xref in xrefs:
            if xref.db == 'GENE ONTOLOGY':
                self.mf_map[catalysis.controlled.uid] = xref.id
                
                
    def process_bp(self, xrefs): 
        for xref in xrefs:
            if xref.db == 'GENE ONTOLOGY':
                bp = self.add_instance_of_class(xref.id)
                self.bp_node = bp
                

    def process_mols(self, mols, relation, mf):
        for mol in mols:
            sm_node = self.add_instance_of_class(mol.display_name)
            self.add_edge(mf, relation, sm_node)
            
    
    def process_cc(self, xrefs): 
        pass
    
     
gocam = GoCAM()
for pathway in model.get_objects_by_type(pybiopax.biopax.Pathway):
    gocam.process_pathway(pathway)


In [17]:
g = net.Network(notebook=True, directed=True)

g.from_nx(gocam)
for edge in g.edges:
    edge['color'] = find_relation_meta(edge['relation'])['color']
display(g.show('nx.html', notebook=True))

nx.html


In [42]:
# Yeast

gocam = AnnotationSubgraph()
for pathway in model.get_objects_by_type(pybiopax.biopax.Pathway):
    name = pathway
   
    print(name.uid)  
        
    for pc in pathway.pathway_component:
        pprint.pp('%s -> %s' % (pc.uid, '-'))
        if isinstance(pc, pybiopax.biopax.Catalysis):
            ctrl = pc.controlled
            print('\t%s -> %s' % (ctrl.uid, '--'))
            for stoich in ctrl.participant_stoichiometry:
                print('\t\t%s -> %s' % (stoich.uid, '---'))
                stoich_obj = model.objects[stoich.uid]   
                #pprint.pp(dir(stoich))
                           
       
        
   

Pathway26234
'Catalysis26386 -> -'
	BiochemicalReaction26361 -> --
		Stoichiometry26314 -> ---
		Stoichiometry26385 -> ---
		Stoichiometry26339 -> ---
		Stoichiometry26267 -> ---
		Stoichiometry26370 -> ---
'BiochemicalReaction26361 -> -'
'Catalysis26354 -> -'
	BiochemicalReaction26323 -> --
		Stoichiometry26314 -> ---
		Stoichiometry26353 -> ---
		Stoichiometry26339 -> ---
		Stoichiometry26267 -> ---
		Stoichiometry26253 -> ---
'BiochemicalReaction26323 -> -'
'BiochemicalReaction26238 -> -'
'Catalysis26315 -> -'
	BiochemicalReaction26238 -> --
		Stoichiometry26314 -> ---
		Stoichiometry26301 -> ---
		Stoichiometry26283 -> ---
		Stoichiometry26267 -> ---
		Stoichiometry26253 -> ---


In [207]:
pathway = model.objects['Pathway26234']
objs = find_objects(pathway, 'pathway_component/controlled/participant_stoichiometry/physical_entity/entity_reference/xref/')
for obj in objs:
    if obj.db == 'YeastCyc':
        print('%s -> %s' % (obj.db, obj.id))
    if obj.db == 'ChEBI':
        print('%s -> %s' % (obj.db, obj.id))

YeastCyc -> PROTON
ChEBI -> CHEBI:15378
YeastCyc -> INOSITOL-1-4-5-TRISPHOSPHATE
ChEBI -> CHEBI:203600
YeastCyc -> DIACYLGLYCEROL
ChEBI -> CHEBI:17815
YeastCyc -> WATER
ChEBI -> CHEBI:15377
YeastCyc -> PHOSPHATIDYL-MYO-INOSITOL-45-BISPHOSPHA
YeastCyc -> PROTON
ChEBI -> CHEBI:15378
YeastCyc -> PHOSPHORYL-CHOLINE
ChEBI -> CHEBI:295975
YeastCyc -> DIACYLGLYCEROL
ChEBI -> CHEBI:17815
YeastCyc -> WATER
ChEBI -> CHEBI:15377
YeastCyc -> PHOSPHATIDYLCHOLINE
ChEBI -> CHEBI:16110
YeastCyc -> PROTON
ChEBI -> CHEBI:15378
YeastCyc -> CHOLINE
ChEBI -> CHEBI:15354
YeastCyc -> L-PHOSPHATIDATE
ChEBI -> CHEBI:29089
YeastCyc -> WATER
ChEBI -> CHEBI:15377
YeastCyc -> PHOSPHATIDYLCHOLINE
ChEBI -> CHEBI:16110


In [179]:
def find_path(source_obj: BioPaxObject, target_obj: BioPaxObject) ->  List[BioPaxObject]:
    if source_obj is target_obj:
        return [source_obj.uid]

    for attr_name in dir(source_obj):
        attr_value = getattr(source_obj, attr_name)
        if attr_value is target_obj:
            return [source_obj.uid, target_obj.uid]
        if isinstance(attr_value, list):
            for obj in attr_value:
                if isinstance(obj, BioPaxObject):
                    path = find_path(obj, target_obj)
                    if path:
                        return [source_obj.uid] + path
        if isinstance(attr_value, BioPaxObject):
            path = find_path(attr_value, target_obj)
            if path:
                return [source_obj.uid] + path

    return []

In [196]:
def find_path_components(source_obj: BioPaxObject, target_obj: BioPaxObject) -> List[Tuple[BioPaxObject, str]]:
  
    if source_obj is target_obj:
        return [(source_obj.uid, '--')]

    for attr_name in dir(source_obj):
        attr_value = getattr(source_obj, attr_name)
        if attr_value is target_obj:
            return [(source_obj.uid, attr_name), (target_obj.uid, attr_name)]
        if isinstance(attr_value, list):
            for obj in attr_value:
                if isinstance(obj, BioPaxObject):
                    path = find_path_components(obj, target_obj)
                    if path:
                        return [(source_obj.uid, attr_name)] + path
        if isinstance(attr_value, BioPaxObject):
            path = find_path_components(attr_value, target_obj)
            if path:
                return [(attr_value.uid, attr_name)] + path

    return []


In [197]:
source_obj = model.objects['Pathway26234']
target_obj = model.objects['UnificationXref26241']

path = find_path_components(source_obj, target_obj)

if path:
    for obj in path:
        print(obj)
else:
    print("Path not found.")
    

('Pathway26234', 'pathway_component')
('BiochemicalReaction26361', 'controlled')
('BiochemicalReaction26361', 'left')
('CellularLocationVocabulary26240', 'cellular_location')
('CellularLocationVocabulary26240', 'xref')
('UnificationXref26241', '--')


In [208]:
for uni in model.get_objects_by_type(pybiopax.biopax.UnificationXref):
    if uni.db=='GO':
        print(uni.uid, uni.id)

UnificationXref26241 GO:0005829


In [150]:
gocam = AnnotationSubgraph()
for pathway in model.get_objects_by_type(pybiopax.biopax.Pathway):
    name = pathway
   
    print(name.uid)
    for obj in pathway.pathway_order:
        pc = model.objects[obj.uid]   
        for sp in pc.step_process:            
            if isinstance (sp, pybiopax.biopax.Catalysis) and sp.control_type == 'ACTIVATION':
                catalysis = model.objects[sp.uid]     
                for xref in sp.xref:
                    if(xref.db == 'GENE ONTOLOGY'):                        
                        print('\t %s: %s --> %s' % (catalysis.controlled.uid, 'MF', xref.id))
        
    for obj in pathway.pathway_component:
        pprint.pp('%s -> %s' % (obj.uid, obj.display_name))
        pc = model.objects[obj.uid]
        
        pprint.pp(dir(pc))
                   
        for l in pc.left or []:
            print('\t %s: %s -> %s' % ('L', l.uid,  l.display_name))
            
        for r in pc.right or []:
            print('\t %s: %s -> %s' % ('R', r.uid,  r.display_name))
        


Pathway26234
'Catalysis26386 -> None'
['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_controlled_of',
 '_name',
 '_participant_of',
 '_pathway_component_of',
 '_simple_to_xml',
 '_step_process_of',
 'availability',
 'catalysis_direction',
 'cofactor',
 'comment',
 'control_type',
 'controlled',
 'controlled_of',
 'controller',
 'data_source',
 'display_name',
 'evidence',
 'from_xml',
 'get_plain_names',
 'interaction_type',
 'list_types',
 'name',
 'participant',
 'participant_of',
 'pathway_component_of',
 'standard_name',
 'step_process_of',
 'to_xml',
 'uid',
 'xml_types',
 'xref']


AttributeError: 'Catalysis' object has no attribute 'left'