In [1]:
import pybiopax
import networkx as nx
import matplotlib.pyplot as plt
from pyvis import network as net
from networkx.drawing.nx_pydot import graphviz_layout
from collections import Counter
from ontobio.rdfgen.gocamgen.subgraphs import AnnotationSubgraph
import pprint
from base import relations, find_relation_meta


In [14]:
# Input: a BioPAX level-3 OWL file (path is relative to the notebook's working directory).
# The commented-out line below is an alternative test file; enable exactly one of the two.
biopax_file = './resources/test_biopax/R-HSA-204174_level3.owl'
#biopax_file = './resources/test_biopax/R-HSA-70171_level3.owl'
# `model` is read by the cells further down (object statistics, GoCAM construction, inspection).
model = pybiopax.model_from_owl_file(biopax_file, encoding="utf8")

Processing OWL elements:   0%|          | 0.00/423 [00:00<?, ?it/s]

### Check the most common object types and take note of Pathway

In [15]:
# Tally how many objects of each BioPAX class the model holds (top 100, most frequent first).
stats = Counter(type(obj).__name__ for obj in model.objects.values()).most_common(100)
stats

[('UnificationXref', 160),
 ('Stoichiometry', 37),
 ('SequenceSite', 34),
 ('RelationshipXref', 25),
 ('PublicationXref', 24),
 ('Protein', 18),
 ('ProteinReference', 16),
 ('FragmentFeature', 16),
 ('SequenceInterval', 16),
 ('SmallMolecule', 15),
 ('SmallMoleculeReference', 15),
 ('Complex', 15),
 ('Control', 8),
 ('BiochemicalReaction', 4),
 ('RelationshipTypeVocabulary', 4),
 ('PathwayStep', 4),
 ('Catalysis', 3),
 ('CellularLocationVocabulary', 2),
 ('ModificationFeature', 2),
 ('Pathway', 1),
 ('Provenance', 1),
 ('BioSource', 1),
 ('SequenceModificationVocabulary', 1)]

In [16]:

class GoCAM (AnnotationSubgraph):
    """Annotation subgraph populated from the contents of a BioPAX pathway.

    Feed pathways in through `process_pathway`. For every pathway component
    whose uid was mapped to a GENE ONTOLOGY id by an ACTIVATION catalysis,
    the graph receives:

    * an anchor node for that GO id (referred to as "mf" throughout),
    * a `part_of` edge to the pathway-level GO node, when one was found,
    * an `enabled_by` edge to a node labelled with the component's display name,
    * `has_input` / `has_output` edges to its left / right participants.

    The class works purely on the objects handed to it and does not depend on
    any notebook-level variable.
    """

    def __init__(self):
        # Node created from the pathway's GENE ONTOLOGY xref (None until found).
        self.bp_node = None
        # uid of a controlled reaction -> GO id taken from its catalysis xref.
        self.mf_map = {}
        super().__init__()

    def process_pathway(self, pathway):
        """Add one pathway to the graph.

        Order matters: the pathway-level node and the uid -> GO id map must be
        populated before the components are turned into nodes and edges.
        """
        if pathway.xref:
            self.process_bp(pathway.xref)

        if pathway.pathway_order:
            self.process_step_processes(pathway.pathway_order, pathway)

        if pathway.pathway_component:
            self.process_components(pathway.pathway_component, pathway)

    def process_step_processes(self, step_processes, pathway):
        """Collect GO ids from every ACTIVATION catalysis found in the pathway steps.

        `pathway` is accepted for signature compatibility and is not used.
        """
        for step in step_processes:
            # The step and its processes are already fully populated objects,
            # so they are used directly rather than re-fetched by uid.
            for sp in step.step_process:
                if isinstance(sp, pybiopax.biopax.Catalysis) and sp.control_type == 'ACTIVATION':
                    self.process_mf(sp.xref, sp)

    def process_components(self, components, pathway):
        """Create nodes and edges for each component that has a mapped GO id.

        Components without an entry in `mf_map` are skipped.
        `pathway` is accepted for signature compatibility and is not used.
        """
        for obj in components:
            mf_id = self.mf_map.get(obj.uid)

            if mf_id is None:
                continue

            mf = self.add_instance_of_class(mf_id, is_anchor=True)

            if self.bp_node is not None:
                self.add_edge(mf, relations['part_of'], self.bp_node)

            # NOTE(review): the enabled_by target is labelled with the
            # component's own display name (the reaction), not with the
            # catalysis controller - confirm this is the intended enabler.
            gp = self.add_instance_of_class(obj.display_name)
            self.add_edge(mf, relations['enabled_by'], gp)

            if obj.left:
                self.process_mols(obj.left, relations['has_input'], mf)

            if obj.right:
                self.process_mols(obj.right, relations['has_output'], mf)

    def process_mf(self, xrefs, catalysis: pybiopax.biopax.Catalysis):
        """Record `controlled uid -> GO id` for each GENE ONTOLOGY xref.

        If several GENE ONTOLOGY xrefs are present, the last one wins.
        A catalysis without a controlled process has nothing to key the
        mapping on and is ignored.
        """
        controlled = catalysis.controlled
        if controlled is None:
            return

        for xref in xrefs:
            if xref.db == 'GENE ONTOLOGY':
                self.mf_map[controlled.uid] = xref.id

    def process_bp(self, xrefs):
        """Create the pathway-level node from GENE ONTOLOGY xrefs.

        Every matching xref creates a node; `bp_node` keeps the last one.
        """
        for xref in xrefs:
            if xref.db == 'GENE ONTOLOGY':
                bp = self.add_instance_of_class(xref.id)
                self.bp_node = bp

    def process_mols(self, mols, relation, mf):
        """Link `mf` to one new node per molecule (labelled by display name) via `relation`."""
        for mol in mols:
            sm_node = self.add_instance_of_class(mol.display_name)
            self.add_edge(mf, relation, sm_node)

    def process_cc(self, xrefs):
        """Placeholder: not implemented yet, currently does nothing."""
        pass
    
     
# Build one graph from every Pathway object in the loaded model.
# `gocam` is the graph the rendering cell below visualises.
gocam = GoCAM()
for pathway in model.get_objects_by_type(pybiopax.biopax.Pathway):
    gocam.process_pathway(pathway)


In [17]:
# Render the graph as an interactive, directed pyvis network inside the notebook.
g = net.Network(notebook=True, directed=True)

# Copy nodes and edges from `gocam` into the pyvis network.
g.from_nx(gocam)
# Colour each edge by its relation; every edge is expected to carry a 'relation' entry.
for edge in g.edges:
    edge['color'] = find_relation_meta(edge['relation'])['color']
# Display the result under the name 'nx.html'.
g.show('nx.html', notebook=True)

nx.html


In [25]:
# Read-only inspection of the raw BioPAX content that GoCAM consumes.
# Nothing here builds or rebinds `gocam`, so the graph produced earlier stays
# intact and the rendering cell can be re-run after this one.
for pathway in model.get_objects_by_type(pybiopax.biopax.Pathway):
    print(pathway.uid)

    # Controlled reaction uid -> GO id, for each ACTIVATION catalysis in the pathway steps.
    for step in pathway.pathway_order:
        for sp in step.step_process:
            if isinstance(sp, pybiopax.biopax.Catalysis) and sp.control_type == 'ACTIVATION':
                for xref in sp.xref:
                    if xref.db == 'GENE ONTOLOGY':
                        print('\t %s: %s --> %s' % (sp.controlled.uid, 'MF', xref.id))

    # Each pathway component with its left (L) and right (R) participants.
    for component in pathway.pathway_component:
        pprint.pp('%s -> %s' % (component.uid, component.display_name))

        for left_mol in component.left:
            print('\t %s: %s -> %s' % ('L', left_mol.uid, left_mol.display_name))

        for right_mol in component.right:
            print('\t %s: %s -> %s' % ('R', right_mol.uid, right_mol.display_name))
        
   

Pathway1
	 BiochemicalReaction1: MF --> GO:0019120
	 BiochemicalReaction3: MF --> GO:0004740
	 BiochemicalReaction4: MF --> GO:0004741
'BiochemicalReaction1 -> GSTZ1 dimer dehalogenates DCA to glyoxylate'
	 L: SmallMolecule1 -> H2O
	 L: SmallMolecule2 -> DCA
	 R: SmallMolecule3 -> glyoxylate
	 R: SmallMolecule4 -> HCl
'BiochemicalReaction2 -> DCA binds PDK2'
	 L: Protein2 -> PDK2
	 L: SmallMolecule2 -> DCA
	 R: Complex2 -> DCA:PDK2
'BiochemicalReaction3 -> PDK isoforms phosphorylate lipo-PDH'
	 L: SmallMolecule6 -> ATP
	 L: Complex3 -> lipo-PDH
	 R: SmallMolecule9 -> ADP
	 R: Complex8 -> p-lipo-PDH
'BiochemicalReaction4 -> PDP dephosphorylates p-lipo-PDH'
	 L: Complex8 -> p-lipo-PDH
	 L: SmallMolecule1 -> H2O
	 R: Complex3 -> lipo-PDH
	 R: SmallMolecule14 -> Pi
