# Export Pathway Info from Reactome

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from collections import defaultdict
from bioservices.kegg import KEGG

In [3]:
import sys

sys.path.append('../pals')

from common import load_json, save_json

## Common methods

In [4]:
from neo4j import GraphDatabase, basic_auth
NEO4J_SERVER='bolt://localhost:7687'
NEO4J_USER='neo4j'
NEO4J_PASSWORD='neo4j'
driver = GraphDatabase.driver(NEO4J_SERVER,
                              auth=basic_auth(NEO4J_USER, NEO4J_PASSWORD))

def get_neo4j_session():
    session = None
    try:
        session = driver.session()
    except Exception:
        raise
    return session

def rchop(thestring, ending):
    if thestring.endswith(ending):
        return thestring[:-len(ending)]
    return thestring

## Export Data from Reactome

In [5]:
def get_species_list():
    results = []
    try:
        session = get_neo4j_session()
        query = """
        MATCH (n:Species) RETURN n.displayName AS name order by name        
        """
        query_res = session.run(query)
        print(query)
        for record in query_res:
            results.append(record['name'])
    finally:
        if session is not None: session.close()
    return results

In [6]:
def get_pathway_dict(species, metabolic_pathway_only=True, leaf=True):
    results = {}
    try:
        session = get_neo4j_session()

        # initial match clause in the query
        query = """
            MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
        """
        
        if leaf: # retrieve only the leaf nodes in the pathway hierarchy
            query += " (p)-[:hasEvent]->(rle) AND "

        if metabolic_pathway_only: # only retrieves metabolic pathways
            query += " tp.displayName = 'Metabolism' AND "

        # remove last AND
        query = rchop(query.strip(), 'AND')

        # add return clause        
        query += """
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        """
        
        params = {
            'species': species
        }
        query_res = session.run(query, params)
        print(query)

        for record in query_res:
            pathway_name = record['pathway_name']
            pathway_id = record['pathway_id']
            results[pathway_id] = { 'display_name' : pathway_name }
    finally:
        if session is not None: session.close()
    return results

In [10]:
all_species = get_species_list()
all_species


        MATCH (n:Species) RETURN n.displayName AS name order by name        
        


['Alphapapillomavirus 9',
 'Arabidopsis thaliana',
 'Arenicola marina',
 'Bacillus anthracis',
 'Bos taurus',
 'Caenorhabditis elegans',
 'Candida albicans',
 'Canis familiaris',
 'Cavia porcellus',
 'Cercopithecus aethiops',
 'Chlamydia trachomatis',
 'Chlorocebus sabaeus',
 'Clostridium botulinum',
 'Clostridium tetani',
 'Corynephage beta',
 'Cowpox virus',
 'Cricetulus griseus',
 'Crithidia fasciculata',
 'Danio rerio',
 'Dictyostelium discoideum',
 'Drosophila melanogaster',
 'Escherichia coli',
 'Felis catus',
 'Gallus gallus',
 'Hepatitis B virus',
 'Hepatitis C Virus',
 'Hepatitis C virus genotype 2a',
 'Hepatitis C virus subtype 1a',
 'Homarus americanus',
 'Homo sapiens',
 'Human alphaherpesvirus 2',
 'Human cytomegalovirus',
 'Human gammaherpesvirus 4',
 'Human herpesvirus 1',
 'Human herpesvirus 8',
 'Human immunodeficiency virus 1',
 'Human papillomavirus type 16',
 'Infectious bronchitis virus',
 'Influenza A virus',
 'Legionella pneumophila',
 'Listeria monocytogenes',
 

### Export KEGG Data

In [7]:
def get_kegg_mapping_dict(species, metabolic_pathway_only=True, leaf=True):
    results = defaultdict(list)
    try:
        session = get_neo4j_session()

        # initial match clause in the query
        query = """
        MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
        """
        
        if leaf: # retrieve only the leaf nodes in the pathway hierarchy
            query += " (p)-[:hasEvent]->(rle) AND "

        if metabolic_pathway_only: # only retrieves metabolic pathways
            query += " tp.displayName = 'Metabolism' AND "

        # remove last AND
        query = rchop(query.strip(), 'AND')

        # add return clause        
        query += """
            RETURN DISTINCT
                p.stId AS pathway_id,
                di.identifier AS entity_id
        """
        
        params = {
            'species': species
        }
        query_res = session.run(query, params)
        print(query)

        i = 0
        for record in query_res:
            pathway_id = record['pathway_id']
            entity_id = record['entity_id']
            results[entity_id].append(pathway_id)
    finally:
        if session is not None: session.close()
    return dict(results)

Get KEGG compound dict from downloaded JSON file

In [8]:
# json_file = '../pals/data/all_kegg_compounds.json.test.zip'
# all_kegg_compounds = load_json(json_file, compressed=True)
# entity_dict = {}
# compound_info = all_kegg_compounds['cmpd_info']
# for compound_id in compound_info:
#     try:
#         name = compound_info[compound_id]['NAME'][0]        
#         formula = compound_info[compound_id]['FORMULA']
#         entity_dict[compound_id] = {
#             'unique_id': formula,
#             'display_name': name
#         }
#     except:
#         pass

Get KEGG compound dict from exported PiMP data

In [9]:
json_file = '../pals/data/PiMP_KEGG.json'
all_kegg_compounds = load_json(json_file, compressed=False)
entity_dict = all_kegg_compounds['entity_dict']

In [11]:
metabolic_pathway_only = True
for species in all_species:
    print('Processing', species)
    pathway_dict = get_pathway_dict(species, metabolic_pathway_only)
    mapping_dict = get_kegg_mapping_dict(species, metabolic_pathway_only)
    if len(pathway_dict) > 0:
        data = {
            'pathway_dict': pathway_dict,
            'entity_dict': entity_dict,
            'mapping_dict': mapping_dict
        }
        json_file = '../pals/data/reactome/metabolic_pathways/KEGG/%s.json.zip' % (species)
        save_json(data, json_file, compressed=True)

Processing Alphapapillomavirus 9
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) AND  tp.disp

        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.stId AS pathway_id,
                di.identifier AS entity_id
        
Processing Cavia porcellus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesName 

        
Processing Cowpox virus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) AND  tp.disp

        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.stId AS pathway_id,
                di.identifier AS entity_id
        
Processing Gallus gallus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesName AS

        
Processing Human alphaherpesvirus 2
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 

        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.stId AS pathway_id,
                di.identifier AS entity_id
        
Processing Influenza A virus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesNam

        
Processing Molluscum contagiosum virus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rl

        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.stId AS pathway_id,
                di.identifier AS entity_id
        
Processing Oryctolagus cuniculus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.specie

        
Processing Saccharomyces cerevisiae
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 

        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.stId AS pathway_id,
                di.identifier AS entity_id
        
Processing Toxoplasma gondii
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) AND  tp.displayName = 'Metabolism' 
            RETURN DISTINCT
                p.speciesNam

        


In [12]:
metabolic_pathway_only = False
for species in all_species:
    print('Processing', species)
    pathway_dict = get_pathway_dict(species,metabolic_pathway_only)
    mapping_dict = get_kegg_mapping_dict(species, metabolic_pathway_only)
    if len(pathway_dict) > 0:
        data = {
            'pathway_dict': pathway_dict,
            'entity_dict': entity_dict,
            'mapping_dict': mapping_dict
        }
        json_file = '../pals/data/reactome/all_pathways/KEGG/%s.json.zip' % (species)
        save_json(data, json_file, compressed=True)

Processing Alphapapillomavirus 9
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.

        
Processing Cavia porcellus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
               

        
Processing Cricetulus griseus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
            

        
Processing Hepatitis B virus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
             

        
Processing Human gammaherpesvirus 4
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
      

        
Processing Listeria monocytogenes
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
        

        
Processing Mycobacterium tuberculosis
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
    

        
Processing Penicillium chrysogenum
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
       

        
Processing Sendai virus
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.

        
Processing Vigna radiata var. radiata
MATCH (tp:TopLevelPathway)-[:hasEvent*]->(p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent)
            WHERE
                tp.speciesName = {species} AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
                p.speciesName AS species_name,            
                p.displayName AS pathway_name,
                p.stId AS pathway_id                       
        
MATCH (tp:TopLevelPathway)-[:hasEvent*]->
              (p:Pathway)-[:hasEvent*]->(rle:ReactionLikeEvent),
              (rle)-[:input|output|catalystActivity|physicalEntity|regulatedBy|regulator|hasComponent
              |hasMember|hasCandidate*]->(pe:PhysicalEntity),
              (pe:PhysicalEntity)-[:crossReference]->(di:DatabaseIdentifier)<-[:crossReference]-(rm:ReferenceMolecule)
        WHERE
              tp.speciesName = {species} AND
              di.databaseName = 'COMPOUND' AND
         (p)-[:hasEvent]->(rle) 
            RETURN DISTINCT
    

### Export ChEBI Data

### Export UniProt Data

### Export ENSEMBL Data