# Export Pathway Info from Reactome

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the local project modules
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import re
from collections import defaultdict

from bioservices.kegg import KEGG

In [3]:
import sys

# Add the sibling ../pals directory to the module search path so that the
# `common` and `reactome` modules below can be imported by bare name.
sys.path.append('../pals')

# NOTE(review): star imports hide where names come from. The helpers called in
# later cells (e.g. get_species_list, load_json, save_json, write_database) are
# not defined in this notebook, so they presumably come from these two modules
# -- consider importing them explicitly.
from common import *
from reactome import *

## Export Data from Reactome

In [4]:
# Fetch the list of species names; the export loop further down iterates over it.
all_species = get_species_list()
# Bare expression so the notebook displays the list as the cell output.
all_species

['Alphapapillomavirus 9',
 'Arabidopsis thaliana',
 'Arenicola marina',
 'Bacillus anthracis',
 'Bos taurus',
 'Caenorhabditis elegans',
 'Candida albicans',
 'Canis familiaris',
 'Cavia porcellus',
 'Cercopithecus aethiops',
 'Chlamydia trachomatis',
 'Chlorocebus sabaeus',
 'Clostridium botulinum',
 'Clostridium tetani',
 'Corynephage beta',
 'Cowpox virus',
 'Cricetulus griseus',
 'Crithidia fasciculata',
 'Danio rerio',
 'Dictyostelium discoideum',
 'Drosophila melanogaster',
 'Escherichia coli',
 'Felis catus',
 'Gallus gallus',
 'Hepatitis B virus',
 'Hepatitis C Virus',
 'Hepatitis C virus genotype 2a',
 'Hepatitis C virus subtype 1a',
 'Homarus americanus',
 'Homo sapiens',
 'Human alphaherpesvirus 2',
 'Human cytomegalovirus',
 'Human gammaherpesvirus 4',
 'Human herpesvirus 1',
 'Human herpesvirus 8',
 'Human immunodeficiency virus 1',
 'Human papillomavirus type 16',
 'Infectious bronchitis virus',
 'Influenza A virus',
 'Legionella pneumophila',
 'Listeria monocytogenes',
 

### Export Compound Data

Get KEGG compound dict from exported PiMP data

In [13]:
# Load the compressed PiMP KEGG export and keep only its 'entity_dict' entry,
# which is used below as the KEGG compound dictionary.
json_file = '../pals/data/PiMP_KEGG.json.zip'
all_kegg_compounds = load_json(json_file, compressed=True)
kegg_entity_dict = all_kegg_compounds['entity_dict']

In [14]:
# Write the KEGG entity dict out on its own as a compressed JSON file.
out_file = '../pals/data/COMPOUND.json.zip'
save_json(kegg_entity_dict, out_file, compressed=True)

Get the ChEBI compound dict from the ontology file (`chebi.owl`) downloaded from https://www.ebi.ac.uk/chebi/downloadsForward.do

In [15]:
# Location of the downloaded ChEBI ontology (OWL) file. Set the CHEBI_OWL_FILE
# environment variable to point at your own copy; the original author's local
# path is kept as the fallback so existing setups keep working unchanged.
owl_file = os.environ.get('CHEBI_OWL_FILE', 'C:/Users/joewa/Downloads/chebi.owl')

# Build the ChEBI compound dictionary from the ontology file.
chebi_entity_dict = parse_chebi_entity_dict(owl_file)

In [16]:
# Write the parsed ChEBI entity dict out as a compressed JSON file.
out_file = '../pals/data/ChEBI.json.zip'
save_json(chebi_entity_dict, out_file, compressed=True)

Export Reactome data for all species

In [None]:
# Each export target pairs a Reactome database name with the compound entity
# dict that is written alongside its pathways.
export_targets = [
    (DATABASE_REACTOME_KEGG, kegg_entity_dict),
    (DATABASE_REACTOME_CHEBI, chebi_entity_dict),
]

# For every species, write four databases in this order:
#   KEGG metabolic-only, KEGG all pathways, ChEBI metabolic-only, ChEBI all pathways.
for species in all_species:
    for database_name, entity_dict in export_targets:
        for metabolic_pathway_only in (True, False):
            print('Writing %s - %s metabolic_pathway_only=%s' % (species, database_name, metabolic_pathway_only))

            # NOTE(review): get_pathway_dict does not take database_name, so the
            # same query is repeated for KEGG and ChEBI; it could be cached per
            # (species, metabolic_pathway_only) if the helper is side-effect free
            # -- confirm before hoisting.
            pathway_dict = get_pathway_dict(species, metabolic_pathway_only)
            mapping_dict = get_compound_mapping_dict(species, database_name, metabolic_pathway_only)

            # Metabolic-only and all-pathway exports live in separate directories.
            subdir = 'metabolic_pathways' if metabolic_pathway_only else 'all_pathways'
            json_file = '../pals/data/reactome/%s/%s/%s.json.zip' % (subdir, database_name, species)
            write_database(pathway_dict, entity_dict, mapping_dict, json_file)

### Export UniProt Data

### Export ENSEMBL Data