# Export Pathway Info from Reactome

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import re
from collections import defaultdict
from pathlib import Path

from bioservices.kegg import KEGG

In [3]:
import sys

# Append '../..' to the module search path before importing from `pals`, so the
# package can be imported from this notebook's location (presumably '../..' is
# the repository root -- confirm).
sys.path.append('../..')

# NOTE(review): the helper functions and DATABASE_* constants used in later
# cells are expected to come from these star imports; which module provides
# which name cannot be told from this notebook alone.
from pals.common import *
from pals.reactome import *

2020-11-27 14:51:32.662 | INFO     | pals.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)


## Export Data from Reactome

In [4]:
# Fetch the list of species names; it drives the per-species export loop below.
all_species = get_species_list()
# Bare expression so the notebook displays the list as the cell output.
all_species

['Alphapapillomavirus 9',
 'Arenicola marina',
 'Bacillus anthracis',
 'Bos taurus',
 'Caenorhabditis elegans',
 'Candida albicans',
 'Canis familiaris',
 'Cavia porcellus',
 'Cercopithecus aethiops',
 'Chlamydia trachomatis',
 'Chlorocebus sabaeus',
 'Clostridium botulinum',
 'Clostridium perfringens',
 'Clostridium tetani',
 'Corynephage beta',
 'Cowpox virus',
 'Cricetulus griseus',
 'Crithidia fasciculata',
 'Danio rerio',
 'Dengue virus',
 'Dictyostelium discoideum',
 'Drosophila melanogaster',
 'Escherichia coli',
 'Felis catus',
 'Gallus gallus',
 'Hepatitis B virus',
 'Hepatitis C Virus',
 'Hepatitis C virus genotype 2a',
 'Hepatitis C virus subtype 1a',
 'Homarus americanus',
 'Homo sapiens',
 'Human SARS coronavirus',
 'Human alphaherpesvirus 2',
 'Human cytomegalovirus',
 'Human gammaherpesvirus 4',
 'Human herpesvirus 1',
 'Human herpesvirus 8',
 'Human immunodeficiency virus 1',
 'Human papillomavirus type 16',
 'Infectious bronchitis virus',
 'Influenza A virus',
 'Legion

### Export Compound Data

Load the KEGG compound dictionary from the exported PiMP data and save it as `COMPOUND.json.zip`.

In [6]:
# Load the compressed PiMP KEGG export and keep only its 'entity_dict' entry,
# which later cells use as the KEGG compound dictionary.
json_file = '../../pals/data/PiMP_KEGG.json.zip'
all_kegg_compounds = load_json(json_file, compressed=True)
kegg_entity_dict = all_kegg_compounds['entity_dict']

In [7]:
# Write the KEGG compound dictionary to a compressed JSON file in the pals data folder.
out_file = '../../pals/data/COMPOUND.json.zip'
save_json(kegg_entity_dict, out_file, compressed=True)

Parse the ChEBI compound dictionary from the ontology file (`chebi.owl`) downloaded from https://www.ebi.ac.uk/chebi/downloadsForward.do, then save it as `ChEBI.json.zip`.

In [8]:
# Location of the downloaded ChEBI ontology file. Set the CHEBI_OWL_FILE
# environment variable to point at your own copy; otherwise fall back to
# 'chebi.owl' in the current user's Downloads folder, so the notebook is not
# tied to one machine-specific absolute path.
# as_posix() keeps forward slashes on every platform.
default_owl_file = (Path.home() / 'Downloads' / 'chebi.owl').as_posix()
owl_file = os.environ.get('CHEBI_OWL_FILE', default_owl_file)

# Parse the ontology file into the ChEBI compound dictionary used by the
# export cells below.
chebi_entity_dict = parse_chebi_entity_dict(owl_file)

In [9]:
# Write the parsed ChEBI compound dictionary to a compressed JSON file in the pals data folder.
out_file = '../../pals/data/ChEBI.json.zip'
save_json(chebi_entity_dict, out_file, compressed=True)

Export the Reactome pathway data (with KEGG and ChEBI compound mappings) for every species.

In [12]:
# Compound databases to export, each paired with the entity dictionary that
# describes its compounds. The order determines the order of the output files.
export_targets = [
    (DATABASE_REACTOME_KEGG, kegg_entity_dict),
    (DATABASE_REACTOME_CHEBI, chebi_entity_dict),
]

# Pathway scopes to export, as (metabolic_pathway_only, output sub-directory).
# Only metabolic pathways are exported by default; append
# (False, 'all_pathways') to also export the complete pathway sets.
pathway_scopes = [
    (True, 'metabolic_pathways'),
]

# Write one compressed JSON database per species / compound database / scope.
for species in all_species:
    for database_name, compound_entity_dict in export_targets:
        for metabolic_pathway_only, scope_dir in pathway_scopes:
            print('Writing %s - %s metabolic_pathway_only=%s' % (species, database_name, metabolic_pathway_only))

            # Pathways are fetched once per export; the mapping additionally
            # depends on which compound database is being exported.
            pathway_dict = get_pathway_dict(species, metabolic_pathway_only)
            mapping_dict = get_compound_mapping_dict(species, database_name, metabolic_pathway_only)
            json_file = '../../pals/data/reactome/%s/%s/%s.json.zip' % (scope_dir, database_name, species)
            write_database(pathway_dict, compound_entity_dict, mapping_dict, json_file)

Writing Alphapapillomavirus 9 - COMPOUND metabolic_pathway_only=True
Writing Alphapapillomavirus 9 - ChEBI metabolic_pathway_only=True
Writing Arenicola marina - COMPOUND metabolic_pathway_only=True
Writing Arenicola marina - ChEBI metabolic_pathway_only=True
Writing Bacillus anthracis - COMPOUND metabolic_pathway_only=True
Writing Bacillus anthracis - ChEBI metabolic_pathway_only=True
Writing Bos taurus - COMPOUND metabolic_pathway_only=True
Writing Bos taurus - ChEBI metabolic_pathway_only=True
Writing Caenorhabditis elegans - COMPOUND metabolic_pathway_only=True
Writing Caenorhabditis elegans - ChEBI metabolic_pathway_only=True
Writing Candida albicans - COMPOUND metabolic_pathway_only=True
Writing Candida albicans - ChEBI metabolic_pathway_only=True
Writing Canis familiaris - COMPOUND metabolic_pathway_only=True
Writing Canis familiaris - ChEBI metabolic_pathway_only=True
Writing Cavia porcellus - COMPOUND metabolic_pathway_only=True
Writing Cavia porcellus - ChEBI metabolic_pathwa

### Export UniProt Data

In [None]:
# Species whose pathway dictionary is fetched in the next cell.
species = 'Homo sapiens'

In [None]:
# Fetch the metabolic-only pathway dictionary for the selected species.
# NOTE(review): relies on `species` having been set by an earlier cell.
database_name = DATABASE_REACTOME_UNIPROT
metabolic_pathway_only = True
pathway_dict = get_pathway_dict(species, metabolic_pathway_only)
# Display the number of entries together with the full dictionary.
len(pathway_dict), pathway_dict

In [None]:
# Fetch the protein entity dictionary for the current species and database.
# NOTE(review): relies on `species` and `database_name` set by earlier cells.
entity_dict = get_protein_entity_dict(species, database_name)
# Display the number of entries together with the full dictionary.
len(entity_dict), entity_dict

In [None]:
# Fetch the protein mapping dictionary for the current species. The UniProt
# constant is passed directly here instead of via `database_name`.
mapping_dict = get_protein_mapping_dict(species, DATABASE_REACTOME_UNIPROT)
# Display the number of entries together with the full dictionary.
len(mapping_dict), mapping_dict

### Export ENSEMBL Data

In [None]:
# Species and database selection used by the two ENSEMBL cells that follow.
species = 'Homo sapiens'
database_name = DATABASE_REACTOME_ENSEMBL

In [None]:
# Fetch the gene entity dictionary for the current species and database.
# NOTE(review): relies on `species` and `database_name` set by an earlier cell.
entity_dict = get_gene_entity_dict(species, database_name)
# Display the number of entries together with the full dictionary.
len(entity_dict), entity_dict

In [None]:
# Fetch the gene mapping dictionary for the current species and database.
# NOTE(review): relies on `species` and `database_name` set by an earlier cell.
mapping_dict = get_gene_mapping_dict(species, database_name)
# Display the number of entries together with the full dictionary.
len(mapping_dict), mapping_dict