# Create all shortest-path traces with the Reactome database

The biggest difference from BioCyc is entity mapping: it is considerably more complicated because Reactome does not have a consistent ID scheme.

In [1]:
# Standard library: used below for the output directory and environment lookup.
import os

# Import GDS modules
# NOTE(review): the wildcard imports below supply names that later cells use
# without importing them explicitly (ReactomeDB, Reactome, write_shortest_paths).
# Which module provides which name is not visible from this notebook; prefer
# explicit imports once that has been confirmed.
from lifelike_gds.arango_network.shortest_paths_trace import ShortestPathTrace
from lifelike_gds.arango_network.reactome import *
from lifelike_gds.arango_network.trace_graph_utils import *
from lifelike_gds.arango_network.reactome_notebook_functions import *

# Ignore warnings
# This installs a blanket 'ignore' filter, so every warning category from every
# module is silenced for the rest of the kernel session, including deprecation
# and runtime warnings raised by the libraries imported above.
import warnings

warnings.filterwarnings('ignore')

## Settings
Make sure to change these values to match your local environment.

In [2]:
# Folder that holds the input data files.
input_dir = './input'

# Folder that receives the generated results; it is created here when missing
# and left untouched when it already exists.
output_dir = './output'
os.makedirs(output_dir, exist_ok=True)

# Name of the Reactome ArangoDB database to connect to.
# The REACTOME_ARANGO_DATABASE environment variable takes precedence; when you
# run this notebook from Lifelike's online training BinderHub website, these
# parameters are already set for you in the environment.
# NOTE(review): the fallback 'ecocyc-25' looks like an EcoCyc database name,
# not a Reactome one. Loading the graph from a database without the Reactome
# collections fails with "collection or view not found: activeUnitOf".
# Confirm the intended default for local runs.
arango_dbname = os.environ.get('REACTOME_ARANGO_DATABASE', 'ecocyc-25')

## Parameters
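Set the parameters:
- source_name: name for the source entities.
- target_name: name for the target entities.
- source_file: source file name in the input directory (not set in this notebook; the source nodes are selected by stId below).
- target_file: target file name in the input directory (not set in this notebook; the target nodes are selected by stId below).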


In [3]:
# Labels for the source and target entity sets; both are passed to
# write_shortest_paths together with the matching node lists.
source_name, target_name = 'il6', 'vegfa'

## Define functions to get source/target nodes
- read input file with ids (stId or dbId)
- read input file with reference ids (gene_id or chebi_id)

## Define functions to export shortest paths traces to graph file 

## Connect to arango database

In [4]:
# Create the database handle for the database named by `arango_dbname`.
# NOTE(review): only the database name is passed here; presumably the host and
# credentials are resolved inside ReactomeDB (e.g. from the environment) — confirm.
database = ReactomeDB(arango_dbname)

## Load graph from the Arango graph database into memory

In [5]:
# Wrap the database handle in the Reactome network object and hand that to the
# shortest-path tracer.
reactome_network = Reactome(database)
tracegraph = ShortestPathTrace(reactome_network)

# Point the tracer at the output directory; the Excel and graph files are
# written there.
tracegraph.datadir = output_dir

# Initialise the tracer by loading the graph data from Arango; a networkx graph
# is created here. This step queries Reactome-specific collections, so it
# fails with "collection or view not found" when the connected database does
# not contain them.
tracegraph.init_default_graph()

INFO: load reactome graph


AQLQueryExecuteError: [HTTP 404][ERR 1203] AQL: collection or view not found: activeUnitOf (while parsing)

## Get source and target nodes
Make sure to choose the right method to get the nodes

In [None]:
# Reactome stId values identifying the source entities.
source_ids = ['R-HSA-447100']

# Fetch the nodes for those ids via the 'stId' attribute.
source_nodes = database.get_nodes_by_attr(source_ids, 'stId')

# Show how many ids were requested next to how many nodes came back, so a
# missing match is visible before the paths are computed.
print(
    'stIds:', len(source_ids),
    ', nodes matched:', len(source_nodes),
)

In [None]:
# Reactome stId values identifying the target entities.
target_ids = ['R-HSA-2975974']

# Fetch the nodes for those ids via the 'stId' attribute.
target_nodes = database.get_nodes_by_attr(target_ids, 'stId')

# Show how many ids were requested next to how many nodes came back, so a
# missing match is visible before the paths are computed.
print(
    'stIds:', len(target_ids),
    ', nodes matched:', len(target_nodes),
)

## Create (sankey) graph file for shortest paths

In [None]:
# Generate the shortest-path traces between the source and target node sets,
# labelled with source_name / target_name.
# NOTE(review): presumably the resulting graph file is written under
# tracegraph.datadir (the output directory set earlier) — confirm against
# write_shortest_paths.
write_shortest_paths(
    tracegraph, source_name, source_nodes, target_name, target_nodes
)
