```
---
title: Data Integration in BioPAX
tags: BioPAX, SPARQL, Reactome
lang: en
version: 0.10
date: 2023-06-02
---
```

In [1]:
import importlib
import json
import matplotlib.pyplot as plt
import os
import pandas
import rdflib
import rdflib.namespace
import sparqldataframe
import seaborn as sns
from SPARQLWrapper import SPARQLWrapper, JSON
import sys

# Widen text columns so the long IRIs returned by the SPARQL queries stay readable.
# The fully qualified option name is used on purpose: a bare "max_colwidth" is
# resolved by pattern matching and would become ambiguous if pandas ever added
# another option containing that name.
pandas.set_option("display.max_colwidth", 80)

In [2]:
# --- Notebook configuration -------------------------------------------------
# SPARQL endpoint that receives every query of this notebook
# (passed as first argument to sparqldataframe.query in the query cells).
endpointURL = "http://localhost:3030/chebi_reactome/query"

# Reactome release number; the same number appears in the `reactome:` prefix IRI.
# NOTE(review): not referenced by any cell visible in this notebook.
reactomeVersion = 84

# NOTE(review): presumably an RDF serialization name for rdflib -- not referenced
# by any cell visible in this notebook; confirm before relying on it.
rdfFormat = "turtle"

In [3]:
# SPARQL prologue that the query cells prepend to each query (`prefixes+query`).
# It is assembled line by line so each namespace group can carry a comment; the
# empty entries reproduce the blank separator lines and the leading/trailing newline.
prefixLines = [
    "",
    # W3C core vocabularies
    "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
    "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>",
    "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
    "",
    # Dublin Core
    "PREFIX dc: <http://purl.org/dc/elements/1.1/>",
    "PREFIX dcterms: <http://purl.org/dc/terms/>",
    "",
    # BioPAX level 3 ontology and the namespace of the Reactome export (release 84)
    "PREFIX bp3: <http://www.biopax.org/release/biopax-level3.owl#>",
    "PREFIX reactome: <http://www.reactome.org/biopax/84/48887#>",
    "",
    # UniProt core ontology, database registry and protein entries
    "PREFIX up:<http://purl.uniprot.org/core/>",
    "PREFIX udb: <http://purl.uniprot.org/database/>",
    "PREFIX uniprot: <http://purl.uniprot.org/uniprot/>",
    "",
    # ChEBI relations and OBO-in-OWL annotation properties
    "PREFIX chebirel: <http://purl.obolibrary.org/obo/chebi#>",
    "PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>",
    "",
]
prefixes = "\n".join(prefixLines)

# A. Retrieve Gene and Protein in the BioPAX export of Reactome

## Co-expressed gene modules of interest:

In [4]:
# Gene lists of the co-expression modules, one variable per module (each variable
# is named after a colour).
# Every value is a single string of space-separated, double-quoted gene names, so it
# can be spliced as-is into a SPARQL `VALUES ?hgncName { ... }` clause by the query
# cells (see how `current` is concatenated into the queries).
# NOTE(review): some entries (e.g. LOC* identifiers, mixed-case names such as "Il6st")
# may not match any HGNC name exactly -- confirm whether that is expected.
white = '"SLC8A3" "ACAN" "ACVR2B" "ZUFSP" "TM4SF18" "PRKCE" "CHCHD3" "IGDCC3" "GABBR2" "DGKA" "AFF1" "HTR7" "KIAA0247" "PROX1" "LOC102158595" "C8orf37" "RSF1" "TBC1D19" "MTUS2" "LUM" "SMTN" "LDB3" "LOC102162623" "HRH1" "TMEM14C" "FST" "TBR1" "GSK3A"'
darkorange = '"DNAJB9" "WBSCR27" "LOC396781" "SERHL2" "CCR10" "CACNG5" "TNFRSF17" "SRM" "DERL3" "RFC3" "TAOK3" "CLDN3" "IGLL5" "BMP6" "LOC524810" "IGHA" "LOC100037924" "IGL" "BCL2A1" "IGLV-10" "PADI2" '
royalblue = '"TMEM161B" "IZUMO4" "ZNF791" "ANO9" "DPP4" "PPP1R26" "BEX4" "RRAGD" "RASGEF1B" "CD3G" "STRN" "TGFBR3" "GATA3" "PLVAP" "TNFRSF16" "NIPSNAP3B" "KCTD12" "CD3E" "LOC101903221" "SLC4A11" "CD3D" "KATNA1" "SLC22A17" "FMNL3" "SOX4" "CCR7" "ID3" "ZCCHC10" "EEPD1" "SLC25A13" "F8" "SLC27A3" "ARHGEF16" "ETNK2" "SH3BP4" "SPATA21" "FCER1A" "LAT" "MYO10" "ENC1" "CDH24" "LOC100154277" "EZR" "PTTG1" "PRUNE2" "PCYOX1L" "CD247" "CCDC86" "LOC100621726" "FGFR1OP2" "PLEKHH1" "C2H19orf42" "LOC100738180" "DSN1" "TOMM34" "PLXND1" "IFT80" "SYTL3" "NIPSNAP3A" "GRAP2" "POFUT1" "P311" "UBE2J1" "FYN" "SKAP1" "BLK" "LOC100627089" "ACVR2A" "ZAP70" "EPHB6" "DTNB" "C1orf186" "L1CAM" "TCF12" "EPHA7" "ITGAV" "SLC4A4" "NPY" "CATSPERG" "LPAR3" "SEPT6" '
violet = '"MUC4" "PTGS1" "HSP70.2" "SLPI" "RNF39" "TR10D" "AXL" "ZNF7" "MED8" "LOC100627004" "P2RY1" "C15H11orf96" "BAG3" "HLA-E" "HSPA1B" "TRIM26"'
darkred = '"JPH4" "ZFAND6" "RPL38" "LOC102166814" "UBP30" "PFN1" "EIF1B" "FXYD6" "LOC100737327" "FBXO30" "Il6st" "ZNF451" "TTC21A" "SLC35A5" "SLCO2B1" "LOC100622689" "RNF14" "RPL14" "FAM171A2" "SLC22A15" "CCR3" "SYNGAP1" "KIAA1644" "LOC102164756" "GNMT" "CWF19L2" "ATP6V1C2" "C8orf37" "ITSN1" "PREX1" "TMEM52B" "TXLNB" "RAB23" "RPL35" "KRTCAP3" "POLB" "WWP1" "PGBD1" "SLC46A2" "PCIF1" "E4" "ENO3" "CD300C" "FAM102A" "ANAPC4" "YWHAZ" "PIGL" "PLCD4" "PLA2G12A" "DHX33" "CAPSL" "ARID3B" "RGL2"'
darkolivegreen = '"LOC100513097" "PSMB8" "TUBB6" "SLA-DRB1" "ADM" "PERP" "GLI2" "PCK2" "NCR3" "GLO1" "PTGER3" "LOC100516920" "SLA-7"'
steelblue = '"SLA-1" "LOC102161909" "CLU" "SLA-3" "TINF2" "SLC12A1" "PXMP2" "EI24" "PSME2" "UQCC2" "H2-Q4" "LOC100515902" "LOC100515735" "SLA3" "GMPR2" "LOC100622689" "SNUPN" "SLA-DQA1" "EMC9" "LOC100522150" "SLA-2" "LOC100622791" "SLA-5" '
lightcyan = '"FCRLA" "GFRA1" "ANKRD42" "RYBP" "ITGB5" "SLA-DRB1" "BCL2" "TRAF3IP2" "DCTD" "CD79A" "DECR1" "AGMAT" "LY86" "TLR1" "KLK1" "SLC5A4" "LOC100524883" "CD40" "ELL3" "AMIGO2" "PIKFYVE" "TMEM163" "ORAOV1" "ABCA3" "C6H16orf74" "PRDX5" "CAND2" "SIN3A" "ANGPTL4" "FCGR2B" "CD302" "F12" "CCS" "HIP1R" "ZBTB32" "TLR10" "DDC" "CHCHD10" "ZNRF3" "NXN" "DACT3" "XKRX" "COBLL1" "CYAC3" "CYSTM1" "SLC34A3" "SLA-DOA" "CD19" "CD180" "SYK" "MS4A1" "C17H20orf106" "SLA-DQA1" "DSE" "ZKSCAN1" "IL4I1" "CLCN6" "EBF1" "TCF4" "CBFA2T3" "SLA-DOB" "GCK" "ZBTB44" "VAV2" "CD1.1" "VPREB3" "SLC35D2" "PCMTD1" "CYB5B" "LOC100130458" "SPIB" "SREBF2" "STX7" "Mef2c" "MLX" "MYBL1" "C1H9orf9" "HTRA1" "SLC23A1" "NDUFB9" "BLNK" "DNMBP" "MYOF" "BHLHE41" "CD72" "TPD52" "HMG20A" "SLA-DRA" "IGLV-4" "IL11RA" "GZMH" "SERPINF2" "POU2AF1" "CD79B" "IGHD" "LAMP3" "LOC102158858" "C4BPB" "BCL9L" "KIAA0556" "ITGAD" "N4BP2L1" "UHRF1BP1L" "CAMK2N1" "GLIS2" "SHISA2" "LIMD1" "PGAM2" "DTNBP1" "RALGPS2" "FCRL1" "CBR3" "LHCGR" "LOC100620852"'

In [5]:
# Select the co-expression module whose gene list is injected into the SPARQL
# queries of section A (through the `current` variable).
# The modules are registered by name so that switching module means editing one
# string, instead of commenting/uncommenting assignments; a misspelled name fails
# immediately with a KeyError rather than silently keeping the previous selection.
geneModules = {
    "white": white,
    "darkorange": darkorange,
    "royalblue": royalblue,
    "violet": violet,
    "darkred": darkred,
    "darkolivegreen": darkolivegreen,
    "steelblue": steelblue,
    "lightcyan": lightcyan,
}

# Name of the module to analyse -- change this value to study another module.
currentModuleName = "royalblue"

# Quoted gene names of the selected module, ready for a SPARQL VALUES clause.
current = geneModules[currentModuleName]

We use a three-step federated SPARQL query that takes a list of gene symbols as input:
- First, it queries the SPARQL endpoint of the UniProt database to get all the reviewed UniProt entries that cross-reference the HGNC record of each gene symbol.
- Second, it looks for the corresponding ProteinReferences in the BioPAX file.
- Finally, it identifies all the Proteins associated with each ProteinReference.


### 1. Retrieve the UniProt IDs linked to the genes of the selected module:

In [6]:
# Step 1 -- federated lookup on the UniProt endpoint: for every gene name of the
# selected module, find the HGNC resource carrying that name (rdfs:comment) and the
# reviewed UniProt proteins that cross-reference it (rdfs:seeAlso).
# The query text is split around the VALUES clause so the gene list can be inserted.
queryBeforeGenes = """
SELECT DISTINCT * 
WHERE {

  SERVICE <https://sparql.uniprot.org/sparql> {
    VALUES ?hgncName { """
queryAfterGenes = """ }
    ?hgnc rdf:type up:Resource .
    ?hgnc up:database udb:HGNC .
    ?hgnc rdfs:comment ?hgncName .

    ?uniprotID rdfs:seeAlso ?hgnc .
    ?uniprotID rdf:type up:Protein .
    ?uniprotID up:reviewed "true"^^xsd:boolean .
  }
 }
"""
# `current` is the quoted, space-separated gene list of the selected module.
query = "".join([queryBeforeGenes, current, queryAfterGenes])

# Send prologue + query to the local endpoint and display the resulting table.
df = sparqldataframe.query(endpointURL, prefixes + query)
df

Unnamed: 0,hgncName,hgnc,uniprotID
0,DPP4,http://purl.uniprot.org/hgnc/3009,http://purl.uniprot.org/uniprot/P27487
1,FMNL3,http://purl.uniprot.org/hgnc/23698,http://purl.uniprot.org/uniprot/Q8IVF7
2,GATA3,http://purl.uniprot.org/hgnc/4172,http://purl.uniprot.org/uniprot/P23771
3,BLK,http://purl.uniprot.org/hgnc/1057,http://purl.uniprot.org/uniprot/P51451
4,ARHGEF16,http://purl.uniprot.org/hgnc/15515,http://purl.uniprot.org/uniprot/Q5VV41
...,...,...,...
66,PLEKHH1,http://purl.uniprot.org/hgnc/17733,http://purl.uniprot.org/uniprot/Q9ULM0
67,KCTD12,http://purl.uniprot.org/hgnc/14678,http://purl.uniprot.org/uniprot/Q96CX2
68,PTTG1,http://purl.uniprot.org/hgnc/9690,http://purl.uniprot.org/uniprot/O95997
69,NPY,http://purl.uniprot.org/hgnc/7955,http://purl.uniprot.org/uniprot/P01303


### 2. Retrieve the bp3:ProteinReferences annotated with these UniProt IDs:

In [7]:
# Step 2 -- map the UniProt entries of the selected module to BioPAX ProteinReferences.
# The SERVICE block is the UniProt lookup of step 1; the local part then:
#   1. derives the bare UniProt accession from the entry IRI,
#   2. finds the xref whose bp3:id is that accession and whose bp3:db is "UniProt",
#   3. returns the bp3:ProteinReference pointing to that xref.
# The accession is extracted with STRAFTER (literal prefix strip) rather than REPLACE,
# because REPLACE interprets its pattern as a regular expression: the dots of the
# namespace IRI would match any character and the match would not be anchored.
query = """
SELECT DISTINCT ?hgncName ?uniprotID ?protref
WHERE {

  SERVICE <https://sparql.uniprot.org/sparql> {
    VALUES ?hgncName {  """+ current + """ }
    ?hgnc rdf:type up:Resource .
    ?hgnc up:database udb:HGNC .
    ?hgnc rdfs:comment ?hgncName .

    ?uniprotID rdfs:seeAlso ?hgnc .
    ?uniprotID rdf:type up:Protein .
    ?uniprotID up:reviewed "true"^^xsd:boolean .
  }

 BIND (STRAFTER(STR(?uniprotID), "http://purl.uniprot.org/uniprot/") AS ?localUniprotID)

 ?xref bp3:id ?localUniprotID .
 ?xref bp3:db "UniProt" .
  ?protref bp3:xref ?xref .
  ?protref rdf:type bp3:ProteinReference .
}  
ORDER BY ?protref
"""
# Send prologue + query to the local endpoint and display the resulting table.
df = sparqldataframe.query(endpointURL, prefixes+query)
df

Unnamed: 0,hgncName,uniprotID,protref
0,EPHA7,http://purl.uniprot.org/uniprot/Q15375,http://www.reactome.org/biopax/84/48887#ProteinReference10042
1,EPHB6,http://purl.uniprot.org/uniprot/O15197,http://www.reactome.org/biopax/84/48887#ProteinReference10047
2,SLC27A3,http://purl.uniprot.org/uniprot/Q5K4L6,http://www.reactome.org/biopax/84/48887#ProteinReference10533
3,ETNK2,http://purl.uniprot.org/uniprot/Q9NVF9,http://www.reactome.org/biopax/84/48887#ProteinReference10677
4,FYN,http://purl.uniprot.org/uniprot/P06241,http://www.reactome.org/biopax/84/48887#ProteinReference113
5,IFT80,http://purl.uniprot.org/uniprot/Q9P2H3,http://www.reactome.org/biopax/84/48887#ProteinReference11523
6,SH3BP4,http://purl.uniprot.org/uniprot/Q9P0V3,http://www.reactome.org/biopax/84/48887#ProteinReference11645
7,STRN,http://purl.uniprot.org/uniprot/O43815,http://www.reactome.org/biopax/84/48887#ProteinReference137
8,ANO9,http://purl.uniprot.org/uniprot/A1A5B4,http://www.reactome.org/biopax/84/48887#ProteinReference1662
9,CD3G,http://purl.uniprot.org/uniprot/P09693,http://www.reactome.org/biopax/84/48887#ProteinReference1937


### 3. Identify the bp3:Proteins associated to these bp3:ProteinReferences:

In [8]:
# Step 3 -- extend step 2 down to the BioPAX physical entities: after resolving the
# ProteinReference of each UniProt entry, return every ?prot whose
# bp3:entityReference is that ProteinReference.
# The accession is extracted with STRAFTER (literal prefix strip) rather than REPLACE,
# because REPLACE interprets its pattern as a regular expression: the dots of the
# namespace IRI would match any character and the match would not be anchored.
query = """

SELECT DISTINCT ?hgncName ?uniprotID ?protref ?prot 
WHERE {

  SERVICE <https://sparql.uniprot.org/sparql> {
    VALUES ?hgncName {  """+ current + """ }
    ?hgnc rdf:type up:Resource .
    ?hgnc up:database udb:HGNC .
    ?hgnc rdfs:comment ?hgncName .

    ?uniprotID rdfs:seeAlso ?hgnc .
    ?uniprotID rdf:type up:Protein .
    ?uniprotID up:reviewed "true"^^xsd:boolean .
  }

 BIND (STRAFTER(STR(?uniprotID), "http://purl.uniprot.org/uniprot/") AS ?localUniprotID)

 ?xref bp3:id ?localUniprotID .
 ?xref bp3:db "UniProt" .
  ?protref bp3:xref ?xref .
  ?protref rdf:type bp3:ProteinReference .
  ?prot bp3:entityReference ?protref .
}  
ORDER BY ?protref
"""
# Send prologue + query to the local endpoint and display the resulting table.
df = sparqldataframe.query(endpointURL, prefixes+query)
df

Unnamed: 0,hgncName,uniprotID,protref,prot
0,EPHA7,http://purl.uniprot.org/uniprot/Q15375,http://www.reactome.org/biopax/84/48887#ProteinReference10042,http://www.reactome.org/biopax/84/48887#Protein28443
1,EPHB6,http://purl.uniprot.org/uniprot/O15197,http://www.reactome.org/biopax/84/48887#ProteinReference10047,http://www.reactome.org/biopax/84/48887#Protein28449
2,EPHB6,http://purl.uniprot.org/uniprot/O15197,http://www.reactome.org/biopax/84/48887#ProteinReference10047,http://www.reactome.org/biopax/84/48887#Protein28488
3,EPHB6,http://purl.uniprot.org/uniprot/O15197,http://www.reactome.org/biopax/84/48887#ProteinReference10047,http://www.reactome.org/biopax/84/48887#Protein28494
4,EPHB6,http://purl.uniprot.org/uniprot/O15197,http://www.reactome.org/biopax/84/48887#ProteinReference10047,http://www.reactome.org/biopax/84/48887#Protein28500
...,...,...,...,...
195,POFUT1,http://purl.uniprot.org/uniprot/Q9H488,http://www.reactome.org/biopax/84/48887#ProteinReference9165,http://www.reactome.org/biopax/84/48887#Protein25920
196,PLXND1,http://purl.uniprot.org/uniprot/Q9Y4D7,http://www.reactome.org/biopax/84/48887#ProteinReference9186,http://www.reactome.org/biopax/84/48887#Protein26009
197,SOX4,http://purl.uniprot.org/uniprot/Q06945,http://www.reactome.org/biopax/84/48887#ProteinReference9213,http://www.reactome.org/biopax/84/48887#Protein26264
198,EEPD1,http://purl.uniprot.org/uniprot/Q7L9B9,http://www.reactome.org/biopax/84/48887#ProteinReference9329,http://www.reactome.org/biopax/84/48887#Protein26639


# B. Retrieve SmallMolecules in the BioPAX export of Reactome

- Starting from a list of metabolite names, we used a SPARQL query to retrieve the ChEBI IDs of the descendants of each molecule and of its possible enantiomer.

- Then, with the next SPARQL query, we identified the corresponding entities in Reactome. More precisely, for each molecule, we looked for the SmallMoleculeReferences that are annotated with the corresponding ChEBI IDs; then, we identified all the SmallMolecules associated with these entity references.

In [9]:
# Exploratory query: list every resource typed as owl:ObjectProperty (or as a
# subclass of it) together with its rdfs:label when one exists.
# The query used to be only assigned -- and then overwritten by the next cell -- so
# this cell produced nothing; it is now executed and displayed like the other
# query cells of the notebook.
query = """
SELECT ?property ?propertyLabel
WHERE {
   ?property rdf:type/(rdfs:subClassOf*) owl:ObjectProperty .
   #OPTIONAL { ?property rdfs:label|oboInOwl:shorthand ?propertyLabel . }
   OPTIONAL { ?property rdfs:label ?propertyLabel . }
}
"""
df = sparqldataframe.query(endpointURL, prefixes+query)
df

### 1. Retrieve the ChEBI IDs of descendants of the molecules and their possible enantiomers

In [10]:
# For each ChEBI ID of interest, list (?molecule2):
#   - the classes that are rdfs:subClassOf* the molecule itself ("is-a" descendants,
#     the molecule included), and
#   - the classes that are rdfs:subClassOf* its declared enantiomer
#     (owl:Restriction on chebirel:is_enantiomer_of).

# ChEBI identifiers of interest, laid out exactly as the body of the VALUES clause.
chebiIdValues = """   "CHEBI:39027" "CHEBI:24996" "CHEBI:39026" "CHEBI:39025" "CHEBI:24898"
"CHEBI:25017" "CHEBI:27266" "CHEBI:37054" "CHEBI:26986"
"CHEBI:16449" "CHEBI:25094" "CHEBI:29016" "CHEBI:30089"
"CHEBI:26271" "CHEBI:28300" "CHEBI:16811" "CHEBI:15347"
"CHEBI:15361" "CHEBI:26806" "CHEBI:133748" "CHEBI:18186"
"CHEBI:16919" "CHEBI:16737" "CHEBI:15354" "CHEBI:17234"
"CHEBI:22860" "CHEBI:15724" "CHEBI:17268" "CHEBI:17925"
"CHEBI:27570" "CHEBI:28044" "CHEBI:15740" "CHEBI:178059"
"CHEBI:30805" "CHEBI:30813" "CHEBI:27781" "CHEBI:42504"
"CHEBI:168544" "CHEBI:28716" "CHEBI:28842" "CHEBI:16196"
"CHEBI:17351" "CHEBI:25048" "CHEBI:28822" "CHEBI:32425"
"CHEBI:72850" "CHEBI:28941" "CHEBI:28792" "CHEBI:15428"
"""

query = """
SELECT DISTINCT ?id ?molecule2
WHERE {
  VALUES ?id {""" + chebiIdValues + """  }
  ?molecule oboInOwl:id ?id .
  {
    ?molecule rdfs:subClassOf ?restriction .
    #?restriction ?rel ?b .
    ?restriction rdf:type owl:Restriction .
    ?restriction owl:onProperty chebirel:is_enantiomer_of .
    ?restriction owl:someValuesFrom ?enantDirect .
    ?molecule2 rdfs:subClassOf* ?enantDirect .
  }  UNION  {
    ?molecule2 rdfs:subClassOf* ?molecule .
  }  
}
ORDER BY ?id ?molecule2
"""

# Send prologue + query to the local endpoint and display the resulting table.
df = sparqldataframe.query(endpointURL, prefixes + query)
df

Unnamed: 0,id,molecule2
0,CHEBI:133748,http://purl.obolibrary.org/obo/CHEBI_132362
1,CHEBI:133748,http://purl.obolibrary.org/obo/CHEBI_133748
2,CHEBI:133748,http://purl.obolibrary.org/obo/CHEBI_16947
3,CHEBI:133748,http://purl.obolibrary.org/obo/CHEBI_35802
4,CHEBI:133748,http://purl.obolibrary.org/obo/CHEBI_35804
...,...,...
160,CHEBI:39027,http://purl.obolibrary.org/obo/CHEBI_39027
161,CHEBI:42504,http://purl.obolibrary.org/obo/CHEBI_42504
162,CHEBI:72850,http://purl.obolibrary.org/obo/CHEBI_72850
163,CHEBI:72850,http://purl.obolibrary.org/obo/CHEBI_73731


In [11]:
# Count, for each ChEBI ID of interest, the distinct classes reached either through
# its enantiomer (and the enantiomer's "is-a" descendants) or through its own
# "is-a" descendants. The two alternatives are named and then joined with UNION.

# Alternative 1: classes under the enantiomer declared on the molecule.
enantiomerBranch = """  {
    ?molecule rdfs:subClassOf ?restriction .
    ?restriction rdf:type owl:Restriction .
    ?restriction owl:onProperty chebirel:is_enantiomer_of .
    ?restriction owl:someValuesFrom ?enantDirect .
    ?molecule2 rdfs:subClassOf* ?enantDirect .
  }"""

# Alternative 2: classes under the molecule itself (the molecule included).
descendantBranch = """{
    ?molecule2 rdfs:subClassOf* ?molecule .
  }"""

queryHead = """
SELECT ?id (COUNT(DISTINCT (?molecule2)) AS ?nbchebi)
WHERE {
  VALUES ?id {   "CHEBI:39027" "CHEBI:24996" "CHEBI:39026" "CHEBI:39025" "CHEBI:24898"
"CHEBI:25017" "CHEBI:27266" "CHEBI:37054" "CHEBI:26986"
"CHEBI:16449" "CHEBI:25094" "CHEBI:29016" "CHEBI:30089"
"CHEBI:26271" "CHEBI:28300" "CHEBI:16811" "CHEBI:15347"
"CHEBI:15361" "CHEBI:26806" "CHEBI:133748" "CHEBI:18186"
"CHEBI:16919" "CHEBI:16737" "CHEBI:15354" "CHEBI:17234"
"CHEBI:22860" "CHEBI:15724" "CHEBI:17268" "CHEBI:17925"
"CHEBI:27570" "CHEBI:28044" "CHEBI:15740" "CHEBI:178059"
"CHEBI:30805" "CHEBI:30813" "CHEBI:27781" "CHEBI:42504"
"CHEBI:168544" "CHEBI:28716" "CHEBI:28842" "CHEBI:16196"
"CHEBI:17351" "CHEBI:25048" "CHEBI:28822" "CHEBI:32425"
"CHEBI:72850" "CHEBI:28941" "CHEBI:28792" "CHEBI:15428"
  }
  ?molecule oboInOwl:id ?id .
"""

# The explicit escapes keep the original spacing of the closing lines visible.
queryTail = "  \n}\nGROUP BY ?id\nORDER BY ?nbchebi\n"

query = queryHead + enantiomerBranch + "  UNION  " + descendantBranch + queryTail

# Send prologue + query to the local endpoint and display the resulting table.
df = sparqldataframe.query(endpointURL, prefixes + query)
df

Unnamed: 0,id,nbchebi
0,CHEBI:15347,1
1,CHEBI:15354,1
2,CHEBI:15361,1
3,CHEBI:15724,1
4,CHEBI:15740,1
5,CHEBI:16737,1
6,CHEBI:168544,1
7,CHEBI:16919,1
8,CHEBI:17351,1
9,CHEBI:178059,1


# C. Compute paths in Neo4j between nodes of interest using Cypher queries

### 1. Characterize shortest paths within modules of co-expressed genes

Example with module 1:
```cypher
// All shortest paths between every pair of target proteins of one module.
// target_proteins holds the URIs of the Protein nodes of interest
// (presumably the ?prot values returned by the section A queries -- confirm).
WITH ["http://www.reactome.org/biopax/84/48887#Protein25236",
"http://www.reactome.org/biopax/84/48887#Protein6259",
"http://www.reactome.org/biopax/84/48887#Protein6268",
"http://www.reactome.org/biopax/84/48887#Protein6276",
"http://www.reactome.org/biopax/84/48887#Protein6284",
"http://www.reactome.org/biopax/84/48887#Protein6374",
"http://www.reactome.org/biopax/84/48887#Protein22901",
"http://www.reactome.org/biopax/84/48887#Protein29026",
"http://www.reactome.org/biopax/84/48887#Protein29036",
"http://www.reactome.org/biopax/84/48887#Protein29048",
"http://www.reactome.org/biopax/84/48887#Protein29056",
"http://www.reactome.org/biopax/84/48887#Protein29064",
"http://www.reactome.org/biopax/84/48887#Protein29072",
"http://www.reactome.org/biopax/84/48887#Protein13024",
"http://www.reactome.org/biopax/84/48887#Protein13027",
"http://www.reactome.org/biopax/84/48887#Protein16435",
"http://www.reactome.org/biopax/84/48887#Protein21245",
"http://www.reactome.org/biopax/84/48887#Protein24791",
"http://www.reactome.org/biopax/84/48887#Protein25270",
"http://www.reactome.org/biopax/84/48887#Protein25541",
"http://www.reactome.org/biopax/84/48887#Protein628",
"http://www.reactome.org/biopax/84/48887#Protein631",
"http://www.reactome.org/biopax/84/48887#Protein6966",
"http://www.reactome.org/biopax/84/48887#Protein6256",
"http://www.reactome.org/biopax/84/48887#Protein6265",
"http://www.reactome.org/biopax/84/48887#Protein6273",
"http://www.reactome.org/biopax/84/48887#Protein6281",
"http://www.reactome.org/biopax/84/48887#Protein6371",
"http://www.reactome.org/biopax/84/48887#Protein22913",
"http://www.reactome.org/biopax/84/48887#Protein22914",
"http://www.reactome.org/biopax/84/48887#Protein22915",
"http://www.reactome.org/biopax/84/48887#Protein23336",
"http://www.reactome.org/biopax/84/48887#Protein23337",
"http://www.reactome.org/biopax/84/48887#Protein23338",
"http://www.reactome.org/biopax/84/48887#Protein23340",
"http://www.reactome.org/biopax/84/48887#Protein23341",
"http://www.reactome.org/biopax/84/48887#Protein23342",
"http://www.reactome.org/biopax/84/48887#Protein23352",
"http://www.reactome.org/biopax/84/48887#Protein23353",
"http://www.reactome.org/biopax/84/48887#Protein23354",
"http://www.reactome.org/biopax/84/48887#Protein23356",
"http://www.reactome.org/biopax/84/48887#Protein23357",
"http://www.reactome.org/biopax/84/48887#Protein23358",
"http://www.reactome.org/biopax/84/48887#Protein23361",
"http://www.reactome.org/biopax/84/48887#Protein23362",
"http://www.reactome.org/biopax/84/48887#Protein23363",
"http://www.reactome.org/biopax/84/48887#Protein23365",
"http://www.reactome.org/biopax/84/48887#Protein23366",
"http://www.reactome.org/biopax/84/48887#Protein23367",
"http://www.reactome.org/biopax/84/48887#Protein29023",
"http://www.reactome.org/biopax/84/48887#Protein29033",
"http://www.reactome.org/biopax/84/48887#Protein29045",
"http://www.reactome.org/biopax/84/48887#Protein29054",
"http://www.reactome.org/biopax/84/48887#Protein29061",
"http://www.reactome.org/biopax/84/48887#Protein29069",
"http://www.reactome.org/biopax/84/48887#Protein24014",
"http://www.reactome.org/biopax/84/48887#Protein24029",
"http://www.reactome.org/biopax/84/48887#Protein30872",
"http://www.reactome.org/biopax/84/48887#Protein25523",
"http://www.reactome.org/biopax/84/48887#Protein23821",
"http://www.reactome.org/biopax/84/48887#Protein16636"] AS target_proteins
// Paths may only traverse the listed relationship types, in either direction.
// The variable-length relationship is left anonymous: its former variable `r` was
// never used, and the whole path is already bound to `path`.
MATCH (p1:Protein), (p2:Protein), path = allShortestPaths((p1)-[:component|controlled|controller|left|right*]-(p2))
// p1.uri < p2.uri keeps each unordered pair once and excludes p1 = p2.
WHERE p1.uri IN target_proteins AND p2.uri IN target_proteins AND p1.uri < p2.uri
// For each path: its endpoints, its length, and the number and URIs of the
// BiochemicalReaction and SmallMolecule nodes it goes through.
RETURN  p1.uri, 
        p2.uri, 
        length(path), 
        size([n IN nodes(path) WHERE n:BiochemicalReaction]) AS nbBiochemicalReaction,
        [n IN nodes(path) WHERE n:BiochemicalReaction | n.uri] AS uriBR,
        size([n IN nodes(path) WHERE n:SmallMolecule]) AS nbSmallMolecule,
        [n IN nodes(path) WHERE n:SmallMolecule | n.uri] AS uriSM;
```