## Load modules

In [1]:
from py2neo import Graph, Node, Relationship

## Address of the graph
The Neo4j browser interface is available at http://localhost:7474/browser/.

In [2]:
# Open the py2neo handle to the Neo4j server at host "neo4j"; every later cell
# sends its Cypher through this `graph` object.
# NOTE(review): "neo4j" is presumably a Docker/compose service name - confirm
# it resolves from wherever this notebook's kernel runs.
graph = Graph(host="neo4j")

# Load graph
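To load the graph from scratch, run the following cells in order. **Warning:** the first cell deletes all nodes and relationships currently stored in the database.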

In [3]:
# DESTRUCTIVE: removes all nodes and relationships from the connected database
# so that the CREATE-based loads below start from an empty graph.
graph.delete_all()

## Load compounds

In [4]:
# Preview the header line and the first two data rows of the compound table
# to check the column names used by the load query.
!head -n3 /home/jovyan/data/import/KEGG_compound_entities.tsv

ID	NAME	SYNONYMS	FORMULA	PATHWAY	BRITE_HIERARCHY
C00001	H2O	H2O,Water	H2O	map00190,map00195,map00710,map01100,map01120,map04918,map04924,map04962,map04964,map04966,map04970,map04971,map04972,map04976,map05014	
C00002	ATP	ATP,Adenosine 5'-triphosphate	C10H16N5O13P3	map00190,map00195,map00230,map00231,map00908,map01100,map01110,map01130,map03070,map04020,map04066,map04080,map04142,map04217,map04611,map04621,map04714,map04721,map04742,map04750,map04911,map04917,map04924,map04930,map05012,map05133,map05410	Ribonucleotides,Nucleotides,Nucleic acids,br08001,Coenzymes,Cofactors,Vitamins and Cofactors,br08001


In [5]:
# Bulk-load compounds: one :COMPOUND node is created per data row of
# KEGG_compound_entities.tsv.
#   - USING PERIODIC COMMIT 10000 commits in batches of 10000 rows.
#   - The string is not a raw string, so Python turns '\t' into a real tab
#     character before Cypher sees it; the field terminator is a literal tab.
#   - ID is stored as the `id` property; NAME and FORMULA are stored as read.
#   - SYNONYMS, PATHWAY and BRITE_HIERARCHY are split on "," into list
#     properties.
#     NOTE(review): any name that itself contains a comma would be split into
#     several entries - confirm how the upstream export joins these fields.
#   - The file:/// URL is resolved by the Neo4j server, not by this kernel.
#     NOTE(review): presumably /home/jovyan/data/import is the same directory
#     mounted as Neo4j's import folder - confirm.
# CREATE (not MERGE) is used, so this cell is not idempotent: re-running it
# without first clearing the graph attempts to create every compound again.
query1 = '''USING PERIODIC COMMIT 10000
           LOAD CSV WITH HEADERS FROM  'file:///KEGG_compound_entities.tsv' AS line FIELDTERMINATOR '\t'
           CREATE (p:COMPOUND { id:line.ID, 
                                NAME:line.NAME, 
                                SYNONYMS:split(line.SYNONYMS, ","), 
                                FORMULA:line.FORMULA, 
                                PATHWAY:split(line.PATHWAY, ","), 
                                BRITE_HIERARCHY:split(line.BRITE_HIERARCHY, ",")})'''
graph.run(query1)

<py2neo.database.Cursor at 0x7f4220397550>

In [6]:
# Declare COMPOUND.id unique. This runs after the compound load, so it will
# fail if the loaded file contained duplicate IDs. The constraint is in place
# before the edge-loading cells look compounds up by id.
query2 = '''CREATE CONSTRAINT ON (compound:COMPOUND) ASSERT compound.id IS UNIQUE'''
graph.run(query2)

<py2neo.database.Cursor at 0x7f4211e71dd8>

## Load reactions

In [7]:
# Preview the header line and the first two data rows of the reaction table;
# note that NAME, NAME_EQUATION and EQUATION are double-quoted in the file.
!head -n3 /home/jovyan/data/import/KEGG_reaction_entities.tsv

ID	NAME	SYNONYMS	NAME_EQUATION	EQUATION	EC_NUMBERS	PATHWAY
R00001	"polyphosphate polyphosphohydrolase"	polyphosphate polyphosphohydrolase	"Polyphosphate + n H2O <=> (n+1) Oligophosphate"	"C00404 + n C00001 <=> (n+1) C02174"	3.6.1.10	
R00002	"Reduced ferredoxin:dinitrogen oxidoreductase (ATP-hydrolysing)"	Reduced ferredoxin:dinitrogen oxidoreductase (ATP-hydrolysing)	"16 ATP + 16 H2O + 8 Reduced ferredoxin <=> 8 e- + 16 Orthophosphate + 16 ADP + 8 Oxidized ferredoxin"	"16 C00002 + 16 C00001 + 8 C00138 <=> 8 C05359 + 16 C00009 + 16 C00008 + 8 C00139"	1.18.6.1	


In [9]:
# Bulk-load reactions: one :REACTION node is created per data row of
# KEGG_reaction_entities.tsv.
#   - USING PERIODIC COMMIT 10000 commits in batches of 10000 rows.
#   - The string is not a raw string, so '\t' reaches Cypher as a literal tab
#     character, which is used as the field terminator.
#   - ID is stored as the `id` property; NAME, EQUATION and NAME_EQUATION are
#     stored as read.
#   - SYNONYMS, EC_NUMBERS and PATHWAY are split on "," into list properties.
#     NOTE(review): a reaction name containing a comma would be split into
#     several SYNONYMS entries - confirm how the upstream export joins them.
# CREATE (not MERGE) is used, so this cell is not idempotent: re-running it
# without first clearing the graph attempts to create every reaction again.
query3 = '''USING PERIODIC COMMIT 10000
           LOAD CSV WITH HEADERS FROM  'file:///KEGG_reaction_entities.tsv' AS line FIELDTERMINATOR '\t'
           CREATE (p:REACTION { id:line.ID, 
                                NAME:line.NAME, 
                                SYNONYMS:split(line.SYNONYMS, ","), 
                                EC_NUMBERS:split(line.EC_NUMBERS, ","), 
                                EQUATION:line.EQUATION, 
                                NAME_EQUATION:line.NAME_EQUATION, 
                                PATHWAY:split(line.PATHWAY, ",") 
                                })'''
graph.run(query3)

<py2neo.database.Cursor at 0x7f4211e911d0>

In [10]:
# Declare REACTION.id unique. This runs after the reaction load, so it will
# fail if the loaded file contained duplicate IDs. The constraint is in place
# before the edge-loading cells look reactions up by id.
query4 = '''CREATE CONSTRAINT ON (reaction:REACTION) ASSERT reaction.id IS UNIQUE'''
graph.run(query4)

<py2neo.database.Cursor at 0x7f4210bae080>

# Load `reaction-compound` edges

## Products

In [11]:
# Preview the product edge list: reaction id, compound id and a stoichiometry
# value that is not always numeric (the first row shows "n+1").
!head -n3 /home/jovyan/data/import/KEGG_relationship_PRODUCT.tsv

rxnID	cpdID	STOICHIOMETRY
R00001	C02174	n+1
R00002	C05359	8


In [12]:
# Load product edges: for each row, look up the already-loaded REACTION and
# COMPOUND nodes by id and create (reaction)-[:PRODUCT]->(compound).
#   - USING PERIODIC COMMIT 500 commits in batches of 500 rows.
#   - STOICHIOMETRY is stored exactly as read from the file (the preview shows
#     non-numeric values such as "n+1"), with no numeric conversion.
#   - A row whose rxnID or cpdID matches no node yields no MATCH result, so it
#     is skipped silently and no relationship is created for it.
# CREATE (not MERGE) is used, so re-running this cell adds a second copy of
# every PRODUCT relationship.
query5 = '''USING PERIODIC COMMIT 500
            LOAD CSV WITH HEADERS FROM 'file:///KEGG_relationship_PRODUCT.tsv' AS line FIELDTERMINATOR '\t'
            MATCH (reaction:REACTION {id:line.rxnID}), (compound:COMPOUND {id:line.cpdID})
            CREATE (reaction)-[:PRODUCT {STOICHIOMETRY:line.STOICHIOMETRY}]->(compound)'''
graph.run(query5)

<py2neo.database.Cursor at 0x7f4210bae6d8>

## Substrates

In [13]:
# Preview the substrate edge list: reaction id, compound id and a stoichiometry
# value that is not always numeric (the second data row shows "n").
!head -n3 /home/jovyan/data/import/KEGG_relationship_SUBSTRATE.tsv

rxnID	cpdID	STOICHIOMETRY
R00001	C00404	1
R00001	C00001	n


In [14]:
# Load substrate edges: for each row, look up the already-loaded REACTION and
# COMPOUND nodes by id and create (compound)-[:SUBSTRATE]->(reaction).
#   - The edge points from compound to reaction, the reverse of the PRODUCT
#     edges, so paths read substrate -> reaction -> product.
#   - USING PERIODIC COMMIT 500 commits in batches of 500 rows.
#   - STOICHIOMETRY is stored exactly as read from the file (the preview shows
#     non-numeric values such as "n"), with no numeric conversion.
#   - A row whose rxnID or cpdID matches no node yields no MATCH result, so it
#     is skipped silently and no relationship is created for it.
# CREATE (not MERGE) is used, so re-running this cell adds a second copy of
# every SUBSTRATE relationship.
query6 = '''USING PERIODIC COMMIT 500
            LOAD CSV WITH HEADERS FROM 'file:///KEGG_relationship_SUBSTRATE.tsv' AS line FIELDTERMINATOR '\t'
            MATCH (reaction:REACTION {id:line.rxnID}), (compound:COMPOUND {id:line.cpdID})
            CREATE (compound)-[:SUBSTRATE {STOICHIOMETRY:line.STOICHIOMETRY}]->(reaction)'''
graph.run(query6)

<py2neo.database.Cursor at 0x7f4211e71c88>