## Load modules

In [1]:
from py2neo import Graph, Node, Relationship

## Address of the graph
The graph can be inspected interactively in the Neo4j Browser at http://localhost:7474/browser/.

In [2]:
# Open a py2neo handle to the Neo4j server at hostname "neo4j". Only the host is given,
# so every other connection setting is left at its py2neo default.
# All Cypher statements below are sent through this `graph` object.
graph = Graph(host="neo4j")

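To load the graph from scratch, first run `import_neo4j_kegg_backbone.ipynb`: the "Link proteins to reactions" section below matches `REACTION` nodes that this notebook expects to exist already and does not create itself.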

## Babies

In [3]:
# Preview the header and the first two data rows of the baby metadata table.
!head -n3 /home/jovyan/data/import/baby_entities.tsv

ID	NEC	NEC_DIAGNOSIS_DOL	BIRTH_AGE	FEEDING	DELIVERY	BIRTH_WEIGHT	INFECTION	INFECTION_DIAGNOSIS_DOL	SEX	ANTIBIOTIC_TREATMENT
3	No		26	Breast	C-section	822	No		F	
19	No		24	Combination	C-section	731	Yes	23-35//23-36//23-37//23-38//23-39//23-40//23-41//23-42//23-43//23-44//23-45//23-46//23-47//23-48//23-49//23-50//23-51//23-52//23-53//23-54//23-55//23-56	F	


In [4]:
# Create one BABY node per row of baby_entities.tsv (tab-separated, with a header row),
# committing in batches of 10000 rows.
# - `id` and `NAME` are both filled from the ID column.
# - INFECTION_DIAGNOSIS_DOL and ANTIBIOTIC_TREATMENT are split on "//" into lists;
#   every other column is copied through unchanged.
# - This is a non-raw Python string, so the `\t` below reaches Cypher as a literal tab.
# NOTE(review): CREATE is used, so running this cell again on a populated graph attempts
#   to add the same babies a second time -- confirm the graph is empty before running.
# NOTE(review): no toInteger()/toFloat() is applied to BIRTH_AGE or BIRTH_WEIGHT, so they
#   are presumably stored as strings -- confirm that is what downstream queries expect.
query1 = '''USING PERIODIC COMMIT 10000
           LOAD CSV WITH HEADERS FROM  'file:///baby_entities.tsv' AS line FIELDTERMINATOR '\t'
           CREATE (p:BABY { id:line.ID, 
                            NAME:line.ID, 
                            NEC:line.NEC, 
                            NEC_DIAGNOSIS_DOL:line.NEC_DIAGNOSIS_DOL, 
                            BIRTH_AGE:line.BIRTH_AGE, 
                            FEEDING:line.FEEDING, 
                            DELIVERY:line.DELIVERY, 
                            BIRTH_WEIGHT:line.BIRTH_WEIGHT, 
                            INFECTION:line.INFECTION, 
                            INFECTION_DIAGNOSIS_DOL:split(line.INFECTION_DIAGNOSIS_DOL, "//"), 
                            SEX:line.SEX, 
                            ANTIBIOTIC_TREATMENT:split(line.ANTIBIOTIC_TREATMENT, "//")})'''
graph.run(query1)

<py2neo.graph.Cursor at 0x7f51a0329470>

In [5]:
# Require BABY.id to be unique. The relationship import further down looks babies up by
# this property.
# NOTE(review): the constraint is added after the BABY nodes were created, so duplicate
#   IDs in the TSV would presumably surface here rather than during the load -- confirm.
query2 = '''CREATE CONSTRAINT ON (baby:BABY) ASSERT baby.id IS UNIQUE'''
graph.run(query2)

<py2neo.graph.Cursor at 0x7f51a0329c88>

## Samples

In [6]:
# Preview the header and the first two data rows of the sample metadata table.
!head -n3 /home/jovyan/data/import/sample_entities.tsv

ID	DAY	GESTATION_WEEK	BABY	NUM_REPS
S_19.12.1	12	25	19	2
S_19.16.1	16	26	19	2


In [7]:
# Create one SAMPLE node per row of sample_entities.tsv (tab-separated, with a header row),
# committing in batches of 10000 rows.
# - `id` and `NAME` are both filled from the ID column.
# - BABY is stored as a plain property holding the baby's ID; the actual BABY->SAMPLE
#   relationship is created by a separate cell.
# NOTE(review): CREATE is used, so running this cell again on a populated graph attempts
#   to add the same samples a second time.
# NOTE(review): DAY, GESTATION_WEEK and NUM_REPS are copied without toInteger(), so they
#   are presumably stored as strings -- confirm that is what downstream queries expect.
query3 = '''USING PERIODIC COMMIT 10000
           LOAD CSV WITH HEADERS FROM  'file:///sample_entities.tsv' AS line FIELDTERMINATOR '\t'
           CREATE (p:SAMPLE { id:line.ID, 
                              NAME:line.ID, 
                              BABY:line.BABY, 
                              DAY:line.DAY, 
                              GESTATION_WEEK:line.GESTATION_WEEK, 
                              NUM_REPS:line.NUM_REPS})'''
graph.run(query3)

<py2neo.graph.Cursor at 0x7f51a034a0f0>

In [8]:
# Require SAMPLE.id to be unique. The relationship imports further down look samples up
# by this property.
# NOTE(review): the constraint is added after the SAMPLE nodes were created, so duplicate
#   IDs in the TSV would presumably surface here rather than during the load -- confirm.
query4 = '''CREATE CONSTRAINT ON (sample:SAMPLE) ASSERT sample.id IS UNIQUE'''
graph.run(query4)

<py2neo.graph.Cursor at 0x7f51a034a320>

### Link samples to babies

In [9]:
# Preview the header and the first two rows of the sample-to-baby mapping table.
!head -n3 /home/jovyan/data/import/baby_sample_relationship.tsv

ID	BABY
S_19.12.1	19
S_19.16.1	19


In [10]:
# Attach every sample to the baby it was taken from with a (BABY)-[:SAMPLELINK]->(SAMPLE)
# relationship, one per row of baby_sample_relationship.tsv (tab-separated, header row),
# committing in batches of 500 rows.
# - Both endpoints are looked up by `id`; a row whose baby or sample is not found yields
#   no relationship.
# - MERGE (rather than CREATE) makes the cell safe to re-run: an existing SAMPLELINK
#   between the same two nodes is reused instead of a duplicate edge being added. No
#   constraint protects relationships, so CREATE would silently double every link.
# - This is a non-raw Python string, so the `\t` below reaches Cypher as a literal tab.
query6 = '''USING PERIODIC COMMIT 500
            LOAD CSV WITH HEADERS FROM 'file:///baby_sample_relationship.tsv' AS line FIELDTERMINATOR '\t'
            MATCH (baby:BABY {id:line.BABY}), (sample:SAMPLE {id:line.ID})
            MERGE (baby)-[:SAMPLELINK]->(sample)'''
graph.run(query6)

<py2neo.graph.Cursor at 0x7f51a034a5f8>

## Proteins

In [11]:
# Preview the header and the first two data rows of the protein annotation table.
!head -n3 /home/jovyan/data/import/protein_entities.tsv

ID	CLUSTER	SPECIES	GENUS	KO	TAXA
b003-d078_scaffold_1413_2	Cluster516100	"nan"		K18568	bacteria
70_007_scaffold_158_24	Cluster51958	"Enterococcus faecalis 2"	Enterococcus	K02470,K02622	bacteria


In [12]:
# Create one PROTEIN node per row of protein_entities.tsv (tab-separated, with a header
# row), committing in batches of 10000 rows.
# - `id` and `NAME` are both filled from the ID column.
# - KO is split on "," into a list (a protein can carry several KO identifiers);
#   every other column is copied through unchanged.
# NOTE(review): CREATE is used, so running this cell again on a populated graph attempts
#   to add the same proteins a second time.
# NOTE(review): the file preview shows SPECIES given as the literal text "nan" for some
#   rows; it is copied verbatim here -- confirm whether it should be treated as missing.
query7 = '''USING PERIODIC COMMIT 10000
           LOAD CSV WITH HEADERS FROM  'file:///protein_entities.tsv' AS line FIELDTERMINATOR '\t'
           CREATE (p:PROTEIN { id:line.ID, 
                               NAME:line.ID,
                               CLUSTER:line.CLUSTER, 
                               SPECIES:line.SPECIES, 
                               GENUS:line.GENUS,
                               KO:split(line.KO, ","),
                               TAXA:line.TAXA})'''
graph.run(query7)

<py2neo.graph.Cursor at 0x7f51a034a908>

In [13]:
# Require PROTEIN.id to be unique. The relationship imports further down look proteins up
# by this property.
# NOTE(review): the constraint is added after the PROTEIN nodes were created, so duplicate
#   IDs in the TSV would presumably surface here rather than during the load -- confirm.
query8 = '''CREATE CONSTRAINT ON (protein:PROTEIN) ASSERT protein.id IS UNIQUE'''
graph.run(query8)

<py2neo.graph.Cursor at 0x7f51a034ac18>

### Link proteins to samples

In [14]:
# Preview the first lines (header plus data rows) of the per-sample protein abundance table.
!head /home/jovyan/data/import/protein_aggregated_sample_relationship.tsv

SAMPLE	PROTEIN	Q_VALUE	NSAF
S_19.12.1	b019-d010_scaffold_0_100	0.0205924	8.8573779e-06
S_19.12.1	b019-d010_scaffold_0_104	0.015774700000000003	6.9799951e-06
S_19.12.1	b019-d010_scaffold_0_105	0.0278418	4.3565605999999985e-06
S_19.12.1	b019-d010_scaffold_0_106	0.021569	2.2143445e-06
S_19.12.1	b019-d010_scaffold_0_108	0.0219222	8.684831300000003e-06
S_19.12.1	b019-d010_scaffold_0_110	0.0156336	9.3203071e-06
S_19.12.1	b019-d010_scaffold_0_111	0.020296400000000003	1.4634183999999999e-05
S_19.12.1	b019-d010_scaffold_0_113	0.0127018	1.56794825e-05
S_19.12.1	b019-d010_scaffold_0_115	0.023001	3.9670981e-06


In [15]:
# Record which proteins were observed in which sample as
# (SAMPLE)-[:PROTEINLINK {Q_VALUE, NSAF}]->(PROTEIN), one relationship per row of
# protein_aggregated_sample_relationship.tsv (tab-separated, header row), committing in
# batches of 500 rows.
# - Both endpoints are looked up by `id`; a row whose sample or protein is not found
#   yields no relationship.
# - MERGE on the bare relationship plus SET for its properties makes the cell safe to
#   re-run: an existing PROTEINLINK between the same two nodes is reused and its values
#   refreshed, instead of a duplicate edge being added as CREATE would do.
# - Assumes the file holds at most one row per (sample, protein) pair, as the
#   "aggregated" file name suggests -- TODO confirm; with MERGE a repeated pair ends up
#   as a single relationship carrying the values of its last row.
# NOTE(review): Q_VALUE and NSAF are written exactly as read, without toFloat(), so they
#   are presumably stored as strings -- confirm that is what downstream queries expect.
query9 = '''USING PERIODIC COMMIT 500
            LOAD CSV WITH HEADERS FROM 'file:///protein_aggregated_sample_relationship.tsv' AS line FIELDTERMINATOR '\t'
            MATCH (sample:SAMPLE {id:line.SAMPLE}), (protein:PROTEIN {id:line.PROTEIN})
            MERGE (sample)-[link:PROTEINLINK]->(protein)
            SET link.Q_VALUE = line.Q_VALUE, link.NSAF = line.NSAF'''
graph.run(query9)

<py2neo.graph.Cursor at 0x7f51a034af60>

### Link proteins to reactions

In [16]:
# Preview the first lines (header plus data rows) of the protein-to-reaction mapping table.
!head /home/jovyan/data/import/protein_reaction_relationship.tsv

PROTEIN	REACTION	KO
31_003_scaffold_0_1	R00465	K12972
31_003_scaffold_0_1	R01392	K12972
31_003_scaffold_0_1	R02527	K12972
31_003_scaffold_0_1	R01388	K12972
31_003_scaffold_0_103	R00485	K01424
31_003_scaffold_0_103	R06134	K01424
31_003_scaffold_0_103	R00256	K05597
31_003_scaffold_0_103	R06134	K05597
31_003_scaffold_0_103	R01579	K05597


In [17]:
# Link proteins to the reactions they can catalyse as
# (PROTEIN)-[:FUNCTIONLINK {KO}]->(REACTION), one relationship per row of
# protein_reaction_relationship.tsv (tab-separated, header row), committing in batches
# of 500 rows.
# - REACTION nodes are only matched here and are not created by any cell above, so they
#   must already be in the graph; a row whose protein or reaction is not found yields
#   no relationship.
# - The same protein/reaction pair can occur on several rows with different KO values
#   (visible in the file preview), which gives one relationship per row.
# NOTE(review): CREATE is used, so running this cell again adds a second copy of every
#   relationship. A MERGE-based version would need KO inside the relationship pattern to
#   keep the per-KO edges apart, and would have to cope with rows lacking a KO.
query10 = '''USING PERIODIC COMMIT 500
            LOAD CSV WITH HEADERS FROM 'file:///protein_reaction_relationship.tsv' AS line FIELDTERMINATOR '\t'
            MATCH (protein:PROTEIN {id:line.PROTEIN}), (reaction:REACTION {id:line.REACTION})
            CREATE (protein)-[:FUNCTIONLINK  {KO:line.KO}]->(reaction)'''
graph.run(query10)

<py2neo.graph.Cursor at 0x7f51a0352390>