# Goals

- Read gene co-expression data from a CSV file
- Add it to a given `neo4j` graph

In [1]:
import pandas as pd
import numpy as np
from py2neo import Graph, Node, Relationship

## Read

Read the data from a CSV file with `pandas`

In [2]:
# Location of the co-expression table; edit this to point at another file.
csv_path = "~/Desktop/toy.csv"

# Load the table and preview its first rows.
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,gene_a,gene_b,correlation
0,M,t,0.763
1,k,n,0.956
2,r,x,0.248
3,I,q,0.97
4,H,a,0.269


## Process

Collect the set of unique genes and build one `py2neo` node per gene

In [3]:
# Every distinct gene name that appears in either column.
genes = set(df['gene_a']).union(df['gene_b'])

# Map each gene name to its own "Gene" node carrying that name as a property.
gene_map = {symbol: Node("Gene", name=symbol) for symbol in genes}

len(genes)

38

Convert each row of the data into a `py2neo` relationship between its two gene nodes

In [4]:
# Build one "TO" relationship per row, linking the two gene nodes and storing
# the row's correlation as a relationship property.
#
# The three columns are selected by name before converting to an array, so the
# tuple unpacking below does not depend on how many columns the CSV has or on
# their order. Unpacking every column of `df` positionally raises ValueError
# if the frame carries any extra column (the preview above lists an
# `Unnamed: 0` column, which may be a saved index - TODO confirm).
edge_columns = ["gene_a", "gene_b", "correlation"]

rels = []
for gene_a, gene_b, correlation in np.array(df[edge_columns]):
    rel = Relationship(gene_map[gene_a], "TO", gene_map[gene_b], correlation=correlation)
    rels.append(rel)

len(rels)

138

## Write

Write all nodes and relationships to the given `neo4j` graph

In [5]:
# Endpoint of the target Neo4j database.
graph_url = "http://localhost:7474/db/data/"
graph = Graph(graph_url)

# Issue one create() call per entity: all gene nodes first, then all
# relationships, in the same order as they were built.
for entity in list(gene_map.values()) + rels:
    graph.create(entity)