# Neo4j - building a Neo4j graph

## Neo4j web interface [neo4j](http://127.0.0.1:7474)

In [None]:
import warnings
# Silence every warning so the workshop output stays readable.
warnings.simplefilter("ignore")
# The second positional argument of filterwarnings() is a *message regex*, so
# the category must be passed by keyword, as a class rather than a string.
warnings.filterwarnings("ignore", category=DeprecationWarning)
# NOTE(review): "SAWarning" is still matched as a message prefix here because
# the class is not in scope in this notebook (presumably
# sqlalchemy.exc.SAWarning - confirm). The blanket simplefilter above already
# ignores it either way.
warnings.filterwarnings("ignore", "SAWarning")

In [None]:
import pandas as pd
import numpy as np
# Widen pandas' display limits so wide/long frames are shown in full in the
# notebook instead of being truncated.
pd.set_option('display.max_columns', 512)
pd.set_option('display.max_colwidth', 512)
pd.set_option('display.max_rows', 512)
pd.set_option('display.width', 1024)
pd.set_option('display.max_info_rows', 512)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('mode.chained_assignment', 'warn')
# Use fully qualified option names: short patterns such as 'precision' can
# match more than one option and are then rejected as ambiguous.
pd.set_option('display.precision', 2)
pd.set_option('display.float_format', '{:6.2f}'.format)
pd.set_option('display.notebook_repr_html', True)

In [None]:
import datetime
import string

In [None]:
from tqdm import *

# Load customer data using pandas' `read_csv` function

In [None]:
# Load the tab-separated customer file into a DataFrame. `zipcode` is forced
# to text so pandas does not parse it as a number. The builtin `str` is used
# because the `np.str` alias no longer exists in current NumPy releases.
pd_df = pd.read_csv(
    "/media/sf_mnlytics/data/fake_customers_100.csv.gz",
    sep='\t',
    dtype={'zipcode': str},
)

In [None]:
# Print a summary of the loaded DataFrame (columns, non-null counts, dtypes).
pd_df.info()

## Apply a group-by operation to get all the ('first_name', 'last_name') combinations.

In [None]:
# One entry per distinct (first_name, last_name) pair; the value is the
# number of rows in pd_df that share that pair.
name_columns = ['first_name', 'last_name']
pairs_grouped = pd_df.groupby(name_columns)
edges_a = pairs_grouped.size()
# Preview the first ten pairs.
edges_a.head(10)

## Connect to Neo4j graph DB server

In [None]:
import py2neo
from py2neo import *
# Connection settings for the Neo4j server used in this notebook.
neo4j_settings = {
    "host": "localhost",
    "user": "neo4j",
    "password": "jupyter",
}
graph = Graph(**neo4j_settings)

## Empty the graph DB 

In [None]:
# Start from an empty database: removes all nodes and relationships from the
# connected graph, so only run this against a throwaway instance.
graph.delete_all()

## Select only the first 200 combinations [make load fast for the workshop]

In [None]:
# Keep only the first 200 (first_name, last_name) pairs, by position.
eds = edges_a.head(200)

## Loop to load the Neo4j DB in transactional mode

In [None]:
# Load every (first_name, last_name) pair into Neo4j as
#   (:firstName)-[:connect_to_last_name]->(:LastName)
# reusing nodes that already exist so each distinct name is stored once.
# Series.items() yields ((first_name, last_name), count) tuples.
name_pairs = tqdm_notebook(eds.items(), total=len(eds))
for (first_name, last_name), _pair_count in name_pairs:
    # ------------------------------------------------------------
    # Look both names up by their `key` property first.
    first_node = graph.find_one('firstName', 'key', first_name)
    last_node = graph.find_one('LastName', 'key', last_name)
    # ------------------------------------------------------------
    # Create whichever node is missing. Creating a node binds the local Node
    # object to its new remote counterpart, so it can be used directly in the
    # relationship below without querying the database again.
    tx = graph.begin()
    if first_node is None:
        first_node = Node("firstName", name=first_name, key=first_name)
        tx.create(first_node)
    if last_node is None:
        last_node = Node("LastName", name=last_name, key=last_name)
        tx.create(last_node)
    tx.commit()
    # ------------------------------------------------------------
    r = Relationship(first_node, "connect_to_last_name", last_node)
    # ------------------------------------------------------------
    # Second transaction: link the two nodes unless the graph already
    # reports this relationship as existing.
    tx = graph.begin()
    if not graph.exists(r):
        tx.create(r)
    tx.commit()

# Another Jupyter magic!

In [None]:
# Load the `cypher` IPython extension; it provides the %cypher / %%cypher
# magics used in the following cells.
%load_ext cypher

## Query Neo4j using cypher language!

In [None]:
%%cypher  http://neo4j:jupyter@localhost:7474/db/data
// Return up to five paths that follow a connect_to_last_name relationship.
MATCH p=()-[r:connect_to_last_name]->() RETURN p LIMIT 5

## Query Neo4j and get the result in a pandas dataframe

In [None]:
# Run the query through the %cypher line magic and keep the result object.
# The pattern has no direction, so a relationship can be matched from either
# end (once as a->b and once as b->a).
# NOTE(review): no connection string is given here - presumably the magic
# reuses the connection opened by the earlier %%cypher cell; confirm.
results = %cypher MATCH (a)-[r]-(b) RETURN a,r,b
# Show the first five result rows as a pandas DataFrame.
results.get_dataframe().head(5)

# Neo4j to Python NetworkX

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

from networkx import *
from networkx.drawing.nx_agraph import *

import pygraphviz as pgv
from pygraphviz import *

In [None]:
# Turn the query result into an undirected graph object, then convert that
# with to_agraph() so it can be laid out and written by pygraphviz.
undirected_graph = results.get_graph(directed=False)
G = to_agraph(undirected_graph)

In [None]:
# Graph-wide layout settings. The keys must be the exact Graphviz attribute
# names: a key with trailing whitespace is a different, unknown attribute
# and the intended rank/node spacing would not be applied.
G.graph_attr['fontsize'] = 24
G.graph_attr['ranksep'] = 2.0
G.graph_attr['nodesep'] = 2.0

# Defaults applied to every node.
G.node_attr['fontsize'] = 24
G.node_attr['width'] = 1.0
G.node_attr['height'] = 1.0
G.node_attr['style'] = 'filled'

# Defaults applied to every edge.
G.edge_attr['fontsize'] = 24
G.edge_attr['weight'] = 1.0
G.edge_attr['color'] = 'black'

## Fix node properties

In [None]:
# (label text to look for, border colour, fill colour), checked in this
# order, so a later match overrides an earlier one on the same node.
label_styles = (
    ('firstName', 'green', 'PaleGreen'),
    ('LastName', 'blue', 'LightBlue'),
)

for node in G.nodes():
    # Display the node's `name` attribute as its visible label.
    node.attr['label'] = node.attr['name']
    for label_text, border_colour, fill_colour in label_styles:
        if label_text in node.attr['labels']:
            node.attr['color'] = border_colour
            node.attr['fillcolor'] = fill_colour

# Lay the graph out with the `dot` program, then write it to disk.
G.layout(prog="dot")
G.write(path="/media/sf_mnlytics/data/dot.txt")

# More Jupyter Magic!!!

In [None]:
%%dot /media/sf_mnlytics/data/dot.txt
#