## Useful links
https://neo4j.com/developer/cypher-query-language/  
https://neo4j.com/developer/python/  
https://neo4j.com/docs/operations-manual/current/introduction/  
http://py2neo.org/v3/index.html  
###### Run these examples in the Neo4j browser
:play movie graph
:play northwind graph


More links  
http://py2neo.org/v3/database.html  
http://neo4j.com/docs/developer-manual/current/get-started/cypher/importing-csv-files-with-cypher/  
https://neo4j.com/blog/bulk-data-import-neo4j-3-0/  
https://neo4j.com/developer/guide-import-csv/  
https://neo4j.com/docs/developer-manual/current/cypher/clauses/create/#create-create-a-relationship-and-set-properties  
https://neo4j.com/docs/developer-manual/current/cypher/clauses/match/  
https://neo4j.com/blog/importing-data-neo4j-via-csv/  
http://neo4j.com/docs/developer-manual/current/cypher/clauses/load-csv/

## Start and stop neo4j commands
sudo service neo4j start
sudo service neo4j stop

In [None]:
# Start the neo4j service from Python instead of from a terminal.
import getpass
import os
import subprocess

password = getpass.getpass()
command = "sudo -S service neo4j start"
# Run the command as an argument list (no shell involved) and hand the
# password to `sudo -S` on stdin.  Building an `echo <password> | sudo ...`
# shell string would let the shell interpret any spaces, quotes, `$`, `;` or
# `|` that the password happens to contain.
proc = subprocess.Popen(command.split(), stdin=subprocess.PIPE,
                        universal_newlines=True)
proc.communicate(password + "\n")
# Last expression of the cell: the notebook displays the exit status.
proc.returncode

## Create csv for neo4j-admin import

In [48]:
import json
import csv
import gzip
import time
import re

# choose which file to use
# fname = "data/reviews_Patio_Lawn_and_Garden_5.json.gz"
# fname = "data/reviews_Pet_Supplies_5.json.gz"
fname = "data/reviews_Health_and_Personal_Care_5.json.gz"


def _flatten(text):
    """Return text with newlines turned into spaces and backslashes removed.

    None is passed through unchanged, so a field that is missing from the
    JSON record is still written as an empty CSV cell.
    """
    if text is None:
        return None
    return text.replace("\n", " ").replace("\\", "")


# ids already written, so every person/product gets exactly one row
pplSet = set()
prodSet = set()

sttime = time.time()  # time the process
count = 0
# The context managers close all four files even when a record fails to
# decode or parse part-way through the input.
with open('data/people.csv', 'w') as csvPeople, \
        open('data/products.csv', 'w') as csvProducts, \
        open('data/reviews.csv', 'w') as csvReviews, \
        gzip.open(fname, "r") as f:
    # one writer per output file, each starting with its header row
    ppl = csv.writer(csvPeople)
    ppl.writerow(["reviewerID:ID", "name", ":LABEL"])

    prod = csv.writer(csvProducts)
    prod.writerow(["asin:ID", ":LABEL"])

    rev = csv.writer(csvReviews)
    rev.writerow([":START_ID", "score", "reviewText", "summary", "helpful0",
                  "helpful1", "ts", ":END_ID", ":TYPE"])

    # the input holds one JSON document per line
    for line in f:
        # NOTE(review): a non-ASCII byte in the input raises
        # UnicodeDecodeError here - confirm the data files are pure ASCII.
        d = json.loads(line.decode("ascii"))
        reviewer = d.get("reviewerID")
        asin = d.get("asin")

        # add person (names additionally lose their commas)
        if reviewer not in pplSet:
            pplSet.add(reviewer)
            tempNm = _flatten(d.get("reviewerName"))
            if tempNm is not None:
                tempNm = tempNm.replace(",", "")
            ppl.writerow([reviewer, tempNm, "Person"])

        # add product (temporary - use metadata later)
        if asin not in prodSet:
            prodSet.add(asin)
            prod.writerow([asin, "Product"])

        # add review; a record without "helpful" gets two empty cells
        # instead of aborting the whole export
        helpful = d.get("helpful") or (None, None)
        rev.writerow([reviewer, d.get("overall"),
                      _flatten(d.get("reviewText")),
                      _flatten(d.get("summary")),
                      helpful[0], helpful[1],
                      d.get("unixReviewTime"), asin,
                      "Reviewed"])
        count += 1
print(count, time.time()-sttime)

(346355, 24.498215913772583)


In [None]:
import json
import csv
# Convert the "employee_details" list of a JSON document into a CSV file.
# `employee_data` (the JSON text) is not defined in this cell and must
# already exist in the notebook.
employee_parsed = json.loads(employee_data)
emp_data = employee_parsed['employee_details']
# The context manager closes the file even if a record cannot be written.
with open('/tmp/EmployData.csv', 'w') as employ_data:
    # create the csv writer object
    csvwriter = csv.writer(employ_data)
    for index, emp in enumerate(emp_data):
        if index == 0:
            # the keys of the first record become the header row
            header = emp.keys()
            csvwriter.writerow(header)
        csvwriter.writerow(emp.values())

### py2neo module

In [53]:
import json
import time
from py2neo import Graph, Path, Node, NodeSelector, Relationship
# Connect over HTTP.
# NOTE(review): the user name and password are embedded in this URL.
graph = Graph("http://neo4j:bigdata@localhost:7474/db/data/")
# graph.delete_all()
selector = NodeSelector(graph)
# Labels are positional arguments of select(); keyword arguments are property
# filters, so select(label="Person") matches nodes that have a *property*
# named "label" rather than nodes labelled Person.
l = selector.select("Person")
list(l)

[]

In [None]:
# C:/home/danny/Repos/csci5980_graph_database/data/
# Import one review CSV through Cypher: two uniqueness constraints first,
# then three LOAD CSV passes (people, products, review relationships).
sttime = time.time()
fname = "file:///Musical_Instruments_5.csv"

# uniqueness constraints on the id property of both node types
q1 = "\ncreate constraint on (pe:Person) assert pe.id is unique;\n"
q2 = "\ncreate constraint on (pr:Product) assert pr.id is unique;\n"

# every import statement opens with the same two lines
load_rows = """
using periodic commit
load csv with headers from "%s" as row
""" % fname

# pass 1: one Person per reviewerID, name set only when the node is created
q3 = load_rows + """merge(person:Person {id:row.reviewerID})
on create set person.name = row.reviewerName;
"""

# pass 2: one Product per asin
q4 = load_rows + """merge(product:Product {id:row.asin});
"""

# pass 3: connect each reviewer to the reviewed product
q5 = load_rows + """match (person:Person {id:row.reviewerID})
match (product:Product {id:row.asin})
merge (person)-[:Reviewed {ts:row.unixReviewTime, reviewText:row.reviewText, score:row.overall, summary:row.summary}]->(product);
"""

# run the statements one after another, in order
for statement in (q1, q2, q3, q4, q5):
    graph.run(statement)
print("Done", time.time()-sttime)

In [None]:
# C:/home/danny/Repos/csci5980_graph_database/data/
# Same import as a single Cypher statement: for every CSV row merge the
# person, the product and the review relationship between them.
sttime = time.time()
fname = "file:///Video_Games_5.csv"
query = "\n".join([
    "",
    'load csv with headers from "%s" as row' % fname,
    "",
    "merge(person:Person {id:row.reviewerID})",
    "on create set person.name = row.reviewerName",
    "",
    "merge(product:Product {id:row.asin})",
    "",
    "merge (person)-[:Reviewed {ts:row.unixReviewTime, reviewText:row.reviewText, score:row.overall, summary:row.summary}]->(product)",
    "",
])
graph.run(query)
print("Done", time.time()-sttime)

In [None]:
# the slow python way of importing everything
def createReview(line):
    """Build a "reviewed" relationship from one JSON-encoded review.

    `line` is parsed with json.loads.  The result is a py2neo Relationship
    from a "person" node (id, name) to a "product" node (asin) that carries
    the ts, reviewText, score and summary properties.  Fields missing from
    the record come through as None.
    """
    record = json.loads(line)

    reviewer = Node("person", id=record.get("reviewerID"))
    reviewer["name"] = record.get("reviewerName")

    item = Node("product", asin=record.get("asin"))

    review = Relationship(reviewer, "reviewed", item)
    # relationship property -> key in the review record
    for prop, key in (("ts", "unixReviewTime"),
                      ("reviewText", "reviewText"),
                      ("score", "overall"),
                      ("summary", "summary")):
        review[prop] = record.get(key)
    return review

# Slow variant: merge every review in a transaction of its own.
fname = "data/Musical_Instruments_5.json"
sttime = time.time()
count = 0  # stays 0 when the file is empty
with open(fname, "r") as revFile:
    for count, line in enumerate(revFile, 1):
        rev = createReview(line)
        # Begin and commit exactly one transaction per review, so no empty
        # transaction is committed and none is left open after the last row.
        tx = graph.begin()
        tx.merge(rev)
        tx.commit()

        # progress report every 1000 reviews
        if count % 1000 == 0:
            print(count, time.time()-sttime)
print("Done", time.time()-sttime)

# Batched variant: collect reviews and merge them 10000 at a time, one
# transaction per batch, printing timings around each commit.
sttime = time.time()
with open(fname, "r") as revFile:
    count = 0
    pending = []  # relationships waiting for the next commit
    for count, line in enumerate(revFile, 1):
        pending.append(createReview(line))
        if count % 1000 == 0:
            print(count, time.time()-sttime)
        if count % 10000:
            continue  # batch not full yet

        # full batch: merge everything collected so far in one transaction
        txn = graph.begin()
        for queued in pending:
            txn.merge(queued)
        print("Before commit", count, time.time()-sttime)
        txn.commit()
        print("After commit", count, time.time()-sttime)
        pending = []

    # whatever is left over after the last full batch
    if pending:
        txn = graph.begin()
        for queued in pending:
            txn.merge(queued)
        print("Before commit", count, time.time()-sttime)
        txn.commit()
        pending = []
print("Done", time.time()-sttime)

In [None]:
# simple example of adding nodes
from py2neo import Graph, Path, Node, NodeSelector, Relationship
# Reconnect and call delete_all() on the graph before building the example.
graph = Graph("http://neo4j:bigdata@localhost:7474/db/data/")
graph.delete_all()

# three plain "person" nodes ...
for first_name in ["alex", "bob", "carol"]:
    graph.create(Node("person", name=first_name))
# ... and one with a title
archduke = Node("person", name="Sir Daniel Jon Hanson III", title="Archduke of Stuffington")
graph.create(archduke)

selector = NodeSelector(graph)
archduke_name = archduke.get("name")
# link every selected person except the archduke himself to the archduke
for subject in selector.select("person"):
    print(subject)
    if subject.get("name") == archduke_name:
        continue
    tax_link = Relationship(subject, "pays_taxes_to", archduke)
    print(tax_link)
    graph.create(tax_link)

### official neo4j module

In [None]:
from neo4j.v1 import GraphDatabase, basic_auth

# Connect over Bolt with the official driver, create one Person node and
# read it back.
driver = GraphDatabase.driver("bolt://localhost:7687", auth=basic_auth("neo4j", "bigdata"))

# Using the session as a context manager closes it even when one of the
# queries raises, instead of only on the success path.
with driver.session() as session:
    # create the node; values are passed as query parameters
    session.run("CREATE (a:Person {name: {name}, title: {title}})",
                {"name": "Arthur", "title": "King"})

    # look the node up again by name
    result = session.run("MATCH (a:Person) WHERE a.name = {name} "
                         "RETURN a.name AS name, a.title AS title",
                         {"name": "Arthur"})
    for record in result:
        print("%s %s" % (record["title"], record["name"]))