In [28]:
! pip install ipython-cypher neo4j py2neo yfiles_jupyter_graphs graphdatascience pandas --quiet

In [7]:
import os
%load_ext dotenv
%dotenv

DATASET_PATH=os.getenv("DATASET_PATH")
JSON_DATASET_PATH=os.getenv("JSON_DATASET_PATH")
POSTGRESDB_URL=os.getenv("POSTGRESDB_URL")
HASURA_RESTAPI_HOST = os.getenv("HASURA_RESTAPI_HOST")
NEO4J_HOST=os.getenv("NEO4J_HOST")
NEO4J_USER=os.getenv("NEO4J_USER")
NEO4J_PASSWORD=os.getenv("NEO4J_PASSWORD")

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [8]:
from neo4j import GraphDatabase

# Fail early with a clear message instead of trying to reach "bolt://None:7687".
if not NEO4J_HOST:
    raise RuntimeError("NEO4J_HOST is not set; define it in the environment or in the .env file.")

uri = "bolt://{host}:7687".format(host=NEO4J_HOST)

# The driver is intentionally NOT opened in a `with` block: leaving the block
# closes the driver, but `session` is used by every later cell and would then
# belong to an already-closed driver. Keep the driver open for the lifetime of
# the notebook; call session.close() and driver.close() when finished.
driver = GraphDatabase.driver(uri, auth=(NEO4J_USER, NEO4J_PASSWORD))
driver.verify_connectivity()
session = driver.session(database='')


  driver.verify_connectivity()


In [9]:
def cypher_run(cypher, parameters=None):
    """Run a Cypher statement on the notebook's shared session and return a DataFrame.

    Args:
        cypher: Cypher query text.
        parameters: Optional mapping of query parameters, referenced as
            ``$name`` inside ``cypher``. Passing values this way avoids
            splicing them into the query string. Defaults to None (no
            parameters), which matches the previous single-argument behavior.

    Returns:
        Whatever ``to_df()`` returns for the query result (a pandas DataFrame
        with the neo4j driver).
    """
    # `session` is the module-level session created in the connection cell.
    result = session.run(cypher, parameters)
    return result.to_df()

def cypher_run_g(cypher, parameters=None):
    """Run a Cypher statement on the notebook's shared session and return its graph view.

    Args:
        cypher: Cypher query text; it should return nodes, relationships
            and/or paths for the graph to contain anything.
        parameters: Optional mapping of query parameters, referenced as
            ``$name`` inside ``cypher``. Defaults to None (no parameters),
            which matches the previous single-argument behavior.

    Returns:
        Whatever ``graph()`` returns for the query result (the driver's graph
        object, which this notebook hands to ``GraphWidget``).
    """
    # `session` is the module-level session created in the connection cell.
    result = session.run(cypher, parameters)
    return result.graph()

## Find Popular Products

In [10]:
from yfiles_jupyter_graphs import GraphWidget

# Sample five Customer -[PURCHASED]-> Order -[PRODUCT]-> Product paths and
# return every node and relationship on them so the widget can draw each path.
query = """
MATCH (c:Customer)-[r1:PURCHASED]->(o:Order)-[r2:PRODUCT]->(p:Product)
RETURN c,r1,o,r2,p
limit 5
"""
# cypher_run_g returns the result as a graph object, which GraphWidget renders.
w = GraphWidget(graph = cypher_run_g(query))
w.show()




GraphWidget(layout=Layout(height='500px', width='100%'))

## Content-Based Recommendations

In [11]:
# For the customer with customerID "65": start from a product p on one of the
# customer's orders, find an order o2 that also contains p, and take a product
# p2 on o2 that is PARTOF the same Category as p. The NOT(...) predicate drops
# any p2 that already appears on one of this customer's own orders.
# At most 25 matches are returned for display.
query = """

MATCH (c:Customer)-[r1:PURCHASED]->(o:Order)-[r2:PRODUCT]->(p:Product)
<-[:PRODUCT]-(o2:Order)-[:PRODUCT]->(p2:Product)-[:PARTOF]->(cat:Category)<-[:PARTOF]-(p)
where c.customerID="65" and NOT( (c)-[:PURCHASED]->(:Order)-[:PRODUCT]->(p2) )
return c , p , p2 ,r1,r2,cat
limit 25
"""
w = GraphWidget(graph = cypher_run_g(query))
w.show()

GraphWidget(layout=Layout(height='500px', width='100%'))

# Using Graph Data Science

In [12]:
# Preview the five Customer -[RATED]-> Product relationships with the highest
# rating, as (customerID, rating, productID) rows.
query = """
MATCH (n:Customer)-[r:RATED]->(n2:Product) 
RETURN n.customerID, r.rating, n2.productID
ORDER BY r.rating DESC
LIMIT 5
"""

cypher_run(query)


  return result.to_df()


Unnamed: 0,n.customerID,r.rating,n2.productID
0,27,1.0,26
1,3,1.0,11
2,47,1.0,3
3,54,0.5,14
4,11,0.5,3


In [16]:
# The GDS graph catalog lives in the Neo4j server, not in this kernel, so a
# projection created by an earlier run survives a kernel restart and makes
# gds.graph.project fail because the name is already taken. Drop any stale
# copy first; the second argument (failIfMissing = false) turns the drop into
# a no-op when no graph with that name exists.
cypher_run("CALL gds.graph.drop('northwindGraph1', false) YIELD graphName")

# Project Customer and Product nodes together with the RATED relationships
# (carrying their `rating` property, defaulting to 1.0 when absent) into an
# in-memory graph named 'northwindGraph1'.
query = """

CALL gds.graph.project(
    'northwindGraph1',
    ['Customer', 'Product'],
    {
        RATED: {
            properties: {
                rating: {
                    property: 'rating',
                    defaultValue: 1.0
                }
            }
        }
    }
);

"""

cypher_run(query)

  return result.to_df()


Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Customer': {'label': 'Customer', 'properties...","{'RATED': {'orientation': 'NATURAL', 'indexInv...",northwindGraph1,168,484,1386


In [17]:
# Ask GDS for a memory estimate of running nodeSimilarity in write mode on the
# projected graph, using the same write configuration as the real run below.
query = """

CALL gds.nodeSimilarity.write.estimate('northwindGraph1', {
  writeRelationshipType: 'SIMILAR',
  writeProperty: 'score'
})
YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory

"""

cypher_run(query)

  return result.to_df()


Unnamed: 0,nodeCount,relationshipCount,bytesMin,bytesMax,requiredMemory
0,168,484,53872,60592,[52 KiB ... 59 KiB]


In [18]:
# Stream the node-similarity pairs computed on the projection. The internal
# node ids are mapped back to nodes with gds.util.asNode so the customerID of
# each side can be shown; rows are sorted by similarity, highest first.
query = """
CALL gds.nodeSimilarity.stream('northwindGraph1')
YIELD node1, node2, similarity
RETURN gds.util.asNode(node1).customerID AS customer1, gds.util.asNode(node2).customerID AS customer2, similarity
ORDER BY similarity DESCENDING, customer1, customer2
"""

cypher_run(query)

  return result.to_df()


Unnamed: 0,customer1,customer2,similarity
0,11,47,0.500000
1,47,11,0.500000
2,56,59,0.400000
3,59,56,0.400000
4,13,54,0.333333
...,...,...,...
714,54,71,0.071429
715,11,51,0.066667
716,3,20,0.032258
717,11,20,0.031250


In [19]:
# Write mode creates a fresh set of SIMILAR relationships on every call, so
# re-running this cell would pile duplicates on top of the previous run's
# results. Remove the existing SIMILAR relationships first to keep the cell
# idempotent. NOTE: this deletes every SIMILAR relationship in the database,
# which is what this cell is meant to (re)generate.
cypher_run("MATCH ()-[r:SIMILAR]->() DELETE r")

# Run nodeSimilarity on the projection and persist each pair to the database
# as a SIMILAR relationship with the similarity stored in its `score` property.
query = """
CALL gds.nodeSimilarity.write('northwindGraph1', {
    writeRelationshipType: 'SIMILAR',
    writeProperty: 'score'
})
YIELD nodesCompared, relationshipsWritten
"""

cypher_run(query)

  return result.to_df()


Unnamed: 0,nodesCompared,relationshipsWritten
0,74,719


In [22]:
# Visualize up to 25 of the SIMILAR relationships, returned as whole paths so
# the widget receives both end nodes of each relationship.
query = """MATCH p=()-[r:SIMILAR]->() RETURN p LIMIT 25"""

w = GraphWidget(graph = cypher_run_g(query))
w.show()

GraphWidget(layout=Layout(height='500px', width='100%'))

In [23]:
# List the ten Customer -[SIMILAR]-> Customer pairs with the highest score.
query = """
MATCH (c1:Customer)-[r:SIMILAR]->(c2:Customer) 
RETURN c1.customerID,r.score,c2.customerID 
ORDER BY r.score DESCENDING 
LIMIT 10
"""
cypher_run(query)

  return result.to_df()


Unnamed: 0,c1.customerID,r.score,c2.customerID
0,47,0.5,11
1,11,0.5,47
2,56,0.4,59
3,59,0.4,56
4,54,0.333333,13
5,2,0.333333,18
6,83,0.333333,28
7,56,0.333333,19
8,56,0.333333,3
9,18,0.333333,2


In [24]:
# List every customer that customer '47' has a SIMILAR relationship to,
# ordered by score with the highest first.
query = """
MATCH (c1:Customer {customerID:'47'})-[r:SIMILAR]->(c2:Customer) 
RETURN c1.customerID,r.score,c2.customerID 
ORDER BY r.score DESCENDING 
"""
cypher_run(query)


  return result.to_df()


Unnamed: 0,c1.customerID,r.score,c2.customerID
0,47,0.5,11


In [25]:
# Collect the products customer '47' has a direct outgoing relationship to
# (the `-->` pattern matches any relationship type), then return the names of
# products customer '11' is directly connected to that are not in that
# collection. Customer '11' is the customer listed as SIMILAR to '47' above.
query = """
MATCH (c1:Customer {customerID:'47'})-->(p1:Product)
WITH collect(p1) as products
MATCH (c2:Customer {customerID:'11'})-->(p2:Product)
WHERE not p2 in products
RETURN p2.productName as recommendation
"""
cypher_run(query)

  return result.to_df()


Unnamed: 0,recommendation
0,Wimmers gute Semmelknödel
