In [None]:
!pip install neo4j
!pip install networkx
!pip install graphdatascience

# Graph Data Science

## Configure connection

In [None]:
import os

from graphdatascience import GraphDataScience

# Connection settings. Each value can be overridden with an environment
# variable, so credentials for another database never have to be typed into
# the notebook; the fallbacks are the notebook's original local settings.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "BDII2023")

# configure connection
gds = GraphDataScience(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
print(f"GDS version: {gds.version()}")

## Link prediction
This query can be executed without a graph projection.

In [None]:
# For the user whose `id` equals $name, score every beer without a LIKED
# relationship from that user using preferential attachment over LIKED
# relationships, and keep the 10 highest scores.
query = """
MATCH (u:User {id: $name}) WITH u
MATCH (b:Beer) WHERE not (u)-[:LIKED]->(b)
RETURN b.name as beer, gds.alpha.linkprediction.preferentialAttachment(u, b, {relationshipQuery: "LIKED"}) AS score
ORDER BY score DESC LIMIT 10
"""

# The user id is passed as a query parameter; the result is shown as the cell output.
df_recommendation = gds.run_cypher(query, params={"name": "Gyle41386"})
df_recommendation

## PageRank on beer projection

### Cypher query

In [None]:
# Reference Cypher only: both statements are bare string literals, so this
# cell does not send anything to the database.
#
# 1) Project the "coliked_beers_cypher" graph. Nodes are beers that share more
#    than 3 distinct liking users with another beer; relationships connect such
#    pairs with weight = (shared users - 3).
"""
CALL gds.graph.project.cypher(
    "coliked_beers_cypher",
    "MATCH (b1:Beer)<-[:LIKED]-(u:User)-[:LIKED]->(b2:Beer) WITH b1, b2, count(DISTINCT u) as liked WHERE liked > 3 RETURN DISTINCT id(b1) as id",
    "MATCH (b1:Beer)<-[:LIKED]-(u:User)-[:LIKED]->(b2:Beer) WITH b1, b2, count(distinct u) as liked WHERE liked > 3 RETURN id(b1) as source, id(b2) as target, (liked - 3) as weight"
)
"""
# 2) Stream PageRank on that projection with the user's liked beers as
#    `sourceNodes` and "weight" as relationship weight, then return the 10
#    best-scoring beers that are not among the source beers.
"""
MATCH (:User {id: $name})-[:LIKED]->(b:Beer) 
WITH collect(id(b)) as sources
CALL gds.pageRank.stream(
    "coliked_beers_cypher",
    {
        sourceNodes: sources, 
        relationshipWeightProperty: "weight"
    }
) YIELD nodeId, score WHERE score > 0 AND NOT(nodeId IN sources)
RETURN gds.util.asNode(nodeId).name, score
ORDER BY score DESC LIMIT 10
"""

### GDS Python client

In [None]:
# Node query: ids of beers sharing more than 3 distinct liking users with another beer.
node_query = """
    MATCH (b1:Beer)<-[:LIKED]-(u:User)-[:LIKED]->(b2:Beer) 
    WITH b1, b2, count(DISTINCT u) as liked WHERE liked > 3 
    RETURN DISTINCT id(b1) as id
"""
# Relationship query: one weighted edge per such pair, weight = shared users - 3.
edge_query = """
    MATCH (b1:Beer)<-[:LIKED]-(u:User)-[:LIKED]->(b2:Beer) 
    WITH b1, b2, count(distinct u) as liked WHERE liked > 3 
    RETURN id(b1) as source, id(b2) as target, (liked - 3) as weight
"""

with gds.graph.project.cypher("coliked_beers_temp", node_query, edge_query) as g_temp:
    # Ids of the beers the user already liked; they seed the PageRank run.
    # NOTE(review): assumes every liked beer is part of the projection - confirm
    # how GDS treats source ids that are missing from the projected graph.
    q_source = """
        MATCH (:User {id: $name})-[:LIKED]->(b:Beer) RETURN collect(id(b)) as sources
    """
    liked_df = gds.run_cypher(q_source, params={"name": "Gyle41386"})
    sources = liked_df.sources[0]

    pagerank_df = gds.pageRank.stream(
        g_temp,
        sourceNodes=sources,
        relationshipWeightProperty="weight",
    )
    # Keep only the beers that received a positive score.
    result = pagerank_df.query("score > 0")

In [None]:
# Look up the names of the scored beers, leaving out the ones the user already liked.
nodes = result["nodeId"].to_list()
q = """
MATCH (:User {id: $name})-[:LIKED]->(b:Beer) WITH collect(b) as sources
MATCH (b:Beer) WHERE id(b) IN $nodes AND not(b in sources)
RETURN id(b) AS nodeId, b.name AS beer
"""
lookup_params = {"name": "Gyle41386", "nodes": nodes}
df = gds.run_cypher(q, params=lookup_params)

# Rows without a name (beers filtered out by the lookup) are dropped before ranking.
names_by_node = df.set_index("nodeId")
named_scores = result.join(names_by_node, on="nodeId").dropna()
named_scores.sort_values("score", ascending=False).head(10)

## Jaccard similarity on beer projection

### Cypher query

In [None]:
# Reference Cypher only: a bare string literal, not sent to the database by
# this cell. It streams filtered node similarity (JACCARD, topK 10) on the
# "coliked_beers_cypher" projection with the user's liked beers as
# `sourceNodeFilter`, keeps the best similarity per target beer that is not a
# source beer, and returns the 10 most similar beers.
"""
MATCH (:User {id: $name})-[:LIKED]->(b:Beer) 
WITH collect(id(b)) as sources
CALL gds.nodeSimilarity.filtered.stream(
  'coliked_beers_cypher', 
  {
    sourceNodeFilter:sources,
    similarityMetric: 'JACCARD', 
    topK:10
  }
)
YIELD node1, node2, similarity WHERE similarity > 0 AND NOT(node2 IN sources)
WITH node2, max(similarity) as similarity
RETURN gds.util.asNode(node2).name, similarity
ORDER BY similarity DESC LIMIT 10
"""

### GDS Python client

In [None]:
# Node query: ids of beers sharing more than 3 distinct liking users with another beer.
node_query = """
    MATCH (b1:Beer)<-[:LIKED]-(u:User)-[:LIKED]->(b2:Beer) 
    WITH b1, b2, count(DISTINCT u) as liked WHERE liked > 3 
    RETURN DISTINCT id(b1) as id
"""
# Relationship query: one weighted edge per such pair, weight = shared users - 3.
edge_query = """
    MATCH (b1:Beer)<-[:LIKED]-(u:User)-[:LIKED]->(b2:Beer) 
    WITH b1, b2, count(distinct u) as liked WHERE liked > 3 
    RETURN id(b1) as source, id(b2) as target, (liked - 3) as weight
"""

with gds.graph.project.cypher("coliked_beers_temp", node_query, edge_query) as g_temp:
    # Ids of the beers the user already liked; similarity is computed from these only.
    q_source = """
        MATCH (:User {id: $name})-[:LIKED]->(b:Beer) RETURN collect(id(b)) as sources
    """
    liked_df = gds.run_cypher(q_source, params={"name": "Gyle41386"})
    sources = liked_df.sources[0]

    similarity_df = gds.nodeSimilarity.filtered.stream(
        g_temp,
        sourceNodeFilter=sources,
        similarityMetric="JACCARD",
        topK=10,
    )
    # For each target beer keep its best positive similarity to any source beer,
    # renamed to the nodeId/score layout used by the name lookup.
    best_per_target = similarity_df.query("similarity > 0").groupby("node2").similarity.max()
    result = best_per_target.reset_index().rename(columns={"node2": "nodeId", "similarity": "score"})

In [None]:
# Look up the names of the similar beers, leaving out the ones the user already liked.
nodes = result["nodeId"].to_list()
q = """
MATCH (:User {id: $name})-[:LIKED]->(b:Beer) WITH collect(b) as sources
MATCH (b:Beer) WHERE id(b) IN $nodes AND not(b in sources)
RETURN id(b) AS nodeId, b.name AS beer
"""
lookup_params = {"name": "Gyle41386", "nodes": nodes}
df = gds.run_cypher(q, params=lookup_params)

# Rows without a name (beers filtered out by the lookup) are dropped before ranking.
names_by_node = df.set_index("nodeId")
named_scores = result.join(names_by_node, on="nodeId").dropna()
named_scores.sort_values("score", ascending=False).head(10)

## Communities in beer projection

### Create links

In [None]:
# Materialise a COLIKED relationship between every pair of beers liked by more
# than 3 common users; weight = number of common users above that threshold.
# MERGE matches on the relationship type only and the weight is assigned with
# SET. If the weight were part of the MERGE pattern, re-running the cell after
# the LIKED data changed would add a second COLIKED relationship with the new
# weight instead of updating the existing one.
q = """
    MATCH (b1:Beer)<-[:LIKED]-(u:User)-[:LIKED]->(b2:Beer) 
    WITH b1, b2, count(distinct u) as liked WHERE liked > 3 
    MERGE (b1)-[r:COLIKED]-(b2)
    SET r.weight = liked - 3
"""
gds.run_cypher(q)

### Calculate communities

In [None]:
# Undirected COLIKED relationships carrying their "weight" property.
coliked_projection = {"COLIKED": {"orientation": "UNDIRECTED", "properties": "weight"}}

with gds.graph.project("coliked_beers_temp", "Beer", coliked_projection) as g_temp:
    # Print the summary statistics of a weighted Leiden run ...
    results = gds.leiden.stats(g_temp, relationshipWeightProperty="weight")
    print(results)
    # ... then write the community id of each beer to the `leiden_cid` property.
    gds.leiden.write(
        g_temp,
        relationshipWeightProperty="weight",
        writeProperty="leiden_cid",
    )

### Clean spurious communities

In [None]:
# Collect the community ids shared by more than one beer, then mark every beer
# whose community id is not in that list with the sentinel leiden_cid = -1.
q = """
MATCH (b:Beer) WITH b.leiden_cid AS leiden_cid, count(*) AS count WHERE count > 1
WITH collect(leiden_cid) as leiden_cids
MATCH (b:Beer) WHERE NOT(b.leiden_cid IN leiden_cids)
SET b.leiden_cid = -1
"""
gds.run_cypher(q)

### Recommend beers from the same community

In [None]:
# Pair every beer the user liked (b1) with every other beer that has a
# non-negative community id (b2), and return the distinct names of the b2
# beers for which sameCommunity on `leiden_cid` gives a positive score.
query = """
MATCH (:User {id: $name})-[:LIKED]->(b1:Beer) 
WITH collect(b1) as source
MATCH (b2:Beer) WHERE not(b2 in source) AND b2.leiden_cid >= 0
UNWIND source AS b1
WITH b2, gds.alpha.linkprediction.sameCommunity(b1, b2, "leiden_cid") AS score WHERE score > 0
RETURN DISTINCT b2.name as beer
"""
gds.run_cypher(query, params={"name": "Gyle41386"})

## Embeddings in beer projection

### Cypher query

In [None]:
# Reference Cypher only: the statements below are bare string literals, so
# this cell does not send anything to the database.
#
# 1) Project Beer nodes (with their `style_embedding` property) and undirected,
#    weighted COLIKED relationships as "coliked_beers_attributes".
"""
CALL gds.graph.project(
    "coliked_beers_attributes",
    {Beer: {properties: ["style_embedding"]}},
    {COLIKED: {orientation: "UNDIRECTED", properties: "weight"}}
)
"""
# 2) Add a 64-dimensional weighted node2vec embedding to the projection.
"""
CALL gds.node2vec.mutate(
'coliked_beers_attributes',
{
  embeddingDimension: 64,
  relationshipWeightProperty: "weight",
  mutateProperty: 'node2vec_embedding'
})
YIELD nodePropertiesWritten
"""
# 3) Add a 64-dimensional FastRP embedding that also uses `style_embedding`
#    as a feature property (propertyRatio 0.31).
"""
CALL gds.fastRP.mutate(
'coliked_beers_attributes',
{
  embeddingDimension: 64,
  propertyRatio: 0.31,
  featureProperties: ['style_embedding'],
  relationshipWeightProperty: 'weight',
  mutateProperty: 'fastrp_embedding'
})
YIELD nodePropertiesWritten
"""
# 4) Filtered KNN (cosine on the FastRP embedding) starting from the user's
#    liked beers. `sources` holds node *ids* (collect(id(b))): `node2` is
#    yielded as an id, so the NOT(node2 IN sources) exclusion of already-liked
#    beers only works when the list contains ids rather than nodes.
"""
MATCH (u:User {id: $name})-[:LIKED]->(b:Beer) WITH collect(id(b)) as sources
CALL gds.knn.filtered.stream('coliked_beers_attributes', {
  topK: 10,
  nodeProperties: {fastrp_embedding:'COSINE'},
  sourceNodeFilter: sources
})
YIELD node1, node2, similarity WHERE similarity > 0 AND NOT(node2 IN sources)
WITH node2, max(similarity) as similarity
RETURN gds.util.asNode(node2).name, similarity
ORDER BY similarity DESC LIMIT 10
"""

### GDS Python client

In [None]:
projection_name = "coliked_beers_attributes"

# Reuse the projection when it is already in the graph catalog; otherwise
# create it and add both embeddings as in-memory node properties.
projection_missing = not gds.graph.exists(projection_name)["exists"]

if projection_missing:
    beer_nodes = {"Beer": {"properties": ["style_embedding"]}}
    coliked_relationships = {"COLIKED": {"orientation": "UNDIRECTED", "properties": "weight"}}
    g_projection, result = gds.graph.project(projection_name, beer_nodes, coliked_relationships)
    print(f"Projection created:\n {result}")

    # Structural embedding from weighted random walks.
    gds.node2vec.mutate(
        g_projection,
        embeddingDimension=64,
        relationshipWeightProperty="weight",
        mutateProperty="node2vec_embedding",
    )
    # Embedding that mixes graph structure with the `style_embedding` feature.
    gds.fastRP.mutate(
        g_projection,
        embeddingDimension=64,
        propertyRatio=0.31,
        featureProperties=["style_embedding"],
        relationshipWeightProperty="weight",
        mutateProperty="fastrp_embedding",
    )
else:
    g_projection = gds.graph.get(projection_name)

In [None]:
# Ids of the beers the user already liked; KNN is computed from these only.
q_source = """
    MATCH (:User {id: $name})-[:LIKED]->(b:Beer) RETURN collect(id(b)) as sources
"""
liked_df = gds.run_cypher(q_source, params={"name": "Gyle41386"})
sources = liked_df.sources[0]

knn_df = gds.knn.filtered.stream(
    g_projection,
    topK=10,
    nodeProperties={"node2vec_embedding": "EUCLIDEAN"},
    sourceNodeFilter=sources,
)
# For each neighbour keep its best positive similarity to any source beer,
# renamed to the nodeId/score layout used by the name lookup.
best_per_target = knn_df.query("similarity > 0").groupby("node2").similarity.max()
result = best_per_target.reset_index().rename(columns={"node2": "nodeId", "similarity": "score"})

# Look up the names of the neighbours, leaving out the ones the user already liked.
nodes = result["nodeId"].to_list()
q = """
MATCH (:User {id: $name})-[:LIKED]->(b:Beer) WITH collect(b) as sources
MATCH (b:Beer) WHERE id(b) IN $nodes AND not(b in sources)
RETURN id(b) AS nodeId, b.name AS beer
"""
lookup_params = {"name": "Gyle41386", "nodes": nodes}
df = gds.run_cypher(q, params=lookup_params)

# Rows without a name (beers filtered out by the lookup) are dropped before ranking.
names_by_node = df.set_index("nodeId")
named_scores = result.join(names_by_node, on="nodeId").dropna()
named_scores.sort_values("score", ascending=False).head(10)