In [77]:
pip install neo4j-driver

Note: you may need to restart the kernel to use updated packages.


In [78]:
from neo4j import GraphDatabase, basic_auth

In [79]:
import pandas as pd

### Documentation
1. For documentation on the Python driver used here (_neo4j_), refer to https://neo4j.com/docs/api/python-driver/current/#
1. There is another popular python driver for Neo4j called _py2neo_. You can find it at https://py2neo.org/2021.1/
1. General Link to python drivers for Neo4j : https://neo4j.com/developer/python/

In [81]:
import os
from getpass import getpass

# Connection settings are read from the environment so that the database
# password is never stored in the notebook (or in its version history).
# Set NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD before starting Jupyter; if the
# password is not set, it is requested interactively instead.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://54.172.10.220:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# A single driver object is shared by every session opened further down.
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=basic_auth(NEO4J_USER, NEO4J_PASSWORD),
)
   

### Delete all existing _User_ nodes and their relationships (if any exist)
Refer: https://neo4j.com/docs/cypher-manual/current/clauses/delete/

In [82]:
# Remove every :User node left over from an earlier run. DETACH also deletes
# the relationships attached to those nodes, so the delete cannot fail because
# a node is still connected.
with driver.session(database="neo4j") as session:
    result = session.run("""
       MATCH (u:User) DETACH DELETE u
    """)
    # Consume the result inside the session so the statement is fully executed
    # here and any server-side error is raised by this cell rather than being
    # lost when the session closes.
    result.consume()

### Create the Node type _User_ and _relationships_
Refer: https://neo4j.com/docs/cypher-manual/current/clauses/create/

In [83]:
# Build the sample graph: six :User nodes (three of them carrying a `seed`
# property) connected by weighted :LINK relationships.
with driver.session(database="neo4j") as session:
    result = session.run("""
        CREATE
          (nAlice:User {name: 'Alice', seed: 42}),
          (nBridget:User {name: 'Bridget', seed: 42}),
          (nCharles:User {name: 'Charles', seed: 42}),
          (nDoug:User {name: 'Doug'}),
          (nMark:User {name: 'Mark'}),
          (nMichael:User {name: 'Michael'}),

          (nAlice)-[:LINK {weight: 1}]->(nBridget),
          (nAlice)-[:LINK {weight: 1}]->(nCharles),
          (nCharles)-[:LINK {weight: 1}]->(nBridget),

          (nAlice)-[:LINK {weight: 5}]->(nDoug),

          (nMark)-[:LINK {weight: 1}]->(nDoug),
          (nMark)-[:LINK {weight: 1}]->(nMichael),
          (nMichael)-[:LINK {weight: 1}]->(nMark);

    """)
    # Consume the result inside the session so the write is fully executed
    # here and any server-side error is raised by this cell rather than being
    # lost when the session closes.
    result.consume()

### Drop the in-memory graph projection from the graph catalog, if it exists
Refer: https://neo4j.com/docs/graph-data-science/current/graph-drop/

In [84]:
# Drop the 'myCommunity' projection so it can be re-created by a later cell.
# The second argument (failIfMissing = false) makes the procedure return no
# rows, instead of raising, when the projection does not exist.
with driver.session(database="neo4j") as session:
    result = session.run("""
        CALL gds.graph.drop('myCommunity',false) 
        YIELD graphName, nodeCount, relationshipCount
        RETURN graphName, nodeCount, relationshipCount  
    """)

    # Name the columns explicitly: on a fresh database the query yields zero
    # rows, and without `columns=` the resulting frame would have no labels.
    graphdrop_df = pd.DataFrame(
        [dict(record) for record in result],
        columns=["graphName", "nodeCount", "relationshipCount"],
    )
graphdrop_df

Unnamed: 0,graphName,nodeCount,relationshipCount
0,myCommunity,6,14


### Create the in-memory graph projection in the graph catalog
Refer: https://neo4j.com/docs/graph-data-science/current/graph-create/

In [85]:
# Project the :User nodes and their :LINK relationships (treated as
# undirected) into an in-memory graph named 'myCommunity', carrying the
# `seed` node property and the `weight` relationship property.
# NOTE(review): GDS 2.x renamed this procedure to gds.graph.project - confirm
# the server's GDS version before changing the call.
create_projection_query = """
        CALL gds.graph.create(
        'myCommunity',
        'User',
        {
            LINK: {
                orientation: 'UNDIRECTED'
            }
        },
        {
            nodeProperties: 'seed',
            relationshipProperties: 'weight'
        }
        )
        YIELD graphName, nodeCount, relationshipCount
        RETURN graphName, nodeCount, relationshipCount  

    """

with driver.session(database="neo4j") as db_session:
    result = db_session.run(create_projection_query)

    # Records have to be read while the session is still open.
    projection_rows = [dict(row) for row in result]
    graphcreate_df = pd.DataFrame(projection_rows)
graphcreate_df

Unnamed: 0,graphName,nodeCount,relationshipCount
0,myCommunity,6,14


### Run Community Detection
1. Using Louvain
1. Returns communityId - community id of the node when all iterations are done
1. Returns intermediateCommunityIds - Louvain is a hierarchical clustering algorithm. After every clustering step, all nodes that belong to the same cluster are reduced to a single node. Relationships between nodes of the same cluster become self-relationships, and relationships to nodes of other clusters connect to those clusters' representatives. This condensed graph is then used to run the next level of clustering, and the process is repeated until the clusters are stable. The community ID detected at each level is stored in this column; as the output below shows, its last entry is the final `communityId`.
1. Refer: https://neo4j.com/docs/graph-data-science/current/algorithms/louvain/

In [86]:
# Stream Louvain results for the 'myCommunity' projection: one row per user
# with its final community ID and the IDs assigned at each hierarchy level,
# sorted by user name.
louvain_query = """
        CALL gds.louvain.stream('myCommunity',{includeIntermediateCommunities: true}) 
        YIELD nodeId, communityId, intermediateCommunityIds
        RETURN gds.util.asNode(nodeId).name AS name, communityId, intermediateCommunityIds
        ORDER BY name ASC
    """

with driver.session(database="neo4j") as db_session:
    result = db_session.run(louvain_query)

    # Records have to be read while the session is still open.
    community_rows = [dict(row) for row in result]
    community_df = pd.DataFrame(community_rows)
community_df


Unnamed: 0,name,communityId,intermediateCommunityIds
0,Alice,2,[2]
1,Bridget,2,[2]
2,Charles,2,[2]
3,Doug,5,[5]
4,Mark,5,[5]
5,Michael,5,[5]


In [87]:
# Count the distinct final community IDs assigned by Louvain.
n = community_df["communityId"].nunique()
print("Number of Unique Communities detected: ", n)

Number of Unique Communities detected:  2
