In [2]:
import csv
import math
import os

import neo4j
import numpy as np
import pandas as pd
import psycopg2

In [3]:
# Connection settings can be overridden through the environment so that
# credentials do not have to be edited into the notebook. The fallbacks are
# the values this notebook originally hardcoded.
driver = neo4j.GraphDatabase.driver(
    uri=os.environ.get("NEO4J_URI", "neo4j://neo4j:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "ucb_mids_w205"),
    ),
)

In [4]:
# One module-level session on the "neo4j" database; the helper functions
# defined below and the query cells all issue their queries through it.
session = driver.session(database="neo4j")

In [5]:
def my_neo4j_run_query_pandas(query, **kwargs):
    """Execute a query on the shared session and return its rows as a DataFrame.

    query  -- query text handed to ``session.run``
    kwargs -- forwarded unchanged to ``session.run`` (e.g. ``source=...``)

    The frame has one row per returned record and one column per result key.
    """
    cursor = session.run(query, **kwargs)

    # Materialise every record first, then ask the result for its column names.
    rows = []
    for record in cursor:
        rows.append(record.values())
    column_names = cursor.keys()

    return pd.DataFrame(rows, columns=column_names)

In [6]:
def my_neo4j_wipe_out_mst_relationships():
    """Delete every relationship of type MST from the database.

    The result is consumed before returning, so the delete has been fully
    processed (and any error it raised has surfaced here) by the time the
    caller issues its next query. Returns None.
    """

    query = "match (node)-[relationship:MST]->() delete relationship"
    # session.run() hands back a lazily fetched result; consume() drains it
    # right away instead of leaving it pending until a later query.
    session.run(query).consume()

### Minimum Spanning Tree

In [7]:
# Start from a clean slate: remove MST relationships left by earlier runs
# before the spanning tree is written again below.
my_neo4j_wipe_out_mst_relationships()

In [20]:
# Drop the in-memory projection named 'ds_graph' if one is left from a previous run.
# NOTE(review): the `false` argument presumably tells GDS not to fail when the
# graph does not exist — confirm against the GDS documentation.
query = "CALL gds.graph.drop('ds_graph', false)"
session.run(query)

# Project Station nodes and LINK relationships (with their 'weight' property)
# into 'ds_graph', with the LINK relationships given UNDIRECTED orientation.
query = """

CALL gds.graph.project('ds_graph', 'Station', 
                        {
                            LINK: {
                                properties: 'weight',
                                orientation: 'UNDIRECTED'
                            }
                        }
                       )

"""

# Last expression of the cell, so the notebook displays the Result object's repr.
session.run(query)

<neo4j._sync.work.result.Result at 0x7f64e493ee50>

In [21]:
# Run gds.beta.spanningTree.write on 'ds_graph', starting from the Station
# node whose name equals $source. Per the config below, 'weight' is the
# relationship weight and the result is written back as relationships of
# type MST carrying a 'writeCost' property. The timing fields and the
# effective node count are returned.
query = """

MATCH (n:Station {name: $source})
CALL gds.beta.spanningTree.write('ds_graph',
                                          {sourceNode: id(n),
                                           relationshipWeightProperty: 'weight',
                                           writeProperty: 'writeCost',
                                           writeRelationshipType: 'MST'
                                          }
                                         )
YIELD preProcessingMillis, computeMillis, writeMillis, effectiveNodeCount
RETURN preProcessingMillis, computeMillis, writeMillis, effectiveNodeCount;

"""

# NOTE(review): the recorded output of this cell is an empty frame, and the
# node names shown in the Louvain output further down look like
# "<line colour> <station>" (e.g. "blue Powell Street"), so a bare "Dublin"
# may not match any Station node — confirm the intended source name.
source = "Dublin"

my_neo4j_run_query_pandas(query, source=source)

Unnamed: 0,preProcessingMillis,computeMillis,writeMillis,effectiveNodeCount


In [22]:
# Follow paths of one or more MST relationships (in either direction) from the
# source station, flatten the paths into their relationships, de-duplicate
# them, and list each one with its start node, end node and writeCost.
query = """

MATCH path = (n:Station {name: $source})-[:MST*]-()
WITH relationships(path) AS rels
UNWIND rels AS rel
WITH DISTINCT rel AS rel
RETURN startNode(rel).name AS source, endNode(rel).name AS destination, rel.writeCost AS cost

"""

# Same source name as the spanningTree.write cell; the recorded output here is
# also empty. NOTE(review): confirm "Dublin" matches a Station node name.
source = "Dublin"

my_neo4j_run_query_pandas(query, source=source)

Unnamed: 0,source,destination,cost


### Louvain Modularity

In [8]:
# Rebuild 'ds_graph' for Louvain: drop any existing projection with that name,
# then project Station nodes and LINK relationships with the 'weight'
# relationship property. Unlike the projection in the spanning-tree section,
# no orientation is specified here.
query = "CALL gds.graph.drop('ds_graph', false)"
session.run(query)

query = """

CALL gds.graph.project('ds_graph', 'Station', 'LINK', 
                      {relationshipProperties: 'weight'})
"""

# Last expression of the cell, so the notebook displays the Result object's repr.
session.run(query)

<neo4j._sync.work.result.Result at 0x7f84d9ba3820>

In [35]:
# Stream Louvain results for 'ds_graph' with 'weight' as the relationship
# weight, returning each node's name and community id, ordered by community
# and then name. intermediateCommunityIds is yielded but not returned.
# `query` stays in scope: the next cell runs it again to fill `df`.
query = """

CALL gds.louvain.stream('ds_graph', { relationshipWeightProperty: 'weight' })
YIELD nodeId, communityId, intermediateCommunityIds
RETURN gds.util.asNode(nodeId).name AS name, communityId as community
ORDER BY community, name ASC

"""

my_neo4j_run_query_pandas(query)


Unnamed: 0,name,community
0,blue Montgomery Street,0
1,blue Powell Street,0
2,green Montgomery Street,0
3,green Powell Street,0
4,red Montgomery Street,0
...,...,...
209,orange Hayward,211
210,orange Milpitas,211
211,orange South Hayward,211
212,orange Union City,211


In [36]:
# Run whatever is currently in `query` and keep the frame for the filtering
# and lookups below.
# NOTE(review): this is a separate execution from the cell that displayed the
# table above, so `df` is not guaranteed to match that table — confirm whether
# the Louvain assignments are stable across runs on this graph.
df = my_neo4j_run_query_pandas(query)

In [37]:
# Keep only the rows whose name contains neither "arrive" nor "depart".
df = (
    df
    .loc[lambda frame: frame["name"].str.contains("arrive").eq(False)]
    .loc[lambda frame: frame["name"].str.contains("depart").eq(False)]
)

In [42]:
# Number of remaining rows in each community, largest first.
df["community"].value_counts()

211    16
43     14
204    14
188    11
173    10
0       8
2       8
165     8
196     8
17      7
8       6
33      4
Name: community, dtype: int64

In [51]:
# Show the rows whose name contains "Daly City" together with their community.
df.loc[df["name"].str.contains("Daly City").eq(True)]

Unnamed: 0,name,community
156,blue Daly City,173
158,green Daly City,173
161,red Daly City,173
164,yellow Daly City,173


In [39]:
# Show the rows whose name contains "Pittsburg" together with their community.
df.loc[df["name"].str.contains("Pittsburg").eq(True)]

Unnamed: 0,name,community
19,yellow Pittsburg,8
20,yellow Pittsburg Center,8
